Libav
af_amix.c
Go to the documentation of this file.
1 /*
2  * Audio Mix Filter
3  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
31 #include "libavutil/attributes.h"
32 #include "libavutil/audio_fifo.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/avstring.h"
36 #include "libavutil/common.h"
37 #include "libavutil/float_dsp.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/opt.h"
40 #include "libavutil/samplefmt.h"
41 
42 #include "audio.h"
43 #include "avfilter.h"
44 #include "formats.h"
45 #include "internal.h"
46 
/* Per-input state values stored in MixContext.input_state[]. */
/* Input has ended: assigned once an input reports AVERROR_EOF; such inputs
 * are skipped when requesting, counting and mixing. */
47 #define INPUT_OFF 0
/* Input is live: every input starts in this state and only inputs in this
 * state are mixed into the output. */
48 #define INPUT_ON 1
/* NOTE(review): never referenced in the visible code; exact meaning unconfirmed. */
49 #define INPUT_INACTIVE 2
/* Values of the "duration" option (MixContext.duration_mode). */
/* End of stream only when no input is left active. */
51 #define DURATION_LONGEST 0
/* End of stream as soon as any input has ended. */
52 #define DURATION_SHORTEST 1
/* End of stream as soon as the first input (index 0) has ended. */
53 #define DURATION_FIRST 2
54 
55 
/**
 * Bookkeeping record for one frame received on the first input.
 *
 * Only the sample count and the timestamp are stored here; the audio data
 * itself is written to the per-input FIFO by filter_frame().
 */
typedef struct FrameInfo {
    int nb_samples;          /* samples of this frame not yet consumed */
    int64_t pts;             /* timestamp of the first remaining sample */
    struct FrameInfo *next;  /* following record, NULL at the tail */
} FrameInfo;

/**
 * Singly linked queue of FrameInfo records (oldest first).
 *
 * nb_frames and nb_samples are running totals maintained by
 * frame_list_add_frame(), frame_list_remove_samples() and
 * frame_list_clear().
 */
typedef struct FrameList {
    int nb_frames;           /* number of records currently queued */
    int nb_samples;          /* sum of nb_samples over all queued records */
    FrameInfo *list;         /* head of the queue, NULL when empty */
    FrameInfo *end;          /* tail of the queue, NULL when empty */
} FrameList;
76 
77 static void frame_list_clear(FrameList *frame_list)
78 {
79  if (frame_list) {
80  while (frame_list->list) {
81  FrameInfo *info = frame_list->list;
82  frame_list->list = info->next;
83  av_free(info);
84  }
85  frame_list->nb_frames = 0;
86  frame_list->nb_samples = 0;
87  frame_list->end = NULL;
88  }
89 }
90 
91 static int frame_list_next_frame_size(FrameList *frame_list)
92 {
93  if (!frame_list->list)
94  return 0;
95  return frame_list->list->nb_samples;
96 }
97 
98 static int64_t frame_list_next_pts(FrameList *frame_list)
99 {
100  if (!frame_list->list)
101  return AV_NOPTS_VALUE;
102  return frame_list->list->pts;
103 }
104 
105 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
106 {
107  if (nb_samples >= frame_list->nb_samples) {
108  frame_list_clear(frame_list);
109  } else {
110  int samples = nb_samples;
111  while (samples > 0) {
112  FrameInfo *info = frame_list->list;
113  av_assert0(info != NULL);
114  if (info->nb_samples <= samples) {
115  samples -= info->nb_samples;
116  frame_list->list = info->next;
117  if (!frame_list->list)
118  frame_list->end = NULL;
119  frame_list->nb_frames--;
120  frame_list->nb_samples -= info->nb_samples;
121  av_free(info);
122  } else {
123  info->nb_samples -= samples;
124  info->pts += samples;
125  frame_list->nb_samples -= samples;
126  samples = 0;
127  }
128  }
129  }
130 }
131 
132 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
133 {
134  FrameInfo *info = av_malloc(sizeof(*info));
135  if (!info)
136  return AVERROR(ENOMEM);
137  info->nb_samples = nb_samples;
138  info->pts = pts;
139  info->next = NULL;
140 
141  if (!frame_list->list) {
142  frame_list->list = info;
143  frame_list->end = info;
144  } else {
145  av_assert0(frame_list->end != NULL);
146  frame_list->end->next = info;
147  frame_list->end = info;
148  }
149  frame_list->nb_frames++;
150  frame_list->nb_samples += nb_samples;
151 
152  return 0;
153 }
154 
155 
156 typedef struct MixContext {
157  const AVClass *class;
159 
160  int nb_inputs;
167  int planar;
170  float *input_scale;
171  float scale_norm;
172  int64_t next_pts;
174 } MixContext;
175 
176 #define OFFSET(x) offsetof(MixContext, x)
177 #define A AV_OPT_FLAG_AUDIO_PARAM
178 static const AVOption options[] = {
179  { "inputs", "Number of inputs.",
180  OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A },
181  { "duration", "How to determine the end-of-stream.",
182  OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A, "duration" },
183  { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A, "duration" },
184  { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A, "duration" },
185  { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A, "duration" },
186  { "dropout_transition", "Transition time, in seconds, for volume "
187  "renormalization when an input stream ends.",
188  OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A },
189  { NULL },
190 };
191 
192 static const AVClass amix_class = {
193  .class_name = "amix filter",
194  .item_name = av_default_item_name,
195  .option = options,
196  .version = LIBAVUTIL_VERSION_INT,
197 };
198 
199 
207 static void calculate_scales(MixContext *s, int nb_samples)
208 {
209  int i;
210 
211  if (s->scale_norm > s->active_inputs) {
212  s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
214  }
215 
216  for (i = 0; i < s->nb_inputs; i++) {
217  if (s->input_state[i] == INPUT_ON)
218  s->input_scale[i] = 1.0f / s->scale_norm;
219  else
220  s->input_scale[i] = 0.0f;
221  }
222 }
223 
224 static int config_output(AVFilterLink *outlink)
225 {
226  AVFilterContext *ctx = outlink->src;
227  MixContext *s = ctx->priv;
228  int i;
229  char buf[64];
230 
231  s->planar = av_sample_fmt_is_planar(outlink->format);
232  s->sample_rate = outlink->sample_rate;
233  outlink->time_base = (AVRational){ 1, outlink->sample_rate };
235 
236  s->frame_list = av_mallocz(sizeof(*s->frame_list));
237  if (!s->frame_list)
238  return AVERROR(ENOMEM);
239 
240  s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
241  if (!s->fifos)
242  return AVERROR(ENOMEM);
243 
245  for (i = 0; i < s->nb_inputs; i++) {
246  s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
247  if (!s->fifos[i])
248  return AVERROR(ENOMEM);
249  }
250 
252  if (!s->input_state)
253  return AVERROR(ENOMEM);
254  memset(s->input_state, INPUT_ON, s->nb_inputs);
255  s->active_inputs = s->nb_inputs;
256 
257  s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
258  if (!s->input_scale)
259  return AVERROR(ENOMEM);
260  s->scale_norm = s->active_inputs;
261  calculate_scales(s, 0);
262 
263  av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
264 
265  av_log(ctx, AV_LOG_VERBOSE,
266  "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
267  av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
268 
269  return 0;
270 }
271 
275 static int output_frame(AVFilterLink *outlink, int nb_samples)
276 {
277  AVFilterContext *ctx = outlink->src;
278  MixContext *s = ctx->priv;
279  AVFrame *out_buf, *in_buf;
280  int i;
281 
282  calculate_scales(s, nb_samples);
283 
284  out_buf = ff_get_audio_buffer(outlink, nb_samples);
285  if (!out_buf)
286  return AVERROR(ENOMEM);
287 
288  in_buf = ff_get_audio_buffer(outlink, nb_samples);
289  if (!in_buf) {
290  av_frame_free(&out_buf);
291  return AVERROR(ENOMEM);
292  }
293 
294  for (i = 0; i < s->nb_inputs; i++) {
295  if (s->input_state[i] == INPUT_ON) {
296  int planes, plane_size, p;
297 
298  av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
299  nb_samples);
300 
301  planes = s->planar ? s->nb_channels : 1;
302  plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
303  plane_size = FFALIGN(plane_size, 16);
304 
305  for (p = 0; p < planes; p++) {
306  s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p],
307  (float *) in_buf->extended_data[p],
308  s->input_scale[i], plane_size);
309  }
310  }
311  }
312  av_frame_free(&in_buf);
313 
314  out_buf->pts = s->next_pts;
315  if (s->next_pts != AV_NOPTS_VALUE)
316  s->next_pts += nb_samples;
317 
318  return ff_filter_frame(outlink, out_buf);
319 }
320 
326 {
327  int i;
328  int available_samples = INT_MAX;
329 
330  av_assert0(s->nb_inputs > 1);
331 
332  for (i = 1; i < s->nb_inputs; i++) {
333  int nb_samples;
334  if (s->input_state[i] == INPUT_OFF)
335  continue;
336  nb_samples = av_audio_fifo_size(s->fifos[i]);
337  available_samples = FFMIN(available_samples, nb_samples);
338  }
339  if (available_samples == INT_MAX)
340  return 0;
341  return available_samples;
342 }
343 
347 static int request_samples(AVFilterContext *ctx, int min_samples)
348 {
349  MixContext *s = ctx->priv;
350  int i, ret;
351 
352  av_assert0(s->nb_inputs > 1);
353 
354  for (i = 1; i < s->nb_inputs; i++) {
355  ret = 0;
356  if (s->input_state[i] == INPUT_OFF)
357  continue;
358  while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
359  ret = ff_request_frame(ctx->inputs[i]);
360  if (ret == AVERROR_EOF) {
361  if (av_audio_fifo_size(s->fifos[i]) == 0) {
362  s->input_state[i] = INPUT_OFF;
363  continue;
364  }
365  } else if (ret < 0)
366  return ret;
367  }
368  return 0;
369 }
370 
378 {
379  int i;
380  int active_inputs = 0;
381  for (i = 0; i < s->nb_inputs; i++)
382  active_inputs += !!(s->input_state[i] != INPUT_OFF);
383  s->active_inputs = active_inputs;
384 
385  if (!active_inputs ||
386  (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
387  (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
388  return AVERROR_EOF;
389  return 0;
390 }
391 
392 static int request_frame(AVFilterLink *outlink)
393 {
394  AVFilterContext *ctx = outlink->src;
395  MixContext *s = ctx->priv;
396  int ret;
397  int wanted_samples, available_samples;
398 
399  ret = calc_active_inputs(s);
400  if (ret < 0)
401  return ret;
402 
403  if (s->input_state[0] == INPUT_OFF) {
404  ret = request_samples(ctx, 1);
405  if (ret < 0)
406  return ret;
407 
408  ret = calc_active_inputs(s);
409  if (ret < 0)
410  return ret;
411 
412  available_samples = get_available_samples(s);
413  if (!available_samples)
414  return AVERROR(EAGAIN);
415 
416  return output_frame(outlink, available_samples);
417  }
418 
419  if (s->frame_list->nb_frames == 0) {
420  ret = ff_request_frame(ctx->inputs[0]);
421  if (ret == AVERROR_EOF) {
422  s->input_state[0] = INPUT_OFF;
423  if (s->nb_inputs == 1)
424  return AVERROR_EOF;
425  else
426  return AVERROR(EAGAIN);
427  } else if (ret < 0)
428  return ret;
429  }
431 
432  wanted_samples = frame_list_next_frame_size(s->frame_list);
433 
434  if (s->active_inputs > 1) {
435  ret = request_samples(ctx, wanted_samples);
436  if (ret < 0)
437  return ret;
438 
439  ret = calc_active_inputs(s);
440  if (ret < 0)
441  return ret;
442  }
443 
444  if (s->active_inputs > 1) {
445  available_samples = get_available_samples(s);
446  if (!available_samples)
447  return AVERROR(EAGAIN);
448  available_samples = FFMIN(available_samples, wanted_samples);
449  } else {
450  available_samples = wanted_samples;
451  }
452 
454  frame_list_remove_samples(s->frame_list, available_samples);
455 
456  return output_frame(outlink, available_samples);
457 }
458 
459 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
460 {
461  AVFilterContext *ctx = inlink->dst;
462  MixContext *s = ctx->priv;
463  AVFilterLink *outlink = ctx->outputs[0];
464  int i, ret = 0;
465 
466  for (i = 0; i < ctx->nb_inputs; i++)
467  if (ctx->inputs[i] == inlink)
468  break;
469  if (i >= ctx->nb_inputs) {
470  av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
471  ret = AVERROR(EINVAL);
472  goto fail;
473  }
474 
475  if (i == 0) {
476  int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
477  outlink->time_base);
478  ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
479  if (ret < 0)
480  goto fail;
481  }
482 
483  ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
484  buf->nb_samples);
485 
486 fail:
487  av_frame_free(&buf);
488 
489  return ret;
490 }
491 
492 static av_cold int init(AVFilterContext *ctx)
493 {
494  MixContext *s = ctx->priv;
495  int i;
496 
497  for (i = 0; i < s->nb_inputs; i++) {
498  char name[32];
499  AVFilterPad pad = { 0 };
500 
501  snprintf(name, sizeof(name), "input%d", i);
502  pad.type = AVMEDIA_TYPE_AUDIO;
503  pad.name = av_strdup(name);
505 
506  ff_insert_inpad(ctx, i, &pad);
507  }
508 
509  avpriv_float_dsp_init(&s->fdsp, 0);
510 
511  return 0;
512 }
513 
514 static av_cold void uninit(AVFilterContext *ctx)
515 {
516  int i;
517  MixContext *s = ctx->priv;
518 
519  if (s->fifos) {
520  for (i = 0; i < s->nb_inputs; i++)
521  av_audio_fifo_free(s->fifos[i]);
522  av_freep(&s->fifos);
523  }
525  av_freep(&s->frame_list);
526  av_freep(&s->input_state);
527  av_freep(&s->input_scale);
528 
529  for (i = 0; i < ctx->nb_inputs; i++)
530  av_freep(&ctx->input_pads[i].name);
531 }
532 
/*
 * Format negotiation body: adds AV_SAMPLE_FMT_FLT to a format list with
 * ff_add_format(), installs that list with ff_set_common_formats() and
 * returns 0.
 *
 * NOTE(review): this listing skips several lines around this body (the
 * embedded numbering jumps 534 -> 536 -> 538 -> 541), so the function
 * signature, the declaration of `formats` and possibly further statements
 * are not visible here. Recover them from the upstream file before editing.
 */
534 {
536  ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
538  ff_set_common_formats(ctx, formats);
541  return 0;
542 }
543 
545  {
546  .name = "default",
547  .type = AVMEDIA_TYPE_AUDIO,
548  .config_props = config_output,
549  .request_frame = request_frame
550  },
551  { NULL }
552 };
553 
/*
 * Filter definition initializer: the filter is named "amix", its private
 * context is a MixContext configured through amix_class, init()/uninit()
 * are its lifecycle callbacks, it declares no static input pads (.inputs is
 * NULL; init() inserts them at run time) and its output pads come from
 * avfilter_af_amix_outputs.
 *
 * NOTE(review): the declarator line that opens this initializer and some
 * entries are missing from this listing (the embedded numbering skips 554,
 * 562 and 567). Restore them from the upstream file before editing.
 */
555  .name = "amix",
556  .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
557  .priv_size = sizeof(MixContext),
558  .priv_class = &amix_class,
559 
560  .init = init,
561  .uninit = uninit,
563 
564  .inputs = NULL,
565  .outputs = avfilter_af_amix_outputs,
566 
568 };