FFmpeg  2.6.9
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aacenc.c
Go to the documentation of this file.
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  * add temporal noise shaping
31  ***********************************/
32 
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "put_bits.h"
37 #include "internal.h"
38 #include "mpeg4audio.h"
39 #include "kbdwin.h"
40 #include "sinewin.h"
41 
42 #include "aac.h"
43 #include "aactab.h"
44 #include "aacenc.h"
45 
46 #include "psymodel.h"
47 
48 #define AAC_MAX_CHANNELS 6
49 
/**
 * Log an error and make the calling function return AVERROR(EINVAL)
 * when @p cond is true.
 *
 * Expects a variable named `avctx` to be in scope at the call site and may
 * only be used in functions whose return type accepts an int error code.
 * The do/while(0) wrapper makes the expansion a single statement, so the
 * macro is safe inside an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

/**
 * Log a warning when @p cond is true; execution continues either way.
 *
 * Expects a variable named `avctx` to be in scope at the call site.
 */
#define WARN_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
        } \
    } while (0)
60 
61 float ff_aac_pow34sf_tab[428];
62 
/*
 * Scalefactor band widths for long windows.
 * Each entry is the width of one band in spectral coefficients; the entries
 * of every table add up to 1024. The numeric suffix appears to name the
 * sample rate (in kHz) the table is meant for.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};
105 
106 static const uint8_t *swb_size_1024[] = {
111  swb_size_1024_8
112 };
113 
/*
 * Scalefactor band widths for short windows.
 * Each entry is the width of one band in spectral coefficients; the entries
 * of every table add up to 128. The numeric suffix appears to name the
 * sample rate (in kHz) the table is meant for.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};
133 
134 static const uint8_t *swb_size_128[] = {
135  /* the last entry on the following row is swb_size_128_64 but is a
136  duplicate of swb_size_128_96 */
141  swb_size_128_8
142 };
143 
144 /** default channel configurations */
145 static const uint8_t aac_chan_configs[6][5] = {
146  {1, TYPE_SCE}, // 1 channel - single channel element
147  {1, TYPE_CPE}, // 2 channels - channel pair
148  {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
149  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
150  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
151  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
152 };
153 
154 /**
155  * Table to remap channels from libavcodec's default order to AAC order.
156  */
158  { 0 },
159  { 0, 1 },
160  { 2, 0, 1 },
161  { 2, 0, 1, 3 },
162  { 2, 0, 1, 3, 4 },
163  { 2, 0, 1, 4, 5, 3 },
164 };
165 
166 /**
167  * Make AAC audio config object.
168  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
169  */
171 {
172  PutBitContext pb;
173  AACEncContext *s = avctx->priv_data;
174 
175  init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
176  put_bits(&pb, 5, 2); //object type - AAC-LC
177  put_bits(&pb, 4, s->samplerate_index); //sample rate index
178  put_bits(&pb, 4, s->channels);
179  //GASpecificConfig
180  put_bits(&pb, 1, 0); //frame length - 1024 samples
181  put_bits(&pb, 1, 0); //does not depend on core coder
182  put_bits(&pb, 1, 0); //is not extension
183 
184  //Explicitly Mark SBR absent
185  put_bits(&pb, 11, 0x2b7); //sync extension
186  put_bits(&pb, 5, AOT_SBR);
187  put_bits(&pb, 1, 0);
188  flush_put_bits(&pb);
189 }
190 
/**
 * Expand to the signature of a windowing function named
 * apply_<type>_window. The function body follows the macro invocation.
 *
 * All functions declared this way take the float DSP context, the channel
 * whose ret_buf receives the windowed samples, and the input audio.
 */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)
195 
196 WINDOW_FUNC(only_long)
197 {
198  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
200  float *out = sce->ret_buf;
201 
202  fdsp->vector_fmul (out, audio, lwindow, 1024);
203  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
204 }
205 
206 WINDOW_FUNC(long_start)
207 {
208  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
209  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
210  float *out = sce->ret_buf;
211 
212  fdsp->vector_fmul(out, audio, lwindow, 1024);
213  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
214  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
215  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
216 }
217 
218 WINDOW_FUNC(long_stop)
219 {
220  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
221  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
222  float *out = sce->ret_buf;
223 
224  memset(out, 0, sizeof(out[0]) * 448);
225  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
226  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
227  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
228 }
229 
230 WINDOW_FUNC(eight_short)
231 {
232  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
233  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
234  const float *in = audio + 448;
235  float *out = sce->ret_buf;
236  int w;
237 
238  for (w = 0; w < 8; w++) {
239  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
240  out += 128;
241  in += 128;
242  fdsp->vector_fmul_reverse(out, in, swindow, 128);
243  out += 128;
244  }
245 }
246 
247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
249  const float *audio) = {
250  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
251  [LONG_START_SEQUENCE] = apply_long_start_window,
252  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
253  [LONG_STOP_SEQUENCE] = apply_long_stop_window
254 };
255 
257  float *audio)
258 {
259  int i;
260  float *output = sce->ret_buf;
261 
262  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
263 
265  s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
266  else
267  for (i = 0; i < 1024; i += 128)
268  s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
269  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
270  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
271 }
272 
273 /**
274  * Encode ics_info element.
275  * @see Table 4.6 (syntax of ics_info)
276  */
278 {
279  int w;
280 
281  put_bits(&s->pb, 1, 0); // ics_reserved bit
282  put_bits(&s->pb, 2, info->window_sequence[0]);
283  put_bits(&s->pb, 1, info->use_kb_window[0]);
284  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
285  put_bits(&s->pb, 6, info->max_sfb);
286  put_bits(&s->pb, 1, 0); // no prediction
287  } else {
288  put_bits(&s->pb, 4, info->max_sfb);
289  for (w = 1; w < 8; w++)
290  put_bits(&s->pb, 1, !info->group_len[w]);
291  }
292 }
293 
294 /**
295  * Encode MS data.
296  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
297  */
299 {
300  int i, w;
301 
302  put_bits(pb, 2, cpe->ms_mode);
303  if (cpe->ms_mode == 1)
304  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
307 }
308 
309 /**
310  * Produce integer coefficients from scalefactors provided by the model.
311  */
312 static void adjust_frame_information(ChannelElement *cpe, int chans)
313 {
314  int i, w, w2, g, ch;
315  int start, maxsfb, cmaxsfb;
316 
317  for (ch = 0; ch < chans; ch++) {
318  IndividualChannelStream *ics = &cpe->ch[ch].ics;
319  start = 0;
320  maxsfb = 0;
321  cpe->ch[ch].pulse.num_pulse = 0;
322  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
323  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
324  start = (w+w2) * 128;
325  for (g = 0; g < ics->num_swb; g++) {
326  //apply M/S
327  if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
328  for (i = 0; i < ics->swb_sizes[g]; i++) {
329  cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
330  cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
331  }
332  }
333  start += ics->swb_sizes[g];
334  }
335  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
336  ;
337  maxsfb = FFMAX(maxsfb, cmaxsfb);
338  }
339  }
340  ics->max_sfb = maxsfb;
341 
342  //adjust zero bands for window groups
343  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
344  for (g = 0; g < ics->max_sfb; g++) {
345  i = 1;
346  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
347  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
348  i = 0;
349  break;
350  }
351  }
352  cpe->ch[ch].zeroes[w*16 + g] = i;
353  }
354  }
355  }
356 
357  if (chans > 1 && cpe->common_window) {
358  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
359  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
360  int msc = 0;
361  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
362  ics1->max_sfb = ics0->max_sfb;
363  for (w = 0; w < ics0->num_windows*16; w += 16)
364  for (i = 0; i < ics0->max_sfb; i++)
365  if (cpe->ms_mask[w+i])
366  msc++;
367  if (msc == 0 || ics0->max_sfb == 0)
368  cpe->ms_mode = 0;
369  else
370  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
371  }
372 }
373 
374 /**
375  * Encode scalefactor band coding type.
376  */
378 {
379  int w;
380 
381  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
382  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
383 }
384 
385 /**
386  * Encode scalefactors.
387  */
390 {
391  int off = sce->sf_idx[0], diff;
392  int i, w;
393 
394  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
395  for (i = 0; i < sce->ics.max_sfb; i++) {
396  if (!sce->zeroes[w*16 + i]) {
397  diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
398  av_assert0(diff >= 0 && diff <= 120);
399  off = sce->sf_idx[w*16 + i];
401  }
402  }
403  }
404 }
405 
406 /**
407  * Encode pulse data.
408  */
409 static void encode_pulses(AACEncContext *s, Pulse *pulse)
410 {
411  int i;
412 
413  put_bits(&s->pb, 1, !!pulse->num_pulse);
414  if (!pulse->num_pulse)
415  return;
416 
417  put_bits(&s->pb, 2, pulse->num_pulse - 1);
418  put_bits(&s->pb, 6, pulse->start);
419  for (i = 0; i < pulse->num_pulse; i++) {
420  put_bits(&s->pb, 5, pulse->pos[i]);
421  put_bits(&s->pb, 4, pulse->amp[i]);
422  }
423 }
424 
425 /**
426  * Encode spectral coefficients processed by psychoacoustic model.
427  */
429 {
430  int start, i, w, w2;
431 
432  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
433  start = 0;
434  for (i = 0; i < sce->ics.max_sfb; i++) {
435  if (sce->zeroes[w*16 + i]) {
436  start += sce->ics.swb_sizes[i];
437  continue;
438  }
439  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
440  s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
441  sce->ics.swb_sizes[i],
442  sce->sf_idx[w*16 + i],
443  sce->band_type[w*16 + i],
444  s->lambda);
445  start += sce->ics.swb_sizes[i];
446  }
447  }
448 }
449 
450 /**
451  * Encode one channel of audio data.
452  */
455  int common_window)
456 {
457  put_bits(&s->pb, 8, sce->sf_idx[0]);
458  if (!common_window)
459  put_ics_info(s, &sce->ics);
460  encode_band_info(s, sce);
461  encode_scale_factors(avctx, s, sce);
462  encode_pulses(s, &sce->pulse);
463  put_bits(&s->pb, 1, 0); //tns
464  put_bits(&s->pb, 1, 0); //ssr
465  encode_spectral_coeffs(s, sce);
466  return 0;
467 }
468 
469 /**
470  * Write some auxiliary information about the created AAC file.
471  */
472 static void put_bitstream_info(AACEncContext *s, const char *name)
473 {
474  int i, namelen, padbits;
475 
476  namelen = strlen(name) + 2;
477  put_bits(&s->pb, 3, TYPE_FIL);
478  put_bits(&s->pb, 4, FFMIN(namelen, 15));
479  if (namelen >= 15)
480  put_bits(&s->pb, 8, namelen - 14);
481  put_bits(&s->pb, 4, 0); //extension type - filler
482  padbits = -put_bits_count(&s->pb) & 7;
484  for (i = 0; i < namelen - 2; i++)
485  put_bits(&s->pb, 8, name[i]);
486  put_bits(&s->pb, 12 - padbits, 0);
487 }
488 
489 /*
490  * Copy input samples.
491  * Channels are reordered from libavcodec's default order to AAC order.
492  */
494 {
495  int ch;
496  int end = 2048 + (frame ? frame->nb_samples : 0);
497  const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
498 
499  /* copy and remap input samples */
500  for (ch = 0; ch < s->channels; ch++) {
501  /* copy last 1024 samples of previous frame to the start of the current frame */
502  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
503 
504  /* copy new samples and zero any remaining samples */
505  if (frame) {
506  memcpy(&s->planar_samples[ch][2048],
507  frame->extended_data[channel_map[ch]],
508  frame->nb_samples * sizeof(s->planar_samples[0][0]));
509  }
510  memset(&s->planar_samples[ch][end], 0,
511  (3072 - end) * sizeof(s->planar_samples[0][0]));
512  }
513 }
514 
515 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
516  const AVFrame *frame, int *got_packet_ptr)
517 {
518  AACEncContext *s = avctx->priv_data;
519  float **samples = s->planar_samples, *samples2, *la, *overlap;
520  ChannelElement *cpe;
521  int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
522  int chan_el_counter[4];
524 
525  if (s->last_frame == 2)
526  return 0;
527 
528  /* add current frame to queue */
529  if (frame) {
530  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
531  return ret;
532  }
533 
534  copy_input_samples(s, frame);
535  if (s->psypp)
537 
538  if (!avctx->frame_number)
539  return 0;
540 
541  start_ch = 0;
542  for (i = 0; i < s->chan_map[0]; i++) {
543  FFPsyWindowInfo* wi = windows + start_ch;
544  tag = s->chan_map[i+1];
545  chans = tag == TYPE_CPE ? 2 : 1;
546  cpe = &s->cpe[i];
547  for (ch = 0; ch < chans; ch++) {
548  IndividualChannelStream *ics = &cpe->ch[ch].ics;
549  int cur_channel = start_ch + ch;
550  overlap = &samples[cur_channel][0];
551  samples2 = overlap + 1024;
552  la = samples2 + (448+64);
553  if (!frame)
554  la = NULL;
555  if (tag == TYPE_LFE) {
556  wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
557  wi[ch].window_shape = 0;
558  wi[ch].num_windows = 1;
559  wi[ch].grouping[0] = 1;
560 
561  /* Only the lowest 12 coefficients are used in a LFE channel.
562  * The expression below results in only the bottom 8 coefficients
563  * being used for 11.025kHz to 16kHz sample rates.
564  */
565  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
566  } else {
567  wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
568  ics->window_sequence[0]);
569  }
570  ics->window_sequence[1] = ics->window_sequence[0];
571  ics->window_sequence[0] = wi[ch].window_type[0];
572  ics->use_kb_window[1] = ics->use_kb_window[0];
573  ics->use_kb_window[0] = wi[ch].window_shape;
574  ics->num_windows = wi[ch].num_windows;
575  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
576  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
577  for (w = 0; w < ics->num_windows; w++)
578  ics->group_len[w] = wi[ch].grouping[w];
579 
580  apply_window_and_mdct(s, &cpe->ch[ch], overlap);
581 
582  if (isnan(cpe->ch[ch].coeffs[ 0]) || isinf(cpe->ch[ch].coeffs[ 0]) ||
583  isnan(cpe->ch[ch].coeffs[ 128]) || isinf(cpe->ch[ch].coeffs[ 128]) ||
584  isnan(cpe->ch[ch].coeffs[2*128]) || isinf(cpe->ch[ch].coeffs[2*128]) ||
585  isnan(cpe->ch[ch].coeffs[3*128]) || isinf(cpe->ch[ch].coeffs[3*128]) ||
586  isnan(cpe->ch[ch].coeffs[4*128]) || isinf(cpe->ch[ch].coeffs[4*128]) ||
587  isnan(cpe->ch[ch].coeffs[5*128]) || isinf(cpe->ch[ch].coeffs[5*128]) ||
588  isnan(cpe->ch[ch].coeffs[6*128]) || isinf(cpe->ch[ch].coeffs[6*128]) ||
589  isnan(cpe->ch[ch].coeffs[7*128]) || isinf(cpe->ch[ch].coeffs[7*128])) {
590  av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
591  return AVERROR(EINVAL);
592  }
593  }
594  start_ch += chans;
595  }
596  if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
597  return ret;
598  do {
599  int frame_bits;
600 
601  init_put_bits(&s->pb, avpkt->data, avpkt->size);
602 
603  if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
605  start_ch = 0;
606  memset(chan_el_counter, 0, sizeof(chan_el_counter));
607  for (i = 0; i < s->chan_map[0]; i++) {
608  FFPsyWindowInfo* wi = windows + start_ch;
609  const float *coeffs[2];
610  tag = s->chan_map[i+1];
611  chans = tag == TYPE_CPE ? 2 : 1;
612  cpe = &s->cpe[i];
613  put_bits(&s->pb, 3, tag);
614  put_bits(&s->pb, 4, chan_el_counter[tag]++);
615  for (ch = 0; ch < chans; ch++)
616  coeffs[ch] = cpe->ch[ch].coeffs;
617  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
618  for (ch = 0; ch < chans; ch++) {
619  s->cur_channel = start_ch + ch;
620  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
621  }
622  cpe->common_window = 0;
623  if (chans > 1
624  && wi[0].window_type[0] == wi[1].window_type[0]
625  && wi[0].window_shape == wi[1].window_shape) {
626 
627  cpe->common_window = 1;
628  for (w = 0; w < wi[0].num_windows; w++) {
629  if (wi[0].grouping[w] != wi[1].grouping[w]) {
630  cpe->common_window = 0;
631  break;
632  }
633  }
634  }
635  s->cur_channel = start_ch;
636  if (s->options.stereo_mode && cpe->common_window) {
637  if (s->options.stereo_mode > 0) {
638  IndividualChannelStream *ics = &cpe->ch[0].ics;
639  for (w = 0; w < ics->num_windows; w += ics->group_len[w])
640  for (g = 0; g < ics->num_swb; g++)
641  cpe->ms_mask[w*16+g] = 1;
642  } else if (s->coder->search_for_ms) {
643  s->coder->search_for_ms(s, cpe, s->lambda);
644  }
645  }
646  adjust_frame_information(cpe, chans);
647  if (chans == 2) {
648  put_bits(&s->pb, 1, cpe->common_window);
649  if (cpe->common_window) {
650  put_ics_info(s, &cpe->ch[0].ics);
651  encode_ms_info(&s->pb, cpe);
652  if (cpe->ms_mode) ms_mode = 1;
653  }
654  }
655  for (ch = 0; ch < chans; ch++) {
656  s->cur_channel = start_ch + ch;
657  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
658  }
659  start_ch += chans;
660  }
661 
662  frame_bits = put_bits_count(&s->pb);
663  if (frame_bits <= 6144 * s->channels - 3) {
664  s->psy.bitres.bits = frame_bits / s->channels;
665  break;
666  }
667  if (ms_mode) {
668  for (i = 0; i < s->chan_map[0]; i++) {
669  // Must restore coeffs
670  chans = tag == TYPE_CPE ? 2 : 1;
671  cpe = &s->cpe[i];
672  for (ch = 0; ch < chans; ch++)
673  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
674  }
675  }
676 
677  s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
678 
679  } while (1);
680 
681  put_bits(&s->pb, 3, TYPE_END);
682  flush_put_bits(&s->pb);
683  avctx->frame_bits = put_bits_count(&s->pb);
684 
685  // rate control stuff
686  if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
687  float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
688  s->lambda *= ratio;
689  s->lambda = FFMIN(s->lambda, 65536.f);
690  }
691 
692  if (!frame)
693  s->last_frame++;
694 
695  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
696  &avpkt->duration);
697 
698  avpkt->size = put_bits_count(&s->pb) >> 3;
699  *got_packet_ptr = 1;
700  return 0;
701 }
702 
704 {
705  AACEncContext *s = avctx->priv_data;
706 
707  ff_mdct_end(&s->mdct1024);
708  ff_mdct_end(&s->mdct128);
709  ff_psy_end(&s->psy);
710  if (s->psypp)
712  av_freep(&s->buffer.samples);
713  av_freep(&s->cpe);
714  av_freep(&s->fdsp);
715  ff_af_queue_close(&s->afq);
716  return 0;
717 }
718 
720 {
721  int ret = 0;
722 
724  if (!s->fdsp)
725  return AVERROR(ENOMEM);
726 
727  // window init
732 
733  if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
734  return ret;
735  if (ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0))
736  return ret;
737 
738  return 0;
739 }
740 
742 {
743  int ch;
744  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
745  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
746  FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
747 
748  for(ch = 0; ch < s->channels; ch++)
749  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
750 
751  return 0;
752 alloc_fail:
753  return AVERROR(ENOMEM);
754 }
755 
757 {
758  AACEncContext *s = avctx->priv_data;
759  int i, ret = 0;
760  const uint8_t *sizes[2];
761  uint8_t grouping[AAC_MAX_CHANNELS];
762  int lengths[2];
763 
764  avctx->frame_size = 1024;
765 
766  for (i = 0; i < 16; i++)
768  break;
769 
770  s->channels = avctx->channels;
771 
772  ERROR_IF(i == 16
773  || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
774  || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
775  "Unsupported sample rate %d\n", avctx->sample_rate);
777  "Unsupported number of channels: %d\n", s->channels);
779  "Unsupported profile %d\n", avctx->profile);
780  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
781  "Too many bits per frame requested, clamping to max\n");
782 
783  avctx->bit_rate = (int)FFMIN(
784  6144 * s->channels / 1024.0 * avctx->sample_rate,
785  avctx->bit_rate);
786 
787  s->samplerate_index = i;
788 
790 
791  if ((ret = dsp_init(avctx, s)) < 0)
792  goto fail;
793 
794  if ((ret = alloc_buffers(avctx, s)) < 0)
795  goto fail;
796 
797  avctx->extradata_size = 5;
799 
800  sizes[0] = swb_size_1024[i];
801  sizes[1] = swb_size_128[i];
802  lengths[0] = ff_aac_num_swb_1024[i];
803  lengths[1] = ff_aac_num_swb_128[i];
804  for (i = 0; i < s->chan_map[0]; i++)
805  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
806  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
807  s->chan_map[0], grouping)) < 0)
808  goto fail;
809  s->psypp = ff_psy_preprocess_init(avctx);
811 
812  if (HAVE_MIPSDSPR1)
814 
815  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
816 
818 
819  for (i = 0; i < 428; i++)
820  ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
821 
822  avctx->initial_padding = 1024;
823  ff_af_queue_init(avctx, &s->afq);
824 
825  return 0;
826 fail:
827  aac_encode_end(avctx);
828  return ret;
829 }
830 
831 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
832 static const AVOption aacenc_options[] = {
833  {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
834  {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
835  {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
836  {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
837  {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
838  {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
839  {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
840  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
841  {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
842  {NULL}
843 };
844 
845 static const AVClass aacenc_class = {
846  "AAC encoder",
850 };
851 
/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 * failures.
 * Only 13 rates are listed; the remaining three of the 16 entries are
 * implicitly zero. */
static const int mpeg4audio_sample_rates[16] = {
    96000, 88200, 64000, 48000, 44100, 32000,
    24000, 22050, 16000, 12000, 11025, 8000, 7350
};
858 
860  .name = "aac",
861  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
862  .type = AVMEDIA_TYPE_AUDIO,
863  .id = AV_CODEC_ID_AAC,
864  .priv_data_size = sizeof(AACEncContext),
866  .encode2 = aac_encode_frame,
867  .close = aac_encode_end,
869  .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
871  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
873  .priv_class = &aacenc_class,
874 };