FFmpeg  2.6.9
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
wmaenc.c
Go to the documentation of this file.
1 /*
2  * WMA compatible encoder
3  * Copyright (c) 2007 Michael Niedermayer
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 
24 #include "avcodec.h"
25 #include "internal.h"
26 #include "wma.h"
27 #include "libavutil/avassert.h"
28 
29 
31 {
    /*
     * Encoder init callback (installed as `.init = encode_init` in the
     * AVCodec tables of this file). It validates the stream parameters,
     * builds the codec extradata, initialises the shared WMA state and the
     * MDCT contexts, and derives block_align / frame_size.
     * Returns 0 on success or a negative AVERROR code.
     * NOTE(review): the function signature (listing line 30) is absent from
     * this extraction -- restore it from the upstream source.
     */
32  WMACodecContext *s = avctx->priv_data;
33  int i, flags1, flags2, block_align;
34  uint8_t *extradata;
35  int ret;
36 
37  s->avctx = avctx;
38 
    /* Reject stream parameters outside the range this encoder accepts. */
39  if (avctx->channels > MAX_CHANNELS) {
40  av_log(avctx, AV_LOG_ERROR,
41  "too many channels: got %i, need %i or fewer\n",
42  avctx->channels, MAX_CHANNELS);
43  return AVERROR(EINVAL);
44  }
45 
46  if (avctx->sample_rate > 48000) {
47  av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
48  avctx->sample_rate);
49  return AVERROR(EINVAL);
50  }
51 
52  if (avctx->bit_rate < 24 * 1000) {
53  av_log(avctx, AV_LOG_ERROR,
54  "bitrate too low: got %i, need 24000 or higher\n",
55  avctx->bit_rate);
56  return AVERROR(EINVAL);
57  }
58 
59  /* extract flag infos */
    /*
     * flags2 == 1 sets only bit 0x0001, so below use_exp_vlc is enabled
     * while use_bit_reservoir (0x0002) and use_variable_block_len (0x0004)
     * stay off.
     */
60  flags1 = 0;
61  flags2 = 1;
    /*
     * Extradata layout written here:
     *   WMAV1: 4 bytes  -- flags1 (16 bit) at offset 0, flags2 (16 bit) at 2
     *   WMAV2: 10 bytes -- flags1 (32 bit) at offset 0, flags2 (16 bit) at 4,
     *                      the remaining bytes stay zero (av_mallocz)
     * AV_WL16/AV_WL32 are the libavutil little-endian store macros.
     */
62  if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
63  extradata = av_malloc(4);
64  if (!extradata)
65  return AVERROR(ENOMEM);
66  avctx->extradata_size = 4;
67  AV_WL16(extradata, flags1);
68  AV_WL16(extradata + 2, flags2);
69  } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
70  extradata = av_mallocz(10);
71  if (!extradata)
72  return AVERROR(ENOMEM);
73  avctx->extradata_size = 10;
74  AV_WL32(extradata, flags1);
75  AV_WL16(extradata + 4, flags2);
76  } else {
    /* Only the two codec ids above are expected to reach this function. */
77  av_assert0(0);
78  }
79  avctx->extradata = extradata;
80  s->use_exp_vlc = flags2 & 0x0001;
81  s->use_bit_reservoir = flags2 & 0x0002;
82  s->use_variable_block_len = flags2 & 0x0004;
    /* Two-channel input is always flagged as mid/side stereo. */
83  if (avctx->channels == 2)
84  s->ms_stereo = 1;
85 
86  if ((ret = ff_wma_init(avctx, flags2)) < 0)
87  return ret;
88 
89  /* init MDCT */
    /* NOTE(review): the return value of ff_mdct_init() is not checked. */
90  for (i = 0; i < s->nb_block_sizes; i++)
91  ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
92 
    /*
     * Packet size in bytes: bit_rate * frame_len / (sample_rate * 8),
     * with the product widened to 64 bit, then capped at
     * MAX_CODED_SUPERFRAME_SIZE.
     */
93  block_align = avctx->bit_rate * (int64_t) s->frame_len /
94  (avctx->sample_rate * 8);
95  block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
96  avctx->block_align = block_align;
    /* Both the frame size and the initial padding equal one frame length. */
97  avctx->frame_size = avctx->initial_padding = s->frame_len;
98 
99  return 0;
100 }
101 
103 {
    /*
     * Window the input samples of every channel and run the forward MDCT,
     * leaving the spectrum in s->coefs[ch]. Called from encode_superframe()
     * as apply_window_and_mdct(avctx, frame).
     * NOTE(review): the function signature (listing line 102) is absent
     * from this extraction -- restore it from the upstream source.
     */
104  WMACodecContext *s = avctx->priv_data;
105  float **audio = (float **) frame->extended_data;
106  int len = frame->nb_samples;
107  int window_index = s->frame_len_bits - s->block_len_bits;
108  FFTContext *mdct = &s->mdct_ctx[window_index];
109  int ch;
110  const float *win = s->windows[window_index];
111  int window_len = 1 << s->block_len_bits;
    /* Scale factor applied to the input samples before windowing. */
112  float n = 2.0 * 32768.0 / window_len;
113 
    /*
     * NOTE(review): the memcpy below uses window_len while the vector
     * operations use len (frame->nb_samples); presumably the two are equal
     * for every frame passed in -- confirm against the caller.
     */
114  for (ch = 0; ch < avctx->channels; ch++) {
    /* First half of the MDCT input: what the previous call left in frame_out[ch]. */
115  memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
    /* Scale the new samples into frame_out[ch]. */
116  s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
    /* Second half of the MDCT input: scaled samples combined with win via vector_fmul_reverse. */
117  s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch],
118  win, len);
    /* Keep the scaled samples multiplied by win in frame_out[ch] for the next call. */
119  s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
120  mdct->mdct_calc(mdct, s->coefs[ch], s->output);
121  }
122 }
123 
124 // FIXME use for decoding too
125 static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
126 {
127  int n;
128  const uint16_t *ptr;
129  float v, *q, max_scale, *q_end;
130 
131  ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
132  q = s->exponents[ch];
133  q_end = q + s->block_len;
134  max_scale = 0;
135  while (q < q_end) {
136  /* XXX: use a table */
137  v = pow(10, *exp_param++ *(1.0 / 16.0));
138  max_scale = FFMAX(max_scale, v);
139  n = *ptr++;
140  do {
141  *q++ = v;
142  } while (--n);
143  }
144  s->max_exponent[ch] = max_scale;
145 }
146 
/*
 * Write the exponents of channel ch to s->pb as differences between
 * consecutive bands.
 *
 * For version 1 streams the first exponent is written directly as a 5-bit
 * value (exponent - 10) and its band is skipped; otherwise the running
 * reference starts at 36. Every following band yields
 * code = exp - last_exp + 60, which is asserted to lie in [0, 120).
 *
 * NOTE(review): listing lines 167-168 are absent from this extraction. As
 * shown, `code` is computed but never written to the bitstream; presumably
 * the missing statement is the put_bits() call that emits it -- restore it
 * from the upstream source.
 */
147 static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
148 {
149  int last_exp;
150  const uint16_t *ptr;
151  float *q, *q_end;
152 
153  ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
154  q = s->exponents[ch];
155  q_end = q + s->block_len;
156  if (s->version == 1) {
157  last_exp = *exp_param++;
158  av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32);
159  put_bits(&s->pb, 5, last_exp - 10);
160  q += *ptr++;
161  } else
162  last_exp = 36;
    /* q is used only as a position counter here; the exponents are not read. */
163  while (q < q_end) {
164  int exp = *exp_param++;
165  int code = exp - last_exp + 60;
166  av_assert1(code >= 0 && code < 120);
169  /* XXX: use a table */
170  q += *ptr++;
171  last_exp = exp;
172  }
173 }
174 
/*
 * Quantise the MDCT coefficients of all channels with the given total_gain
 * and write one block to s->pb.
 *
 * Returns -1 when a quantised coefficient falls outside [-32768, 32767] or
 * when an escape-coded level does not fit into coef_nb_bits, 1 when no
 * channel is coded, and 0 on success.
 *
 * NOTE(review): listing lines 194-196 (body of the fixed-block-length
 * branch) and 339 (the statement guarded by the final
 * `if (s->version == 1 && ...)`) are absent from this extraction -- restore
 * them from the upstream source.
 */
175 static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
176  int total_gain)
177 {
178  int v, bsize, ch, coef_nb_bits, parse_exponents;
179  float mdct_norm;
180  int nb_coefs[MAX_CHANNELS];
    /* The same exponent parameter (20) is used for every band. */
181  static const int fixed_exp[25] = {
182  20, 20, 20, 20, 20,
183  20, 20, 20, 20, 20,
184  20, 20, 20, 20, 20,
185  20, 20, 20, 20, 20,
186  20, 20, 20, 20, 20
187  };
188 
189  // FIXME remove duplication relative to decoder
190  if (s->use_variable_block_len) {
191  av_assert0(0); // FIXME not implemented
192  } else {
193  /* fixed block len */
197  }
198 
199  s->block_len = 1 << s->block_len_bits;
200 // av_assert0((s->block_pos + s->block_len) <= s->frame_len);
201  bsize = s->frame_len_bits - s->block_len_bits;
202 
203  // FIXME factor
    /* Every channel starts with the same number of coded coefficients. */
204  v = s->coefs_end[bsize] - s->coefs_start;
205  for (ch = 0; ch < s->avctx->channels; ch++)
206  nb_coefs[ch] = v;
207  {
    /* mdct_norm = 1 / (block_len / 2), multiplied by sqrt(block_len / 2) for version 1. */
208  int n4 = s->block_len / 2;
209  mdct_norm = 1.0 / (float) n4;
210  if (s->version == 1)
211  mdct_norm *= sqrt(n4);
212  }
213 
    /* One bit signalling ms_stereo, written only for two-channel streams. */
214  if (s->avctx->channels == 2)
215  put_bits(&s->pb, 1, !!s->ms_stereo);
216 
217  for (ch = 0; ch < s->avctx->channels; ch++) {
218  // FIXME only set channel_coded when needed, instead of always
219  s->channel_coded[ch] = 1;
220  if (s->channel_coded[ch])
221  init_exp(s, ch, fixed_exp);
222  }
223 
    /*
     * Quantisation: coefs1[i] = lrint(coef / (exponent[i] * mult)) with
     * mult = 10^(total_gain / 20) / max_exponent[ch] * mdct_norm.
     */
224  for (ch = 0; ch < s->avctx->channels; ch++) {
225  if (s->channel_coded[ch]) {
226  WMACoef *coefs1;
227  float *coefs, *exponents, mult;
228  int i, n;
229 
230  coefs1 = s->coefs1[ch];
231  exponents = s->exponents[ch];
232  mult = pow(10, total_gain * 0.05) / s->max_exponent[ch];
233  mult *= mdct_norm;
234  coefs = src_coefs[ch];
    /* The "&& 0" makes this branch unreachable. */
235  if (s->use_noise_coding && 0) {
236  av_assert0(0); // FIXME not implemented
237  } else {
238  coefs += s->coefs_start;
239  n = nb_coefs[ch];
240  for (i = 0; i < n; i++) {
241  double t = *coefs++ / (exponents[i] * mult);
    /* Give up on this gain when the value cannot be stored in 16 bits. */
242  if (t < -32768 || t > 32767)
243  return -1;
244 
245  coefs1[i] = lrint(t);
246  }
247  }
248  }
249  }
250 
    /* One "channel coded" bit per channel; v accumulates whether any is set. */
251  v = 0;
252  for (ch = 0; ch < s->avctx->channels; ch++) {
253  int a = s->channel_coded[ch];
254  put_bits(&s->pb, 1, a);
255  v |= a;
256  }
257 
258  if (!v)
259  return 1;
260 
    /*
     * total_gain - 1 is written in 7-bit chunks: a chunk of 127 is emitted
     * for every full 127 and the remainder goes into the last chunk.
     */
261  for (v = total_gain - 1; v >= 127; v -= 127)
262  put_bits(&s->pb, 7, 127);
263  put_bits(&s->pb, 7, v);
264 
265  coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);
266 
    /*
     * With noise coding enabled, each high band is marked as not coded
     * (a 0 bit); the "if (0)" keeps nb_coefs unchanged.
     */
267  if (s->use_noise_coding) {
268  for (ch = 0; ch < s->avctx->channels; ch++) {
269  if (s->channel_coded[ch]) {
270  int i, n;
271  n = s->exponent_high_sizes[bsize];
272  for (i = 0; i < n; i++) {
273  put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
274  if (0)
275  nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
276  }
277  }
278  }
279  }
280 
    /*
     * Exponents are always sent; the flag bit itself is written only when
     * the block is shorter than the frame.
     */
281  parse_exponents = 1;
282  if (s->block_len_bits != s->frame_len_bits)
283  put_bits(&s->pb, 1, parse_exponents);
284 
285  if (parse_exponents) {
286  for (ch = 0; ch < s->avctx->channels; ch++) {
287  if (s->channel_coded[ch]) {
288  if (s->use_exp_vlc) {
289  encode_exp_vlc(s, ch, fixed_exp);
290  } else {
291  av_assert0(0); // FIXME not implemented
292 // encode_exp_lsp(s, ch);
293  }
294  }
295  }
296  } else
297  av_assert0(0); // FIXME not implemented
298 
    /* Run/level coding of the quantised coefficients. */
299  for (ch = 0; ch < s->avctx->channels; ch++) {
300  if (s->channel_coded[ch]) {
301  int run, tindex;
302  WMACoef *ptr, *eptr;
    /* Table 1 is used for the second channel when ms_stereo is set, table 0 otherwise. */
303  tindex = (ch == 1 && s->ms_stereo);
304  ptr = &s->coefs1[ch][0];
305  eptr = ptr + nb_coefs[ch];
306 
307  run = 0;
308  for (; ptr < eptr; ptr++) {
309  if (*ptr) {
310  int level = *ptr;
311  int abs_level = FFABS(level);
    /* code stays 0 when the (run, level) pair has no table entry. */
312  int code = 0;
313  if (abs_level <= s->coef_vlcs[tindex]->max_level)
314  if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
315  code = run + s->int_table[tindex][abs_level - 1];
316 
317  av_assert2(code < s->coef_vlcs[tindex]->n);
318  put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
319  s->coef_vlcs[tindex]->huffcodes[code]);
320 
    /* code 0 is followed by the explicit level (coef_nb_bits bits) and run (frame_len_bits bits). */
321  if (code == 0) {
322  if (1 << coef_nb_bits <= abs_level)
323  return -1;
324 
325  put_bits(&s->pb, coef_nb_bits, abs_level);
326  put_bits(&s->pb, s->frame_len_bits, run);
327  }
328  // FIXME the sign is flipped somewhere
329  put_bits(&s->pb, 1, level < 0);
330  run = 0;
331  } else
332  run++;
333  }
    /* Trailing zeros: table entry 1 is written (presumably the end-of-block code -- confirm). */
334  if (run)
335  put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
336  s->coef_vlcs[tindex]->huffcodes[1]);
337  }
338  if (s->version == 1 && s->avctx->channels >= 2)
340  }
341  return 0;
342 }
343 
/*
 * Encode one frame into buf with the given total_gain.
 *
 * The bit writer s->pb is re-initialised on buf for every call. Returns
 * INT_MAX when encode_block() fails; otherwise returns the number of whole
 * bytes written minus avctx->block_align, so a result <= 0 means the frame
 * fits into one packet.
 *
 * NOTE(review): listing line 354 is absent from this extraction; a
 * statement between the encode_block() call and the return is missing --
 * restore it from the upstream source.
 */
344 static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
345  uint8_t *buf, int buf_size, int total_gain)
346 {
347  init_put_bits(&s->pb, buf, buf_size);
348 
349  if (s->use_bit_reservoir)
350  av_assert0(0); // FIXME not implemented
351  else if (encode_block(s, src_coefs, total_gain) < 0)
352  return INT_MAX;
353 
355 
356  return put_bits_count(&s->pb) / 8 - s->avctx->block_align;
357 }
358 
359 static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
360  const AVFrame *frame, int *got_packet_ptr)
361 {
362  WMACodecContext *s = avctx->priv_data;
363  int i, total_gain, ret, error;
364 
365  s->block_len_bits = s->frame_len_bits; // required by non variable block len
366  s->block_len = 1 << s->block_len_bits;
367 
368  apply_window_and_mdct(avctx, frame);
369 
370  if (s->ms_stereo) {
371  float a, b;
372  int i;
373 
374  for (i = 0; i < s->block_len; i++) {
375  a = s->coefs[0][i] * 0.5;
376  b = s->coefs[1][i] * 0.5;
377  s->coefs[0][i] = a + b;
378  s->coefs[1][i] = a - b;
379  }
380  }
381 
382  if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0)
383  return ret;
384 
385  total_gain = 128;
386  for (i = 64; i; i >>= 1) {
387  error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
388  total_gain - i);
389  if (error <= 0)
390  total_gain -= i;
391  }
392 
393  while(total_gain <= 128 && error > 0)
394  error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++);
395  if (error > 0) {
396  av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
397  avpkt->size = 0;
398  return AVERROR(EINVAL);
399  }
400  av_assert0((put_bits_count(&s->pb) & 7) == 0);
401  i= avctx->block_align - (put_bits_count(&s->pb)+7)/8;
402  av_assert0(i>=0);
403  while(i--)
404  put_bits(&s->pb, 8, 'N');
405 
406  flush_put_bits(&s->pb);
407  av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);
408 
409  if (frame->pts != AV_NOPTS_VALUE)
410  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
411 
412  avpkt->size = avctx->block_align;
413  *got_packet_ptr = 1;
414  return 0;
415 }
416 
417 #if CONFIG_WMAV1_ENCODER
/*
 * Codec table entry for the "wmav1" (Windows Media Audio 1) encoder. It
 * shares encode_init / encode_superframe with the wmav2 entry and uses
 * ff_wma_end as its close callback.
 * NOTE(review): listing line 428 is absent from this extraction, so the
 * sample_fmts initializer below is not terminated -- restore it from the
 * upstream source.
 */
418 AVCodec ff_wmav1_encoder = {
419  .name = "wmav1",
420  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
421  .type = AVMEDIA_TYPE_AUDIO,
422  .id = AV_CODEC_ID_WMAV1,
423  .priv_data_size = sizeof(WMACodecContext),
424  .init = encode_init,
425  .encode2 = encode_superframe,
426  .close = ff_wma_end,
427  .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
429 };
430 #endif
431 #if CONFIG_WMAV2_ENCODER
/*
 * Codec table entry for the "wmav2" (Windows Media Audio 2) encoder. It
 * shares encode_init / encode_superframe with the wmav1 entry and uses
 * ff_wma_end as its close callback.
 * NOTE(review): listing line 442 is absent from this extraction, so the
 * sample_fmts initializer below is not terminated -- restore it from the
 * upstream source.
 */
432 AVCodec ff_wmav2_encoder = {
433  .name = "wmav2",
434  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
435  .type = AVMEDIA_TYPE_AUDIO,
436  .id = AV_CODEC_ID_WMAV2,
437  .priv_data_size = sizeof(WMACodecContext),
438  .init = encode_init,
439  .encode2 = encode_superframe,
440  .close = ff_wma_end,
441  .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
443 };
444 #endif