FFmpeg  2.6.9
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vp8.c
Go to the documentation of this file.
1 /*
2  * VP7/VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Fiona Glaser
7  * Copyright (C) 2012 Daniel Kang
8  * Copyright (C) 2014 Peter Ross
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include "libavutil/imgutils.h"
28 
29 #include "avcodec.h"
30 #include "internal.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 #include "vp8.h"
34 #include "vp8data.h"
35 
36 #if ARCH_ARM
37 # include "arm/vp8.h"
38 #endif
39 
40 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
41 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
42 #elif CONFIG_VP7_DECODER
43 #define VPX(vp7, f) vp7_ ## f
44 #else // CONFIG_VP8_DECODER
45 #define VPX(vp7, f) vp8_ ## f
46 #endif
47 
/* Free all per-context heap allocations (per-thread data, top-row caches)
 * and reset derived pointers.  Safe to call on a partially initialized
 * context: av_freep() tolerates NULL and NULLs the pointer.
 * NOTE(review): this is a partial doxygen listing — original lines 55, 57
 * and 60-61 (mutex destroy, filter_strength/macroblocks_base frees) are
 * elided here; confirm against the full source. */
48 static void free_buffers(VP8Context *s)
49 {
50  int i;
51  if (s->thread_data)
52  for (i = 0; i < MAX_THREADS; i++) {
53 #if HAVE_THREADS
54  pthread_cond_destroy(&s->thread_data[i].cond);
56 #endif
58  }
59  av_freep(&s->thread_data);
62  av_freep(&s->top_nnz);
63  av_freep(&s->top_border);
64 
/* s->macroblocks aliases into macroblocks_base; clear it so no dangling
 * pointer survives the frees above. */
65  s->macroblocks = NULL;
66 }
67 
/* Allocate the picture buffer for f through the thread-aware getter, plus a
 * zero-initialized one-byte-per-macroblock segmentation map.
 * @param ref nonzero if the frame may be used as a reference
 * @return 0 on success, a negative AVERROR on failure.
 * NOTE(review): partial listing — original line 75 (releasing tf when the
 * seg_map allocation fails) is elided here. */
68 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
69 {
70  int ret;
71  if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
72  ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
73  return ret;
74  if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
76  return AVERROR(ENOMEM);
77  }
78  return 0;
79 }
80 
82 {
85 }
86 
87 #if CONFIG_VP8_DECODER
88 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
89 {
90  int ret;
91 
92  vp8_release_frame(s, dst);
93 
94  if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
95  return ret;
96  if (src->seg_map &&
97  !(dst->seg_map = av_buffer_ref(src->seg_map))) {
98  vp8_release_frame(s, dst);
99  return AVERROR(ENOMEM);
100  }
101 
102  return 0;
103 }
104 #endif /* CONFIG_VP8_DECODER */
105 
106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
107 {
108  VP8Context *s = avctx->priv_data;
109  int i;
110 
111  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
112  vp8_release_frame(s, &s->frames[i]);
113  memset(s->framep, 0, sizeof(s->framep));
114 
115  if (free_mem)
116  free_buffers(s);
117 }
118 
119 static void vp8_decode_flush(AVCodecContext *avctx)
120 {
121  vp8_decode_flush_impl(avctx, 0);
122 }
123 
/* Pick a frame slot that is not currently serving as CURRENT, PREVIOUS,
 * GOLDEN or GOLDEN2; release any stale buffer it still holds and return it.
 * Aborts if all five slots are in use (cannot happen with a correct
 * reference-management invariant).
 * NOTE(review): the signature line is elided in this listing — presumably
 * static VP8Frame *vp8_find_free_buffer(VP8Context *s); confirm. */
125 {
126  VP8Frame *frame = NULL;
127  int i;
128 
129  // find a free buffer
130  for (i = 0; i < 5; i++)
131  if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
132  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
133  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
134  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
135  frame = &s->frames[i];
136  break;
137  }
138  if (i == 5) {
139  av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
140  abort();
141  }
/* Non-NULL data[0] means the slot still owns an old picture — release it. */
142  if (frame->tf.f->data[0])
143  vp8_release_frame(s, frame);
144 
145  return frame;
146 }
147 
/* (Re)configure the decoder for the given coded dimensions: updates the
 * AVCodecContext size, recomputes macroblock counts, chooses the macroblock
 * layout (sliced vs. frame threading), and (re)allocates per-frame and
 * per-thread buffers.  Returns 0 or a negative AVERROR.
 * NOTE(review): partial listing — original lines 155-156 (flush of old
 * buffers on resize), 170-171/177 (intra4x4_pred_mode_top and thread_data
 * allocations) and 186 (filter_strength assignment target) are elided. */
148 static av_always_inline
149 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
150 {
151  AVCodecContext *avctx = s->avctx;
152  int i, ret;
153 
154  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
155  height != s->avctx->height) {
157 
158  ret = ff_set_dimensions(s->avctx, width, height);
159  if (ret < 0)
160  return ret;
161  }
162 
163  s->mb_width = (s->avctx->coded_width + 15) / 16;
164  s->mb_height = (s->avctx->coded_height + 15) / 16;
165 
/* VP7 and slice-threaded VP8 store the whole MB grid; otherwise only two
 * rows (current + top) are kept. */
166  s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
167  avctx->thread_count > 1;
168  if (!s->mb_layout) { // Frame threading and one thread
169  s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
170  sizeof(*s->macroblocks));
172  } else // Sliced threading
173  s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
174  sizeof(*s->macroblocks));
175  s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
176  s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
178 
179  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
180  !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
181  free_buffers(s);
182  return AVERROR(ENOMEM);
183  }
184 
185  for (i = 0; i < MAX_THREADS; i++) {
187  av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
188  if (!s->thread_data[i].filter_strength) {
189  free_buffers(s);
190  return AVERROR(ENOMEM);
191  }
192 #if HAVE_THREADS
193  pthread_mutex_init(&s->thread_data[i].lock, NULL);
194  pthread_cond_init(&s->thread_data[i].cond, NULL);
195 #endif
196  }
197 
/* +1 so index -1 (left neighbour of column 0) is addressable. */
198  s->macroblocks = s->macroblocks_base + 1;
199 
200  return 0;
201 }
202 
/* VP7 wrapper around update_dimensions().  NOTE(review): the signature line
 * is elided in this listing — presumably
 * static int vp7_update_dimensions(VP8Context *s, int width, int height). */
204 {
205  return update_dimensions(s, width, height, IS_VP7);
206 }
207 
/* VP8 wrapper around update_dimensions().  NOTE(review): the signature line
 * is elided in this listing — presumably
 * static int vp8_update_dimensions(VP8Context *s, int width, int height). */
209 {
210  return update_dimensions(s, width, height, IS_VP8);
211 }
212 
213 
/* Parse the VP8 segmentation header (spec section 9.3): segment feature
 * data and, if requested, the segment-id tree probabilities.
 * NOTE(review): partial listing — the signature and original lines 219,
 * 222, 225 and 228 (update_map/absolute_vals flags and the quant/filter
 * feature reads) are elided here. */
215 {
216  VP56RangeCoder *c = &s->c;
217  int i;
218 
220 
221  if (vp8_rac_get(c)) { // update segment feature data
223 
224  for (i = 0; i < 4; i++)
226 
227  for (i = 0; i < 4; i++)
229  }
/* Probability 255 is the spec default when no explicit value is coded. */
230  if (s->segmentation.update_map)
231  for (i = 0; i < 3; i++)
232  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
233 }
234 
/* Read per-reference-frame and per-mode loop-filter level deltas
 * (sign-magnitude coded: 6-bit magnitude followed by a sign bit).
 * NOTE(review): the signature line is elided in this listing — presumably
 * static void update_lf_deltas(VP8Context *s). */
236 {
237  VP56RangeCoder *c = &s->c;
238  int i;
239 
240  for (i = 0; i < 4; i++) {
241  if (vp8_rac_get(c)) {
242  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
243 
244  if (vp8_rac_get(c))
245  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
246  }
247  }
248 
249  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
250  if (vp8_rac_get(c)) {
251  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
252 
253  if (vp8_rac_get(c))
254  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
255  }
256  }
257 }
258 
259 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
260 {
261  const uint8_t *sizes = buf;
262  int i;
263 
264  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
265 
266  buf += 3 * (s->num_coeff_partitions - 1);
267  buf_size -= 3 * (s->num_coeff_partitions - 1);
268  if (buf_size < 0)
269  return -1;
270 
271  for (i = 0; i < s->num_coeff_partitions - 1; i++) {
272  int size = AV_RL24(sizes + 3 * i);
273  if (buf_size - size < 0)
274  return -1;
275 
276  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
277  buf += size;
278  buf_size -= size;
279  }
280  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
281 
282  return 0;
283 }
284 
285 static void vp7_get_quants(VP8Context *s)
286 {
287  VP56RangeCoder *c = &s->c;
288 
289  int yac_qi = vp8_rac_get_uint(c, 7);
290  int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
291  int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
292  int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
293  int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
294  int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
295 
296  s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
297  s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
298  s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
299  s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
300  s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
301  s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
302 }
303 
304 static void vp8_get_quants(VP8Context *s)
305 {
306  VP56RangeCoder *c = &s->c;
307  int i, base_qi;
308 
309  int yac_qi = vp8_rac_get_uint(c, 7);
310  int ydc_delta = vp8_rac_get_sint(c, 4);
311  int y2dc_delta = vp8_rac_get_sint(c, 4);
312  int y2ac_delta = vp8_rac_get_sint(c, 4);
313  int uvdc_delta = vp8_rac_get_sint(c, 4);
314  int uvac_delta = vp8_rac_get_sint(c, 4);
315 
316  for (i = 0; i < 4; i++) {
317  if (s->segmentation.enabled) {
318  base_qi = s->segmentation.base_quant[i];
319  if (!s->segmentation.absolute_vals)
320  base_qi += yac_qi;
321  } else
322  base_qi = yac_qi;
323 
324  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
325  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
326  s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
327  /* 101581>>16 is equivalent to 155/100 */
328  s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
329  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
330  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
331 
332  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
333  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
334  }
335 }
336 
337 /**
338  * Determine which buffers golden and altref should be updated with after this frame.
339  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
340  *
341  * Intra frames update all 3 references
342  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
343  * If the update (golden|altref) flag is set, it's updated with the current frame
344  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
345  * If the flag is not set, the number read means:
346  * 0: no update
347  * 1: VP56_FRAME_PREVIOUS
348  * 2: update golden with altref, or update altref with golden
349  */
/* Map an update flag (and, when clear, a 2-bit code) to the reference frame
 * that should receive this frame's data — see the comment block above.
 * NOTE(review): partial listing — the signature and the body of "case 2"
 * (swapping golden/altref) are elided here. */
351 {
352  VP56RangeCoder *c = &s->c;
353 
354  if (update)
355  return VP56_FRAME_CURRENT;
356 
357  switch (vp8_rac_get_uint(c, 2)) {
358  case 1:
359  return VP56_FRAME_PREVIOUS;
360  case 2:
362  }
363  return VP56_FRAME_NONE;
364 }
365 
/* Reset all token (coefficient) probabilities to the spec defaults,
 * expanding the per-band default table across the 16 coefficient positions.
 * NOTE(review): the signature line is elided in this listing — presumably
 * static void vp78_reset_probability_tables(VP8Context *s). */
367 {
368  int i, j;
369  for (i = 0; i < 4; i++)
370  for (j = 0; j < 16; j++)
371  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
372  sizeof(s->prob->token[i][j]));
373 }
374 
/* Apply token probability updates from the bitstream (13.3): each new 8-bit
 * probability is replicated across every coefficient position in its band.
 * NOTE(review): partial listing — the signature and original line 384 (the
 * vp8_token_update_probs branch deciding whether a new value is coded) are
 * elided here. */
376 {
377  VP56RangeCoder *c = &s->c;
378  int i, j, k, l, m;
379 
380  for (i = 0; i < 4; i++)
381  for (j = 0; j < 8; j++)
382  for (k = 0; k < 3; k++)
383  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
385  int prob = vp8_rac_get_uint(c, 8);
386  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
387  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
388  }
389 }
390 
391 #define VP7_MVC_SIZE 17
392 #define VP8_MVC_SIZE 19
393 
/* Optionally refresh the 16x16 and 8x8 chroma intra prediction
 * probabilities, then apply MV probability updates (spec 17.2); mvc_size is
 * VP7_MVC_SIZE or VP8_MVC_SIZE.
 * NOTE(review): partial listing — the signature line and original line 410
 * (the vp8_mv_update_prob branch gating each write) are elided here. */
395  int mvc_size)
396 {
397  VP56RangeCoder *c = &s->c;
398  int i, j;
399 
400  if (vp8_rac_get(c))
401  for (i = 0; i < 4; i++)
402  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
403  if (vp8_rac_get(c))
404  for (i = 0; i < 3; i++)
405  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
406 
407  // 17.2 MV probability update
408  for (i = 0; i < 2; i++)
409  for (j = 0; j < mvc_size; j++)
411  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
412 }
413 
414 static void update_refs(VP8Context *s)
415 {
416  VP56RangeCoder *c = &s->c;
417 
418  int update_golden = vp8_rac_get(c);
419  int update_altref = vp8_rac_get(c);
420 
421  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
422  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
423 }
424 
425 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
426 {
427  int i, j;
428 
429  for (j = 1; j < 3; j++) {
430  for (i = 0; i < height / 2; i++)
431  memcpy(dst->data[j] + i * dst->linesize[j],
432  src->data[j] + i * src->linesize[j], width / 2);
433  }
434 }
435 
/**
 * Apply the VP7 fade transform to a luma plane:
 * dst = clip(src + src * beta / 256 + alpha), per pixel.
 */
static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *src_row = src + y * src_linesize;
        uint8_t       *dst_row = dst + y * dst_linesize;

        for (x = 0; x < width; x++) {
            uint8_t luma = src_row[x];
            dst_row[x] = av_clip_uint8(luma + ((luma * beta) >> 8) + alpha);
        }
    }
}
449 
/* Handle VP7 fading (header section E): read signed alpha/beta, and on
 * inter frames with a nonzero fade, produce a faded copy of the previous
 * frame (keeping golden intact) before fading the luma plane.
 * NOTE(review): partial listing — the signature and original lines 471-472
 * (detaching/re-allocating the previous frame when it aliases golden) are
 * elided, which is why the brace nesting below looks unbalanced. */
451 {
452  int alpha = (int8_t) vp8_rac_get_uint(c, 8);
453  int beta = (int8_t) vp8_rac_get_uint(c, 8);
454  int ret;
455 
456  if (!s->keyframe && (alpha || beta)) {
457  int width = s->mb_width * 16;
458  int height = s->mb_height * 16;
459  AVFrame *src, *dst;
460 
461  if (!s->framep[VP56_FRAME_PREVIOUS] ||
462  !s->framep[VP56_FRAME_GOLDEN]) {
463  av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
464  return AVERROR_INVALIDDATA;
465  }
466 
467  dst =
468  src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
469 
470  /* preserve the golden frame, write a new previous frame */
473  if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
474  return ret;
475 
476  dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
477 
478  copy_chroma(dst, src, width, height);
479  }
480 
481  fade(dst->data[0], dst->linesize[0],
482  src->data[0], src->linesize[0],
483  width, height, alpha, beta);
484  }
485 
486  return 0;
487 }
488 
/* Parse a complete VP7 frame header: profile, keyframe dimensions, the
 * four macroblock-level features, quantizers, fading, loop-filter setup,
 * the optional custom scan order, and inter-frame probabilities.
 * Returns 0 or a negative AVERROR.
 * NOTE(review): partial doxygen listing — several original lines are
 * elided (529-533 default-prob memcpys, 550/559 feature reads, 582-583
 * golden-update read, 591 update_probabilities read, 621 token-probability
 * update call, 629 pred16x16/pred8x8/mvc update call). */
489 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
490 {
491  VP56RangeCoder *c = &s->c;
492  int part1_size, hscale, vscale, i, j, ret;
493  int width = s->avctx->width;
494  int height = s->avctx->height;
495 
496  s->profile = (buf[0] >> 1) & 7;
497  if (s->profile > 1) {
498  avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
499  return AVERROR_INVALIDDATA;
500  }
501 
502  s->keyframe = !(buf[0] & 1);
503  s->invisible = 0;
504  part1_size = AV_RL24(buf) >> 4;
505 
506  if (buf_size < 4 - s->profile + part1_size) {
507  av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
508  return AVERROR_INVALIDDATA;
509  }
510 
511  buf += 4 - s->profile;
512  buf_size -= 4 - s->profile;
513 
514  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
515 
516  ff_vp56_init_range_decoder(c, buf, part1_size);
517  buf += part1_size;
518  buf_size -= part1_size;
519 
520  /* A. Dimension information (keyframes only) */
521  if (s->keyframe) {
522  width = vp8_rac_get_uint(c, 12);
523  height = vp8_rac_get_uint(c, 12);
524  hscale = vp8_rac_get_uint(c, 2);
525  vscale = vp8_rac_get_uint(c, 2);
526  if (hscale || vscale)
527  avpriv_request_sample(s->avctx, "Upscaling");
528 
532  sizeof(s->prob->pred16x16));
534  sizeof(s->prob->pred8x8c));
535  for (i = 0; i < 2; i++)
536  memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
537  sizeof(vp7_mv_default_prob[i]));
538  memset(&s->segmentation, 0, sizeof(s->segmentation));
539  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
540  memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
541  }
542 
543  if (s->keyframe || s->profile > 0)
544  memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
545 
546  /* B. Decoding information for all four macroblock-level features */
547  for (i = 0; i < 4; i++) {
548  s->feature_enabled[i] = vp8_rac_get(c);
549  if (s->feature_enabled[i]) {
551 
552  for (j = 0; j < 3; j++)
553  s->feature_index_prob[i][j] =
554  vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
555 
556  if (vp7_feature_value_size[s->profile][i])
557  for (j = 0; j < 4; j++)
558  s->feature_value[i][j] =
560  }
561  }
562 
/* VP7 has no segmentation / loop-filter deltas; force them off. */
563  s->segmentation.enabled = 0;
564  s->segmentation.update_map = 0;
565  s->lf_delta.enabled = 0;
566 
567  s->num_coeff_partitions = 1;
568  ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
569 
570  if (!s->macroblocks_base || /* first frame */
571  width != s->avctx->width || height != s->avctx->height ||
572  (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
573  if ((ret = vp7_update_dimensions(s, width, height)) < 0)
574  return ret;
575  }
576 
577  /* C. Dequantization indices */
578  vp7_get_quants(s);
579 
580  /* D. Golden frame update flag (a Flag) for interframes only */
581  if (!s->keyframe) {
584  }
585 
586  s->update_last = 1;
587  s->update_probabilities = 1;
588  s->fade_present = 1;
589 
590  if (s->profile > 0) {
592  if (!s->update_probabilities)
593  s->prob[1] = s->prob[0];
594 
595  if (!s->keyframe)
596  s->fade_present = vp8_rac_get(c);
597  }
598 
599  /* E. Fading information for previous frame */
600  if (s->fade_present && vp8_rac_get(c)) {
601  if ((ret = vp7_fade_frame(s ,c)) < 0)
602  return ret;
603  }
604 
605  /* F. Loop filter type */
606  if (!s->profile)
607  s->filter.simple = vp8_rac_get(c);
608 
609  /* G. DCT coefficient ordering specification */
610  if (vp8_rac_get(c))
611  for (i = 1; i < 16; i++)
612  s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
613 
614  /* H. Loop filter levels */
615  if (s->profile > 0)
616  s->filter.simple = vp8_rac_get(c);
617  s->filter.level = vp8_rac_get_uint(c, 6);
618  s->filter.sharpness = vp8_rac_get_uint(c, 3);
619 
620  /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
622 
623  s->mbskip_enabled = 0;
624 
625  /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
626  if (!s->keyframe) {
627  s->prob->intra = vp8_rac_get_uint(c, 8);
628  s->prob->last = vp8_rac_get_uint(c, 8);
630  }
631 
632  return 0;
633 }
634 
/* Parse a complete VP8 frame header (RFC 6386 section 9): uncompressed
 * 3/10-byte prefix, first-partition range-coder fields (segmentation, loop
 * filter, partitions, quantizers, reference updates, probabilities).
 * Returns 0 or a negative AVERROR.
 * NOTE(review): partial doxygen listing — several original lines are
 * elided (658/661 put_pixels_tab memcpys, 671 av_log call head, 685-689
 * default-probability memcpys, 709 parse_segment_info call, 736 sign_bias
 * read for golden, 747 token-probability update call, 756 the
 * pred16x16/pred8x8/mvc probability update call). */
635 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
636 {
637  VP56RangeCoder *c = &s->c;
638  int header_size, hscale, vscale, ret;
639  int width = s->avctx->width;
640  int height = s->avctx->height;
641 
642  if (buf_size < 3) {
643  av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
644  return AVERROR_INVALIDDATA;
645  }
646 
647  s->keyframe = !(buf[0] & 1);
648  s->profile = (buf[0]>>1) & 7;
649  s->invisible = !(buf[0] & 0x10);
650  header_size = AV_RL24(buf) >> 5;
651  buf += 3;
652  buf_size -= 3;
653 
654  if (s->profile > 3)
655  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
656 
657  if (!s->profile)
659  sizeof(s->put_pixels_tab));
660  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
662  sizeof(s->put_pixels_tab));
663 
664  if (header_size > buf_size - 7 * s->keyframe) {
665  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
666  return AVERROR_INVALIDDATA;
667  }
668 
669  if (s->keyframe) {
670  if (AV_RL24(buf) != 0x2a019d) {
672  "Invalid start code 0x%x\n", AV_RL24(buf));
673  return AVERROR_INVALIDDATA;
674  }
675  width = AV_RL16(buf + 3) & 0x3fff;
676  height = AV_RL16(buf + 5) & 0x3fff;
677  hscale = buf[4] >> 6;
678  vscale = buf[6] >> 6;
679  buf += 7;
680  buf_size -= 7;
681 
682  if (hscale || vscale)
683  avpriv_request_sample(s->avctx, "Upscaling");
684 
688  sizeof(s->prob->pred16x16));
690  sizeof(s->prob->pred8x8c));
691  memcpy(s->prob->mvc, vp8_mv_default_prob,
692  sizeof(s->prob->mvc));
693  memset(&s->segmentation, 0, sizeof(s->segmentation));
694  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
695  }
696 
697  ff_vp56_init_range_decoder(c, buf, header_size);
698  buf += header_size;
699  buf_size -= header_size;
700 
701  if (s->keyframe) {
702  s->colorspace = vp8_rac_get(c);
703  if (s->colorspace)
704  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
705  s->fullrange = vp8_rac_get(c);
706  }
707 
708  if ((s->segmentation.enabled = vp8_rac_get(c)))
710  else
711  s->segmentation.update_map = 0; // FIXME: move this to some init function?
712 
713  s->filter.simple = vp8_rac_get(c);
714  s->filter.level = vp8_rac_get_uint(c, 6);
715  s->filter.sharpness = vp8_rac_get_uint(c, 3);
716 
717  if ((s->lf_delta.enabled = vp8_rac_get(c)))
718  if (vp8_rac_get(c))
719  update_lf_deltas(s);
720 
721  if (setup_partitions(s, buf, buf_size)) {
722  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
723  return AVERROR_INVALIDDATA;
724  }
725 
726  if (!s->macroblocks_base || /* first frame */
727  width != s->avctx->width || height != s->avctx->height ||
728  (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
729  if ((ret = vp8_update_dimensions(s, width, height)) < 0)
730  return ret;
731 
732  vp8_get_quants(s);
733 
734  if (!s->keyframe) {
735  update_refs(s);
737  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
738  }
739 
740  // if we aren't saving this frame's probabilities for future frames,
741  // make a copy of the current probabilities
742  if (!(s->update_probabilities = vp8_rac_get(c)))
743  s->prob[1] = s->prob[0];
744 
745  s->update_last = s->keyframe || vp8_rac_get(c);
746 
748 
749  if ((s->mbskip_enabled = vp8_rac_get(c)))
750  s->prob->mbskip = vp8_rac_get_uint(c, 8);
751 
752  if (!s->keyframe) {
753  s->prob->intra = vp8_rac_get_uint(c, 8);
754  s->prob->last = vp8_rac_get_uint(c, 8);
755  s->prob->golden = vp8_rac_get_uint(c, 8);
757  }
758 
759  return 0;
760 }
761 
762 static av_always_inline
763 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
764 {
765  dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
766  av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
767  dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
768  av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
769 }
770 
771 /**
772  * Motion vector coding, 17.1.
773  */
/* Decode one signed MV component (spec 17.1): either a "large" value built
 * bit-by-bit from probabilities p[9..], or a small value from the
 * small_mvtree, followed by a sign bit when nonzero.
 * NOTE(review): the signature line is elided in this listing — presumably
 * static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7). */
775 {
776  int bit, x = 0;
777 
778  if (vp56_rac_get_prob_branchy(c, p[0])) {
779  int i;
780 
781  for (i = 0; i < 3; i++)
782  x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* VP7 codes 8 magnitude bits, VP8 codes 10 (read high-to-low here). */
783  for (i = (vp7 ? 7 : 9); i > 3; i--)
784  x += vp56_rac_get_prob(c, p[9 + i]) << i;
785  if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
786  x += 8;
787  } else {
788  // small_mvtree
789  const uint8_t *ps = p + 2;
790  bit = vp56_rac_get_prob(c, *ps);
791  ps += 1 + 3 * bit;
792  x += 4 * bit;
793  bit = vp56_rac_get_prob(c, *ps);
794  ps += 1 + bit;
795  x += 2 * bit;
796  x += vp56_rac_get_prob(c, *ps);
797  }
798 
/* p[1] is the sign probability; zero never carries a sign bit. */
799  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
800 }
801 
/* VP7 wrapper for read_mv_component() (vp7 flag set).  NOTE(review): the
 * signature line is elided in this listing. */
803 {
804  return read_mv_component(c, p, 1);
805 }
806 
/* VP8 wrapper for read_mv_component() (vp7 flag clear).  NOTE(review): the
 * signature line is elided in this listing. */
808 {
809  return read_mv_component(c, p, 0);
810 }
811 
812 static av_always_inline
813 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
814 {
815  if (is_vp7)
816  return vp7_submv_prob;
817 
818  if (left == top)
819  return vp8_submv_prob[4 - !!left];
820  if (!top)
821  return vp8_submv_prob[2];
822  return vp8_submv_prob[1 - !!left];
823 }
824 
825 /**
826  * Split motion vector prediction, 16.4.
827  * @returns the number of motion vectors parsed (2, 4 or 16)
828  */
/* Split motion vector prediction (spec 16.4): pick the partition layout,
 * then for each partition decode its sub-MV from the left/above context.
 * Returns the number of motion vectors parsed (2, 4 or 16).
 * NOTE(review): partial listing — the signature line and original lines
 * 849-852 (the vp8_mbsplit_prob reads selecting 16x8/8x16 vs 8x8) are
 * elided here. */
829 static av_always_inline
831  int layout, int is_vp7)
832 {
833  int part_idx;
834  int n, num;
835  VP8Macroblock *top_mb;
836  VP8Macroblock *left_mb = &mb[-1];
837  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
838  const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
839  VP56mv *top_mv;
840  VP56mv *left_mv = left_mb->bmv;
841  VP56mv *cur_mv = mb->bmv;
842 
843  if (!layout) // layout is inlined, s->mb_layout is not
844  top_mb = &mb[2];
845  else
846  top_mb = &mb[-s->mb_width - 1];
847  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
848  top_mv = top_mb->bmv;
849 
853  else
854  part_idx = VP8_SPLITMVMODE_8x8;
855  } else {
856  part_idx = VP8_SPLITMVMODE_4x4;
857  }
858 
859  num = vp8_mbsplit_count[part_idx];
860  mbsplits_cur = vp8_mbsplits[part_idx],
861  firstidx = vp8_mbfirstidx[part_idx];
862  mb->partitioning = part_idx;
863 
864  for (n = 0; n < num; n++) {
865  int k = firstidx[n];
866  uint32_t left, above;
867  const uint8_t *submv_prob;
868 
/* Left/above context comes from the neighbour MB on row/column edges,
 * otherwise from already-decoded sub-MVs of this MB. */
869  if (!(k & 3))
870  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
871  else
872  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
873  if (k <= 3)
874  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
875  else
876  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
877 
878  submv_prob = get_submv_prob(left, above, is_vp7);
879 
880  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
881  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
882  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
883  mb->bmv[n].y = mb->mv.y +
884  read_mv_component(c, s->prob->mvc[0], is_vp7);
885  mb->bmv[n].x = mb->mv.x +
886  read_mv_component(c, s->prob->mvc[1], is_vp7);
887  } else {
888  AV_ZERO32(&mb->bmv[n]);
889  }
890  } else {
891  AV_WN32A(&mb->bmv[n], above);
892  }
893  } else {
894  AV_WN32A(&mb->bmv[n], left);
895  }
896  }
897 
898  return num;
899 }
900 
901 /**
902  * The vp7 reference decoder uses a padding macroblock column (added to right
903  * edge of the frame) to guard against illegal macroblock offsets. The
904  * algorithm has bugs that permit offsets to straddle the padding column.
905  * This function replicates those bugs.
906  *
907  * @param[out] edge_x macroblock x address
908  * @param[out] edge_y macroblock y address
909  *
910  * @return macroblock offset legal (boolean)
911  */
/**
 * Resolve an (x,y) macroblock offset on the VP7 virtual grid, which has one
 * extra padding column on the right; deliberately replicates the reference
 * decoder's bug-compatible straddling behaviour (see comment above).
 *
 * @param[out] edge_x resolved macroblock x address
 * @param[out] edge_y resolved macroblock y address
 * @return 1 if the offset is legal, 0 otherwise
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int stride = mb_width + 1;  /* virtual width including the padding column */
    int idx    = (mb_y + yoffset) * stride + mb_x + xoffset;

    /* Reject offsets before the boundary or landing in the padding column. */
    if (idx < boundary || idx % stride == stride - 1)
        return 0;

    *edge_y = idx / stride;
    *edge_x = idx % stride;
    return 1;
}
924 
925 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
926 {
927  return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
928 }
929 
/* VP7 inter-MB motion vector decoding: score the candidate predictors from
 * up to VP7_MV_PRED_COUNT neighbouring positions (via the bug-compatible
 * offset resolver), then choose between zero / nearest / near / new /
 * split MV according to vp7_mode_contexts.
 * NOTE(review): partial listing — the signature line and original line 984
 * (presumably setting mb->ref_frame before mode selection) are elided. */
930 static av_always_inline
932  int mb_x, int mb_y, int layout)
933 {
934  VP8Macroblock *mb_edge[12];
935  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
936  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
937  int idx = CNT_ZERO;
938  VP56mv near_mv[3];
939  uint8_t cnt[3] = { 0 };
940  VP56RangeCoder *c = &s->c;
941  int i;
942 
943  AV_ZERO32(&near_mv[0]);
944  AV_ZERO32(&near_mv[1]);
945  AV_ZERO32(&near_mv[2]);
946 
947  for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
948  const VP7MVPred * pred = &vp7_mv_pred[i];
949  int edge_x, edge_y;
950 
951  if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
952  pred->yoffset, !s->profile, &edge_x, &edge_y)) {
953  VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
954  ? s->macroblocks_base + 1 + edge_x +
955  (s->mb_width + 1) * (edge_y + 1)
956  : s->macroblocks + edge_x +
957  (s->mb_height - edge_y - 1) * 2;
958  uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
/* Accumulate scores: identical MVs share a bucket, distinct nonzero
 * MVs fill NEAREST first, then NEAR; zero MVs score CNT_ZERO. */
959  if (mv) {
960  if (AV_RN32A(&near_mv[CNT_NEAREST])) {
961  if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
962  idx = CNT_NEAREST;
963  } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
964  if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
965  continue;
966  idx = CNT_NEAR;
967  } else {
968  AV_WN32A(&near_mv[CNT_NEAR], mv);
969  idx = CNT_NEAR;
970  }
971  } else {
972  AV_WN32A(&near_mv[CNT_NEAREST], mv);
973  idx = CNT_NEAREST;
974  }
975  } else {
976  idx = CNT_ZERO;
977  }
978  } else {
979  idx = CNT_ZERO;
980  }
981  cnt[idx] += vp7_mv_pred[i].score;
982  }
983 
985 
986  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
987  mb->mode = VP8_MVMODE_MV;
988 
989  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
990 
991  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
992 
993  if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
994  AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
995  else
996  AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
997 
998  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
999  mb->mode = VP8_MVMODE_SPLIT;
1000  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1001  } else {
1002  mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1003  mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1004  mb->bmv[0] = mb->mv;
1005  }
1006  } else {
1007  mb->mv = near_mv[CNT_NEAR];
1008  mb->bmv[0] = mb->mv;
1009  }
1010  } else {
1011  mb->mv = near_mv[CNT_NEAREST];
1012  mb->bmv[0] = mb->mv;
1013  }
1014  } else {
1015  mb->mode = VP8_MVMODE_ZERO;
1016  AV_ZERO32(&mb->mv);
1017  mb->bmv[0] = mb->mv;
1018  }
1019 }
1020 
/* VP8 inter-MB motion vector decoding: gather MV candidates from the top,
 * left and top-left neighbours (with SWAR sign flip when reference sign
 * biases differ), then choose zero / nearest / near / new / split MV per
 * vp8_mode_contexts.
 * NOTE(review): partial listing — the signature line and original line
 * 1075 (presumably clamping/setup before mode selection) are elided. */
1021 static av_always_inline
1023  int mb_x, int mb_y, int layout)
1024 {
1025  VP8Macroblock *mb_edge[3] = { 0 /* top */,
1026  mb - 1 /* left */,
1027  0 /* top-left */ };
1028  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1029  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1030  int idx = CNT_ZERO;
1031  int cur_sign_bias = s->sign_bias[mb->ref_frame];
1032  int8_t *sign_bias = s->sign_bias;
1033  VP56mv near_mv[4];
1034  uint8_t cnt[4] = { 0 };
1035  VP56RangeCoder *c = &s->c;
1036 
1037  if (!layout) { // layout is inlined (s->mb_layout is not)
1038  mb_edge[0] = mb + 2;
1039  mb_edge[2] = mb + 1;
1040  } else {
1041  mb_edge[0] = mb - s->mb_width - 1;
1042  mb_edge[2] = mb - s->mb_width - 2;
1043  }
1044 
1045  AV_ZERO32(&near_mv[0]);
1046  AV_ZERO32(&near_mv[1]);
1047  AV_ZERO32(&near_mv[2]);
1048 
1049  /* Process MB on top, left and top-left */
1050 #define MV_EDGE_CHECK(n) \
1051  { \
1052  VP8Macroblock *edge = mb_edge[n]; \
1053  int edge_ref = edge->ref_frame; \
1054  if (edge_ref != VP56_FRAME_CURRENT) { \
1055  uint32_t mv = AV_RN32A(&edge->mv); \
1056  if (mv) { \
1057  if (cur_sign_bias != sign_bias[edge_ref]) { \
1058  /* SWAR negate of the values in mv. */ \
1059  mv = ~mv; \
1060  mv = ((mv & 0x7fff7fff) + \
1061  0x00010001) ^ (mv & 0x80008000); \
1062  } \
1063  if (!n || mv != AV_RN32A(&near_mv[idx])) \
1064  AV_WN32A(&near_mv[++idx], mv); \
1065  cnt[idx] += 1 + (n != 2); \
1066  } else \
1067  cnt[CNT_ZERO] += 1 + (n != 2); \
1068  } \
1069  }
1070 
1071  MV_EDGE_CHECK(0)
1072  MV_EDGE_CHECK(1)
1073  MV_EDGE_CHECK(2)
1074 
1076  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1077  mb->mode = VP8_MVMODE_MV;
1078 
1079  /* If we have three distinct MVs, merge first and last if they're the same */
1080  if (cnt[CNT_SPLITMV] &&
1081  AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1082  cnt[CNT_NEAREST] += 1;
1083 
1084  /* Swap near and nearest if necessary */
1085  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1086  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1087  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1088  }
1089 
1090  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1091  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1092  /* Choose the best mv out of 0,0 and the nearest mv */
1093  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1094  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1095  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1096  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1097 
1098  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1099  mb->mode = VP8_MVMODE_SPLIT;
1100  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1101  } else {
1102  mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1103  mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1104  mb->bmv[0] = mb->mv;
1105  }
1106  } else {
1107  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1108  mb->bmv[0] = mb->mv;
1109  }
1110  } else {
1111  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1112  mb->bmv[0] = mb->mv;
1113  }
1114  } else {
1115  mb->mode = VP8_MVMODE_ZERO;
1116  AV_ZERO32(&mb->mv);
1117  mb->bmv[0] = mb->mv;
1118  }
1119 }
1120 
/* Decode the 16 intra 4x4 prediction modes of a macroblock.  On keyframes
 * each mode is context-coded from the modes above and to the left; on
 * inter frames a context-free tree is used.
 * NOTE(review): partial listing — the signature line and original line
 * 1152 (the probability argument of the non-keyframe tree read) are
 * elided here. */
1121 static av_always_inline
1123  int mb_x, int keyframe, int layout)
1124 {
1125  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1126 
/* In the sliced layout the top-row modes live in the MB above; pull them
 * into this MB's cache first. */
1127  if (layout) {
1128  VP8Macroblock *mb_top = mb - s->mb_width - 1;
1129  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1130  }
1131  if (keyframe) {
1132  int x, y;
1133  uint8_t *top;
1134  uint8_t *const left = s->intra4x4_pred_mode_left;
1135  if (layout)
1136  top = mb->intra4x4_pred_mode_top;
1137  else
1138  top = s->intra4x4_pred_mode_top + 4 * mb_x;
1139  for (y = 0; y < 4; y++) {
1140  for (x = 0; x < 4; x++) {
1141  const uint8_t *ctx;
1142  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1143  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1144  left[y] = top[x] = *intra4x4;
1145  intra4x4++;
1146  }
1147  }
1148  } else {
1149  int i;
1150  for (i = 0; i < 16; i++)
1151  intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1153  }
1154 }
1155 
/**
 * Decode the per-macroblock header: segment id, skip flag, prediction mode
 * and, for inter macroblocks, the reference frame and motion vectors.
 * NOTE(review): several source lines were elided in extraction (the VP7
 * feature index/av_log lines, the reads of mb->mode, the
 * GOLDEN/PREVIOUS ref-frame assignments, and the chroma mode read).
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    const char *vp7_feature_name[] = { "q-index",
                                       "lf-delta",
                                       "partial-golden-update",
                                       "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        /* VP7: per-macroblock "features" replace VP8 segmentation */
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                /* NOTE(review): lines reading the feature bit/index elided */
                                                  s->feature_index_prob[i]);
                       "Feature %s present in macroblock (value 0x%x)\n",
                       vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        /* explicit segment id, 2-level binary tree */
        int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        /* no update this frame: carry the id over from the reference map */
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            /* replicate the whole-MB mode into all four 4x4 context slots */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
        else
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

                                        s->prob->pred8x8c);
        /* intra MBs carry no motion: clear the first block mv */
        AV_ZERO32(&mb->bmv[0]);
    }
}
1236 
1237 /**
1238  * @param r arithmetic bitstream reader context
1239  * @param block destination for block coefficients
1240  * @param probs probabilities to use when reading trees from the bitstream
1241  * @param i initial coeff index, 0 unless a separate DC block is coded
1242  * @param qmul array holding the dc/ac dequant factor at position 0/1
1243  *
1244  * @return 0 if no coeffs were decoded
1245  * otherwise, the index of the last coeff decoded plus one
1246  */
/* NOTE(review): the first signature line (return type, name, range-coder and
 * block parameters) was elided in extraction. */
static av_always_inline
                                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                        int i, uint8_t *token_prob, int16_t qmul[2],
                                        const uint8_t scan[16], int vp7)
{
    /* work on a local copy of the range coder; written back at exit */
    VP56RangeCoder c = *r;
    /* the caller already consumed the first EOB check */
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            /* VP7 re-checks EOB after a zero run, VP8 does not */
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        /* sign bit, then dequantize: DC factor for i==0, AC otherwise */
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
1306 
1307 static av_always_inline
1308 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1309 {
1310  int16_t dc = block[0];
1311  int ret = 0;
1312 
1313  if (pred[1] > 3) {
1314  dc += pred[0];
1315  ret = 1;
1316  }
1317 
1318  if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1319  block[0] = pred[0] = dc;
1320  pred[1] = 0;
1321  } else {
1322  if (pred[0] == dc)
1323  pred[1]++;
1324  block[0] = pred[0] = dc;
1325  }
1326 
1327  return ret;
1328 }
1329 
/* VP7 flavour of the coefficient reader: takes an explicit scan order.
 * NOTE(review): the first signature line (static int
 * vp7_decode_block_coeffs_internal(VP56RangeCoder *r, ...) was elided in
 * extraction. */
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}
1340 
#ifndef vp8_decode_block_coeffs_internal
/* VP8 flavour: fixed zigzag scan.  Guarded so an arch-specific asm
 * implementation can replace it via a macro of the same name.
 * NOTE(review): the first signature line was elided in extraction. */
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif
1352 
1353 /**
1354  * @param c arithmetic bitstream reader context
1355  * @param block destination for block coefficients
1356  * @param probs probabilities to use when reading trees from the bitstream
1357  * @param i initial coeff index, 0 unless a separate DC block is coded
1358  * @param zero_nhood the initial prediction context for number of surrounding
1359  * all-zero blocks (only left/top, so 0-2)
1360  * @param qmul array holding the dc/ac dequant factor at position 0/1
1361  * @param scan scan pattern (VP7 only)
1362  *
1363  * @return 0 if no coeffs were decoded
1364  * otherwise, the index of the last coeff decoded plus one
1365  */
/* NOTE(review): the first signature line (static int decode_block_coeffs(
 * VP56RangeCoder *c, int16_t block[16], ...) was elided in extraction. */
static av_always_inline
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    /* fast path: empty block — consume only the EOB token */
    if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
1380 
/**
 * Decode all coefficients of one macroblock: the optional separate luma DC
 * (WHT) block, the 16 luma 4x4 blocks, then the 2x2 chroma blocks for U and
 * V.  Maintains the left/top non-zero contexts used for probability
 * selection and forces mb->skip when nothing at all was coded.
 * NOTE(review): the signature line was elided in extraction.
 */
static av_always_inline
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    /* these modes carry a separate DC block (slot 8 of the nnz contexts) */
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            /* VP7 additionally predicts the inter DC from previous blocks */
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        /* DC handled separately: luma AC starts at coeff 1, context 0 */
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
1455 
1456 static av_always_inline
1457 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1458  uint8_t *src_cb, uint8_t *src_cr,
1459  int linesize, int uvlinesize, int simple)
1460 {
1461  AV_COPY128(top_border, src_y + 15 * linesize);
1462  if (!simple) {
1463  AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1464  AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1465  }
1466 }
1467 
/**
 * Exchange (xchg=1) or copy (xchg=0) the 8-byte groups along the current
 * macroblock's top edge with the saved top-border buffer; used around
 * intra prediction (see intra_predict) so prediction reads the original
 * neighbour pixels.  The top-right groups are always swapped (third arg 1).
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    /* step back one row so src_* point at the row above this macroblock */
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                        \
    do {                                        \
        if (xchg)                               \
            AV_SWAP64(b, a);                    \
        else                                    \
            AV_COPY64(b, a);                    \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    /* top-right pixels exist only when a macroblock follows to the right */
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
1501 
1502 static av_always_inline
1503 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1504 {
1505  if (!mb_x)
1506  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1507  else
1508  return mb_y ? mode : LEFT_DC_PRED8x8;
1509 }
1510 
1511 static av_always_inline
1512 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1513 {
1514  if (!mb_x)
1515  return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1516  else
1517  return mb_y ? mode : HOR_PRED8x8;
1518 }
1519 
1520 static av_always_inline
1521 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1522 {
1523  switch (mode) {
1524  case DC_PRED8x8:
1525  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1526  case VERT_PRED8x8:
1527  return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1528  case HOR_PRED8x8:
1529  return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1530  case PLANE_PRED8x8: /* TM */
1531  return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1532  }
1533  return mode;
1534 }
1535 
1536 static av_always_inline
1537 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1538 {
1539  if (!mb_x) {
1540  return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1541  } else {
1542  return mb_y ? mode : HOR_VP8_PRED;
1543  }
1544 }
1545 
1546 static av_always_inline
1547 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1548  int *copy_buf, int vp7)
1549 {
1550  switch (mode) {
1551  case VERT_PRED:
1552  if (!mb_x && mb_y) {
1553  *copy_buf = 1;
1554  return mode;
1555  }
1556  /* fall-through */
1557  case DIAG_DOWN_LEFT_PRED:
1558  case VERT_LEFT_PRED:
1559  return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1560  case HOR_PRED:
1561  if (!mb_y) {
1562  *copy_buf = 1;
1563  return mode;
1564  }
1565  /* fall-through */
1566  case HOR_UP_PRED:
1567  return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1568  case TM_VP8_PRED:
1569  return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1570  case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1571  * as 16x16/8x8 DC */
1572  case DIAG_DOWN_RIGHT_PRED:
1573  case VERT_RIGHT_PRED:
1574  case HOR_DOWN_PRED:
1575  if (!mb_y || !mb_x)
1576  *copy_buf = 1;
1577  return mode;
1578  }
1579  return mode;
1580 }
1581 
/**
 * Intra-predict one macroblock: either a whole 16x16 luma prediction or 16
 * separate 4x4 predictions (with residual add), then the 8x8 chroma
 * prediction, all with frame-edge mode fixups.
 * NOTE(review): three source lines were elided in extraction — the first
 * signature line, the statement guarded by "if (mb->skip)", and the first
 * line of the chroma check_intra_pred8x8_mode_emuedge() call.
 */
static av_always_inline
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127 otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        /* single whole-macroblock prediction */
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        /* edge fill values: VP7 uses 128 everywhere, VP8 127 (top) / 129 (left) */
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            /* NOTE(review): the statement on this branch was elided */

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    /* frame-edge block: predict into an 8-byte-stride scratch
                     * buffer whose missing border pixels are synthesized,
                     * then copy the 4x4 result back below */
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                /* add the residual right after predicting each subblock */
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    /* NOTE(review): first line of the chroma mode fixup call was elided */
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1695 
/* Subpel filter geometry, indexed by the 3-bit fractional mv component:
 * row 0 = extra pixels needed left/above (doubles as mc_func index),
 * row 1 = total extra pixels the filter reads,
 * row 2 = extra pixels needed right/below. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1702 
1703 /**
1704  * luma MC function
1705  *
1706  * @param s VP8 decoding context
1707  * @param dst target buffer for block data at block position
1708  * @param ref reference picture buffer at origin (0, 0)
1709  * @param mv motion vector (relative to block position) to get pixel data from
1710  * @param x_off horizontal position of block from origin (0, 0)
1711  * @param y_off vertical position of block from origin (0, 0)
1712  * @param block_w width of block (16, 8 or 4)
1713  * @param block_h height of block (always same as block_w)
1714  * @param width width of src/dst plane data
1715  * @param height height of src/dst plane data
1716  * @param linesize size of a single line of plane data, including padding
1717  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1718  */
/* NOTE(review): two source lines were elided in extraction — the first
 * signature line (void vp8_mc_luma(VP8Context *s, VP8ThreadData *td,
 * uint8_t *dst, ...)) and the first line of the edge-emulation call. */
static av_always_inline
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        /* non-zero mv: subpel interpolation may be needed */
        int src_linesize = linesize;

        /* luma mvs are quarter-pel; <<1 maps them to the eighth-pel tables */
        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* NOTE(review): first line of the emulated-edge call elided */
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        /* zero mv: plain copy, no filtering or edge handling needed */
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
1759 
1760 /**
1761  * chroma MC function
1762  *
1763  * @param s VP8 decoding context
1764  * @param dst1 target buffer for block data at block position (U plane)
1765  * @param dst2 target buffer for block data at block position (V plane)
1766  * @param ref reference picture buffer at origin (0, 0)
1767  * @param mv motion vector (relative to block position) to get pixel data from
1768  * @param x_off horizontal position of block from origin (0, 0)
1769  * @param y_off vertical position of block from origin (0, 0)
1770  * @param block_w width of block (16, 8 or 4)
1771  * @param block_h height of block (always same as block_w)
1772  * @param width width of src/dst plane data
1773  * @param height height of src/dst plane data
1774  * @param linesize size of a single line of plane data, including padding
1775  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1776  */
/* NOTE(review): three source lines were elided in extraction — the first
 * signature line (void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td,
 * uint8_t *dst1, ...)) and the first line of each edge-emulation call. */
static av_always_inline
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        /* chroma mvs are eighth-pel: low 3 bits select the subpel filter */
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* NOTE(review): first line of the U-plane emulated-edge call elided */
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            /* NOTE(review): first line of the V-plane emulated-edge call elided */
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        /* zero mv: plain copy for both chroma planes */
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1826 
/**
 * Motion-compensate one rectangular partition: luma at full resolution,
 * then both chroma planes at half resolution.
 * NOTE(review): the signature line (void vp8_mc_part(VP8Context *s,
 * VP8ThreadData *td, uint8_t *dst[3],) was elided in extraction.
 */
static av_always_inline
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    /* halve all geometry for the 4:2:0 chroma planes */
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
1862 
1863 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1864  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1865 static av_always_inline
1866 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1867  int mb_xy, int ref)
1868 {
1869  /* Don't prefetch refs that haven't been used very often this frame. */
1870  if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1871  int x_off = mb_x << 4, y_off = mb_y << 4;
1872  int mx = (mb->mv.x >> 2) + x_off + 8;
1873  int my = (mb->mv.y >> 2) + y_off;
1874  uint8_t **src = s->framep[ref]->tf.f->data;
1875  int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1876  /* For threading, a ff_thread_await_progress here might be useful, but
1877  * it actually slows down the decoder. Since a bad prefetch doesn't
1878  * generate bad decoder output, we don't run it here. */
1879  s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1880  off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1881  s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1882  }
1883 }
1884 
1885 /**
1886  * Apply motion vectors to prediction buffer, chapter 18.
1887  */
/* NOTE(review): the signature line (void inter_predict(VP8Context *s,
 * VP8ThreadData *td, uint8_t *dst[3],) was elided in extraction. */
static av_always_inline
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        /* one mv for the whole 16x16 macroblock */
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                /* chroma mv = rounded average of the four covered luma mvs */
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
1971 
/**
 * Add the dequantized residuals to the predicted macroblock.  The packed
 * per-row non-zero counts select between DC-only add, full IDCT-add, and
 * the four-blocks-at-once DC paths; luma is skipped for I4x4 macroblocks
 * (their residuals were already added during intra_predict()).
 * NOTE(review): the signature line was elided in extraction.
 */
static av_always_inline
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            /* four per-block counts packed into one little-endian word */
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    /* every block in this row is DC-only */
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
2033 
/**
 * Compute the loop-filter strength for one macroblock from the segment
 * level, the per-ref/per-mode deltas and the sharpness setting; results
 * are stored in *f for the filtering pass.
 * NOTE(review): the first signature line was elided in extraction.
 */
static av_always_inline
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;  /* segment value is a delta */
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    /* inner edges are skipped only for VP8 skipped non-split inter MBs */
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}
2066 
/**
 * Run the normal (non-simple) loop filter over one macroblock: left/top
 * macroblock edges with the stronger limit, then the inner 4-pixel edges
 * when inner_filter is set.  VP8 runs the inner horizontal luma pass
 * before the vertical edges, VP7 after.
 * NOTE(review): the first signature line was elided in extraction.
 */
static av_always_inline
                      int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    /* left macroblock edge (exists only for mb_x > 0) */
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
    if (cond && inner_filter) {                                               \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,          \
                                             uvlinesize, bedge_lim_uv,        \
                                             inner_limit, hev_thresh);        \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    /* top macroblock edge (exists only for mb_y > 0) */
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}
2153 
2154 static av_always_inline
2156  int mb_x, int mb_y)
2157 {
2158  int mbedge_lim, bedge_lim;
2159  int filter_level = f->filter_level;
2160  int inner_limit = f->inner_limit;
2161  int inner_filter = f->inner_filter;
2162  int linesize = s->linesize;
2163 
2164  if (!filter_level)
2165  return;
2166 
2167  bedge_lim = 2 * filter_level + inner_limit;
2168  mbedge_lim = bedge_lim + 4;
2169 
2170  if (mb_x)
2171  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2172  if (inner_filter) {
2173  s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2174  s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2175  s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2176  }
2177 
2178  if (mb_y)
2179  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2180  if (inner_filter) {
2181  s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2182  s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2183  s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2184  }
2185 }
2186 
2187 #define MARGIN (16 << 2)
2188 static av_always_inline
2190  VP8Frame *prev_frame, int is_vp7)
2191 {
2192  VP8Context *s = avctx->priv_data;
2193  int mb_x, mb_y;
2194 
2195  s->mv_min.y = -MARGIN;
2196  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2197  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2199  ((s->mb_width + 1) * (mb_y + 1) + 1);
2200  int mb_xy = mb_y * s->mb_width;
2201 
2202  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2203 
2204  s->mv_min.x = -MARGIN;
2205  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2206  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2207  if (mb_y == 0)
2208  AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2209  DC_PRED * 0x01010101);
2210  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2211  prev_frame && prev_frame->seg_map ?
2212  prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2213  s->mv_min.x -= 64;
2214  s->mv_max.x -= 64;
2215  }
2216  s->mv_min.y -= 64;
2217  s->mv_max.y -= 64;
2218  }
2219 }
2220 
/* VP7 entry point for the whole-frame mode/MV pre-parse pass. */
static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}
2226 
/* VP8 entry point for the whole-frame mode/MV pre-parse pass. */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}
2232 
#if HAVE_THREADS
/* Block until the other row's thread (otd) has decoded at least up to
 * (mb_x_check, mb_y_check).  Positions are packed as (mb_y << 16) | mb_x so
 * a single integer comparison orders them. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (otd->thread_mb_pos < tmp) {                                       \
            pthread_mutex_lock(&otd->lock);                                   \
            td->wait_mb_pos = tmp;                                            \
            do {                                                              \
                if (otd->thread_mb_pos >= tmp)                                \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            td->wait_mb_pos = INT_MAX;                                        \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

/* Publish this thread's decode position and wake neighbours that are waiting
 * for it.  NOTE: relies on avctx, num_jobs, td, prev_td and next_td being in
 * scope at the expansion site. */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1                                  \
                                         : (next_td != td &&                  \
                                            pos >= next_td->wait_mb_pos) ||   \
                                           (prev_td != td &&                  \
                                            pos >= prev_td->wait_mb_pos);     \
        td->thread_mb_pos = pos;                                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
/* Single-threaded build: synchronization is a no-op. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
2272 
2274  int jobnr, int threadnr, int is_vp7)
2275 {
2276  VP8Context *s = avctx->priv_data;
2277  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2278  int mb_y = td->thread_mb_pos >> 16;
2279  int mb_x, mb_xy = mb_y * s->mb_width;
2280  int num_jobs = s->num_jobs;
2281  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2282  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2283  VP8Macroblock *mb;
2284  uint8_t *dst[3] = {
2285  curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2286  curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2287  curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2288  };
2289  if (mb_y == 0)
2290  prev_td = td;
2291  else
2292  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2293  if (mb_y == s->mb_height - 1)
2294  next_td = td;
2295  else
2296  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2297  if (s->mb_layout == 1)
2298  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2299  else {
2300  // Make sure the previous frame has read its segmentation map,
2301  // if we re-use the same map.
2302  if (prev_frame && s->segmentation.enabled &&
2304  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2305  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2306  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2307  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2308  }
2309 
2310  if (!is_vp7 || mb_y == 0)
2311  memset(td->left_nnz, 0, sizeof(td->left_nnz));
2312 
2313  s->mv_min.x = -MARGIN;
2314  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2315 
2316  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2317  // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2318  if (prev_td != td) {
2319  if (threadnr != 0) {
2320  check_thread_pos(td, prev_td,
2321  mb_x + (is_vp7 ? 2 : 1),
2322  mb_y - (is_vp7 ? 2 : 1));
2323  } else {
2324  check_thread_pos(td, prev_td,
2325  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2326  mb_y - (is_vp7 ? 2 : 1));
2327  }
2328  }
2329 
2330  s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2331  s->linesize, 4);
2332  s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2333  dst[2] - dst[1], 2);
2334 
2335  if (!s->mb_layout)
2336  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2337  prev_frame && prev_frame->seg_map ?
2338  prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2339 
2340  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2341 
2342  if (!mb->skip)
2343  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2344 
2345  if (mb->mode <= MODE_I4x4)
2346  intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2347  else
2348  inter_predict(s, td, dst, mb, mb_x, mb_y);
2349 
2350  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2351 
2352  if (!mb->skip) {
2353  idct_mb(s, td, dst, mb);
2354  } else {
2355  AV_ZERO64(td->left_nnz);
2356  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2357 
2358  /* Reset DC block predictors if they would exist
2359  * if the mb had coefficients */
2360  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2361  td->left_nnz[8] = 0;
2362  s->top_nnz[mb_x][8] = 0;
2363  }
2364  }
2365 
2366  if (s->deblock_filter)
2367  filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2368 
2369  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2370  if (s->filter.simple)
2371  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2372  NULL, NULL, s->linesize, 0, 1);
2373  else
2374  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2375  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2376  }
2377 
2378  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2379 
2380  dst[0] += 16;
2381  dst[1] += 8;
2382  dst[2] += 8;
2383  s->mv_min.x -= 64;
2384  s->mv_max.x -= 64;
2385 
2386  if (mb_x == s->mb_width + 1) {
2387  update_pos(td, mb_y, s->mb_width + 3);
2388  } else {
2389  update_pos(td, mb_y, mb_x);
2390  }
2391  }
2392 }
2393 
/* VP7 row-decode callback (no loop filtering). */
static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}
2399 
/* VP8 row-decode callback (no loop filtering). */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
2405 
/* Run the in-loop deblocking filter over one macroblock row, coordinating
 * with the decode threads of the neighbouring rows via
 * check_thread_pos()/update_pos(). */
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    /* Destination pointers for the start of this row in Y, U, V. */
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    /* First/last rows have no neighbour row thread to wait on. */
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        /* With a single job, save the unfiltered border here (multi-job
         * decoding saved it in the decode pass instead). */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}
2464 
/* VP7 row-filter callback. */
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}
2470 
/* VP8 row-filter callback. */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
2476 
2477 static av_always_inline
2478 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2479  int threadnr, int is_vp7)
2480 {
2481  VP8Context *s = avctx->priv_data;
2482  VP8ThreadData *td = &s->thread_data[jobnr];
2483  VP8ThreadData *next_td = NULL, *prev_td = NULL;
2484  VP8Frame *curframe = s->curframe;
2485  int mb_y, num_jobs = s->num_jobs;
2486 
2487  td->thread_nr = threadnr;
2488  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2489  if (mb_y >= s->mb_height)
2490  break;
2491  td->thread_mb_pos = mb_y << 16;
2492  s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2493  if (s->deblock_filter)
2494  s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2495  update_pos(td, mb_y, INT_MAX & 0xFFFF);
2496 
2497  s->mv_min.y -= 64;
2498  s->mv_max.y -= 64;
2499 
2500  if (avctx->active_thread_type == FF_THREAD_FRAME)
2501  ff_thread_report_progress(&curframe->tf, mb_y, 0);
2502  }
2503 
2504  return 0;
2505 }
2506 
/* VP7 execute2() job entry point. */
static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}
2512 
/* VP8 execute2() job entry point. */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}
2518 
2519 
2520 static av_always_inline
2521 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2522  AVPacket *avpkt, int is_vp7)
2523 {
2524  VP8Context *s = avctx->priv_data;
2525  int ret, i, referenced, num_jobs;
2526  enum AVDiscard skip_thresh;
2527  VP8Frame *av_uninit(curframe), *prev_frame;
2528 
2529  if (is_vp7)
2530  ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2531  else
2532  ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2533 
2534  if (ret < 0)
2535  goto err;
2536 
2537  prev_frame = s->framep[VP56_FRAME_CURRENT];
2538 
2539  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2541 
2542  skip_thresh = !referenced ? AVDISCARD_NONREF
2543  : !s->keyframe ? AVDISCARD_NONKEY
2544  : AVDISCARD_ALL;
2545 
2546  if (avctx->skip_frame >= skip_thresh) {
2547  s->invisible = 1;
2548  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2549  goto skip_decode;
2550  }
2551  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2552 
2553  // release no longer referenced frames
2554  for (i = 0; i < 5; i++)
2555  if (s->frames[i].tf.f->data[0] &&
2556  &s->frames[i] != prev_frame &&
2557  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2558  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2559  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2560  vp8_release_frame(s, &s->frames[i]);
2561 
2562  curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2563 
2564  if (!s->colorspace)
2565  avctx->colorspace = AVCOL_SPC_BT470BG;
2566  if (s->fullrange)
2567  avctx->color_range = AVCOL_RANGE_JPEG;
2568  else
2569  avctx->color_range = AVCOL_RANGE_MPEG;
2570 
2571  /* Given that arithmetic probabilities are updated every frame, it's quite
2572  * likely that the values we have on a random interframe are complete
2573  * junk if we didn't start decode on a keyframe. So just don't display
2574  * anything rather than junk. */
2575  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2576  !s->framep[VP56_FRAME_GOLDEN] ||
2577  !s->framep[VP56_FRAME_GOLDEN2])) {
2578  av_log(avctx, AV_LOG_WARNING,
2579  "Discarding interframe without a prior keyframe!\n");
2580  ret = AVERROR_INVALIDDATA;
2581  goto err;
2582  }
2583 
2584  curframe->tf.f->key_frame = s->keyframe;
2585  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2587  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2588  goto err;
2589 
2590  // check if golden and altref are swapped
2591  if (s->update_altref != VP56_FRAME_NONE)
2593  else
2595 
2596  if (s->update_golden != VP56_FRAME_NONE)
2598  else
2600 
2601  if (s->update_last)
2602  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2603  else
2605 
2606  s->next_framep[VP56_FRAME_CURRENT] = curframe;
2607 
2608  if (avctx->codec->update_thread_context)
2609  ff_thread_finish_setup(avctx);
2610 
2611  s->linesize = curframe->tf.f->linesize[0];
2612  s->uvlinesize = curframe->tf.f->linesize[1];
2613 
2614  memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2615  /* Zero macroblock structures for top/top-left prediction
2616  * from outside the frame. */
2617  if (!s->mb_layout)
2618  memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2619  (s->mb_width + 1) * sizeof(*s->macroblocks));
2620  if (!s->mb_layout && s->keyframe)
2621  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2622 
2623  memset(s->ref_count, 0, sizeof(s->ref_count));
2624 
2625  if (s->mb_layout == 1) {
2626  // Make sure the previous frame has read its segmentation map,
2627  // if we re-use the same map.
2628  if (prev_frame && s->segmentation.enabled &&
2630  ff_thread_await_progress(&prev_frame->tf, 1, 0);
2631  if (is_vp7)
2632  vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2633  else
2634  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2635  }
2636 
2637  if (avctx->active_thread_type == FF_THREAD_FRAME)
2638  num_jobs = 1;
2639  else
2640  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2641  s->num_jobs = num_jobs;
2642  s->curframe = curframe;
2643  s->prev_frame = prev_frame;
2644  s->mv_min.y = -MARGIN;
2645  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2646  for (i = 0; i < MAX_THREADS; i++) {
2647  s->thread_data[i].thread_mb_pos = 0;
2648  s->thread_data[i].wait_mb_pos = INT_MAX;
2649  }
2650  if (is_vp7)
2652  num_jobs);
2653  else
2655  num_jobs);
2656 
2657  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2658  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2659 
2660 skip_decode:
2661  // if future frames don't use the updated probabilities,
2662  // reset them to the values we saved
2663  if (!s->update_probabilities)
2664  s->prob[0] = s->prob[1];
2665 
2666  if (!s->invisible) {
2667  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2668  return ret;
2669  *got_frame = 1;
2670  }
2671 
2672  return avpkt->size;
2673 err:
2674  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2675  return ret;
2676 }
2677 
/* Public VP8 decode entry point. */
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}
2683 
#if CONFIG_VP7_DECODER
/* VP7 decode entry point. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2691 
2693 {
2694  VP8Context *s = avctx->priv_data;
2695  int i;
2696 
2697  if (!s)
2698  return 0;
2699 
2700  vp8_decode_flush_impl(avctx, 1);
2701  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2702  av_frame_free(&s->frames[i].tf.f);
2703 
2704  return 0;
2705 }
2706 
2708 {
2709  int i;
2710  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2711  s->frames[i].tf.f = av_frame_alloc();
2712  if (!s->frames[i].tf.f)
2713  return AVERROR(ENOMEM);
2714  }
2715  return 0;
2716 }
2717 
2718 static av_always_inline
2719 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2720 {
2721  VP8Context *s = avctx->priv_data;
2722  int ret;
2723 
2724  s->avctx = avctx;
2725  s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2726  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2727  avctx->internal->allocate_progress = 1;
2728 
2729  ff_videodsp_init(&s->vdsp, 8);
2730 
2731  ff_vp78dsp_init(&s->vp8dsp);
2732  if (CONFIG_VP7_DECODER && is_vp7) {
2734  ff_vp7dsp_init(&s->vp8dsp);
2737  } else if (CONFIG_VP8_DECODER && !is_vp7) {
2739  ff_vp8dsp_init(&s->vp8dsp);
2742  }
2743 
2744  /* does not change for VP8 */
2745  memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2746 
2747  if ((ret = vp8_init_frames(s)) < 0) {
2748  ff_vp8_decode_free(avctx);
2749  return ret;
2750  }
2751 
2752  return 0;
2753 }
2754 
#if CONFIG_VP7_DECODER
/* AVCodec.init callback for the VP7 decoder. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2761 
2763 {
2764  return vp78_decode_init(avctx, IS_VP8);
2765 }
2766 
2767 #if CONFIG_VP8_DECODER
2768 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2769 {
2770  VP8Context *s = avctx->priv_data;
2771  int ret;
2772 
2773  s->avctx = avctx;
2774 
2775  if ((ret = vp8_init_frames(s)) < 0) {
2776  ff_vp8_decode_free(avctx);
2777  return ret;
2778  }
2779 
2780  return 0;
2781 }
2782 
/* Translate a frame pointer from the source context into the pointer to the
 * same-index slot in this context's frame array. */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/* Frame-threading state copy: sync probabilities, segmentation, filter
 * deltas, sign biases and the reference-frame pointers from src to dst. */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    /* Frame size changed between threads: drop the per-size buffers; they
     * are reallocated on the next decode call. */
    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    /* Copy the probability set that future frames will actually use. */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
2819 
#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = CODEC_CAP_DR1,
    /* NOTE(review): .flush line was dropped by the doc scrape; reconstructed
     * from upstream FFmpeg 2.6. */
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */
2834 
#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    /* NOTE(review): the .init/.decode/.capabilities/.flush lines were dropped
     * by the doc scrape; reconstructed from upstream FFmpeg 2.6. */
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS |
                             CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
#endif /* CONFIG_VP8_DECODER */ /* fixed: previously mislabelled CONFIG_VP7_DECODER */