/* Libav — vp9.c (recovered from the Doxygen-generated documentation page) */
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of Libav.
8  *
9  * Libav is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * Libav is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with Libav; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33 
34 #define VP9_SYNCCODE 0x498342
35 #define MAX_PROB 255
36 
37 static void vp9_decode_flush(AVCodecContext *avctx)
38 {
39  VP9Context *s = avctx->priv_data;
40  int i;
41 
42  for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
43  av_frame_unref(s->refs[i]);
44 }
45 
46 static int update_size(AVCodecContext *avctx, int w, int h)
47 {
48  VP9Context *s = avctx->priv_data;
49  uint8_t *p;
50 
51  if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
52  return 0;
53 
54  vp9_decode_flush(avctx);
55 
56  if (w <= 0 || h <= 0)
57  return AVERROR_INVALIDDATA;
58 
59  avctx->width = w;
60  avctx->height = h;
61  s->sb_cols = (w + 63) >> 6;
62  s->sb_rows = (h + 63) >> 6;
63  s->cols = (w + 7) >> 3;
64  s->rows = (h + 7) >> 3;
65 
66 #define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
68  p = av_malloc(s->sb_cols *
69  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
70  64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
71  if (!p)
72  return AVERROR(ENOMEM);
74  assign(s->above_skip_ctx, uint8_t *, 8);
75  assign(s->above_txfm_ctx, uint8_t *, 8);
76  assign(s->above_mode_ctx, uint8_t *, 16);
77  assign(s->above_y_nnz_ctx, uint8_t *, 16);
78  assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
79  assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
80  assign(s->intra_pred_data[0], uint8_t *, 64);
81  assign(s->intra_pred_data[1], uint8_t *, 32);
82  assign(s->intra_pred_data[2], uint8_t *, 32);
84  assign(s->above_intra_ctx, uint8_t *, 8);
85  assign(s->above_comp_ctx, uint8_t *, 8);
86  assign(s->above_ref_ctx, uint8_t *, 8);
88  assign(s->lflvl, VP9Filter *, 1);
89  assign(s->above_mv_ctx, VP56mv(*)[2], 16);
90  assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
91  assign(s->mv[0], VP9MVRefPair *, 64 * s->sb_rows);
92  assign(s->mv[1], VP9MVRefPair *, 64 * s->sb_rows);
93 #undef assign
94 
95  return 0;
96 }
97 
98 // The sign bit is at the end, not the start, of a bit sequence
100 {
101  int v = get_bits(gb, n);
102  return get_bits1(gb) ? -v : v;
103 }
104 
105 static av_always_inline int inv_recenter_nonneg(int v, int m)
106 {
107  if (v > 2 * m)
108  return v;
109  if (v & 1)
110  return m - ((v + 1) >> 1);
111  return m + (v >> 1);
112 }
113 
// differential forward probability updates
/*
 * Decode a differential update for probability p (range [1, 255]) from the
 * range coder and return the new probability. The update is coded as a
 * short VLC selecting one of four magnitude classes, then remapped through
 * inv_map_table[] and recentered around the current value.
 */
static int update_prob(VP56RangeCoder *c, int p)
{
    /* Inverse of the encoder-side map: the first 20 entries give coarse
     * steps of 13, the rest fill in the remaining fine values. */
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    /* Four-class VLC: 4-bit, 4-bit+16, 5-bit+32, or 7-bit (+escape)+64. */
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65) {
            /* Escape: one extra bit doubles the resolution of the tail. */
            d = (d << 1) - 65 + vp8_rac_get(c);
            d = av_clip(d, 0, MAX_PROB - 65 - 1);
        }
        d += 64;
    }

    /* Recenter around p, mirroring for the upper half of the range. */
    return p <= 128
        ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
        : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
174 
176  const uint8_t *data, int size, int *ref)
177 {
178  VP9Context *s = avctx->priv_data;
179  int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
180  int last_invisible;
181  const uint8_t *data2;
182 
183  /* general header */
184  if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
185  av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
186  return ret;
187  }
188  if (get_bits(&s->gb, 2) != 0x2) { // frame marker
189  av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
190  return AVERROR_INVALIDDATA;
191  }
192  s->profile = get_bits1(&s->gb);
193  if (get_bits1(&s->gb)) { // reserved bit
194  av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
195  return AVERROR_INVALIDDATA;
196  }
197  if (get_bits1(&s->gb)) {
198  *ref = get_bits(&s->gb, 3);
199  return 0;
200  }
201 
202  s->last_keyframe = s->keyframe;
203  s->keyframe = !get_bits1(&s->gb);
204 
205  last_invisible = s->invisible;
206  s->invisible = !get_bits1(&s->gb);
207  s->errorres = get_bits1(&s->gb);
208  // FIXME disable this upon resolution change
209  s->use_last_frame_mvs = !s->errorres && !last_invisible;
210 
211  if (s->keyframe) {
212  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
213  av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
214  return AVERROR_INVALIDDATA;
215  }
216  s->colorspace = get_bits(&s->gb, 3);
217  if (s->colorspace == 7) { // RGB = profile 1
218  av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
219  return AVERROR_INVALIDDATA;
220  }
221  s->fullrange = get_bits1(&s->gb);
222  // for profile 1, here follows the subsampling bits
223  s->refreshrefmask = 0xff;
224  w = get_bits(&s->gb, 16) + 1;
225  h = get_bits(&s->gb, 16) + 1;
226  if (get_bits1(&s->gb)) // display size
227  skip_bits(&s->gb, 32);
228  } else {
229  s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
230  s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
231  if (s->intraonly) {
232  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
233  av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
234  return AVERROR_INVALIDDATA;
235  }
236  s->refreshrefmask = get_bits(&s->gb, 8);
237  w = get_bits(&s->gb, 16) + 1;
238  h = get_bits(&s->gb, 16) + 1;
239  if (get_bits1(&s->gb)) // display size
240  skip_bits(&s->gb, 32);
241  } else {
242  s->refreshrefmask = get_bits(&s->gb, 8);
243  s->refidx[0] = get_bits(&s->gb, 3);
244  s->signbias[0] = get_bits1(&s->gb);
245  s->refidx[1] = get_bits(&s->gb, 3);
246  s->signbias[1] = get_bits1(&s->gb);
247  s->refidx[2] = get_bits(&s->gb, 3);
248  s->signbias[2] = get_bits1(&s->gb);
249  if (!s->refs[s->refidx[0]]->buf[0] ||
250  !s->refs[s->refidx[1]]->buf[0] ||
251  !s->refs[s->refidx[2]]->buf[0]) {
252  av_log(avctx, AV_LOG_ERROR,
253  "Not all references are available\n");
254  return AVERROR_INVALIDDATA;
255  }
256  if (get_bits1(&s->gb)) {
257  w = s->refs[s->refidx[0]]->width;
258  h = s->refs[s->refidx[0]]->height;
259  } else if (get_bits1(&s->gb)) {
260  w = s->refs[s->refidx[1]]->width;
261  h = s->refs[s->refidx[1]]->height;
262  } else if (get_bits1(&s->gb)) {
263  w = s->refs[s->refidx[2]]->width;
264  h = s->refs[s->refidx[2]]->height;
265  } else {
266  w = get_bits(&s->gb, 16) + 1;
267  h = get_bits(&s->gb, 16) + 1;
268  }
269  if (get_bits1(&s->gb)) // display size
270  skip_bits(&s->gb, 32);
271  s->highprecisionmvs = get_bits1(&s->gb);
273  get_bits(&s->gb, 2);
274  s->allowcompinter = s->signbias[0] != s->signbias[1] ||
275  s->signbias[0] != s->signbias[2];
276  if (s->allowcompinter) {
277  if (s->signbias[0] == s->signbias[1]) {
278  s->fixcompref = 2;
279  s->varcompref[0] = 0;
280  s->varcompref[1] = 1;
281  } else if (s->signbias[0] == s->signbias[2]) {
282  s->fixcompref = 1;
283  s->varcompref[0] = 0;
284  s->varcompref[1] = 2;
285  } else {
286  s->fixcompref = 0;
287  s->varcompref[0] = 1;
288  s->varcompref[1] = 2;
289  }
290  }
291  }
292  }
293 
294  s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
295  s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
296  s->framectxid = c = get_bits(&s->gb, 2);
297 
298  /* loopfilter header data */
299  s->filter.level = get_bits(&s->gb, 6);
300  sharp = get_bits(&s->gb, 3);
301  /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
302  * keep the old cache values since they are still valid. */
303  if (s->filter.sharpness != sharp)
304  memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
305  s->filter.sharpness = sharp;
306  if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
307  if (get_bits1(&s->gb)) {
308  for (i = 0; i < 4; i++)
309  if (get_bits1(&s->gb))
310  s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
311  for (i = 0; i < 2; i++)
312  if (get_bits1(&s->gb))
313  s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
314  }
315  } else {
316  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
317  }
318 
319  /* quantization header data */
320  s->yac_qi = get_bits(&s->gb, 8);
321  s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
322  s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
323  s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
324  s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
325  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
326 
327  /* segmentation header info */
328  if ((s->segmentation.enabled = get_bits1(&s->gb))) {
329  if ((s->segmentation.update_map = get_bits1(&s->gb))) {
330  for (i = 0; i < 7; i++)
331  s->prob.seg[i] = get_bits1(&s->gb) ?
332  get_bits(&s->gb, 8) : 255;
333  if ((s->segmentation.temporal = get_bits1(&s->gb)))
334  for (i = 0; i < 3; i++)
335  s->prob.segpred[i] = get_bits1(&s->gb) ?
336  get_bits(&s->gb, 8) : 255;
337  }
338 
339  if (get_bits1(&s->gb)) {
341  for (i = 0; i < 8; i++) {
342  if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
343  s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
344  if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
345  s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
346  if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
347  s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
348  s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
349  }
350  }
351  } else {
352  s->segmentation.feat[0].q_enabled = 0;
353  s->segmentation.feat[0].lf_enabled = 0;
354  s->segmentation.feat[0].skip_enabled = 0;
355  s->segmentation.feat[0].ref_enabled = 0;
356  }
357 
358  // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
359  for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
360  int qyac, qydc, quvac, quvdc, lflvl, sh;
361 
362  if (s->segmentation.feat[i].q_enabled) {
364  qyac = s->segmentation.feat[i].q_val;
365  else
366  qyac = s->yac_qi + s->segmentation.feat[i].q_val;
367  } else {
368  qyac = s->yac_qi;
369  }
370  qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
371  quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
372  quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
373  qyac = av_clip_uintp2(qyac, 8);
374 
375  s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
376  s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
377  s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
378  s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];
379 
380  sh = s->filter.level >= 32;
381  if (s->segmentation.feat[i].lf_enabled) {
383  lflvl = s->segmentation.feat[i].lf_val;
384  else
385  lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
386  } else {
387  lflvl = s->filter.level;
388  }
389  s->segmentation.feat[i].lflvl[0][0] =
390  s->segmentation.feat[i].lflvl[0][1] =
391  av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
392  for (j = 1; j < 4; j++) {
393  s->segmentation.feat[i].lflvl[j][0] =
394  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
395  s->lf_delta.mode[0]) << sh), 6);
396  s->segmentation.feat[i].lflvl[j][1] =
397  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
398  s->lf_delta.mode[1]) << sh), 6);
399  }
400  }
401 
402  /* tiling info */
403  if ((ret = update_size(avctx, w, h)) < 0) {
404  av_log(avctx, AV_LOG_ERROR,
405  "Failed to initialize decoder for %dx%d\n", w, h);
406  return ret;
407  }
408  for (s->tiling.log2_tile_cols = 0;
409  (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
410  s->tiling.log2_tile_cols++) ;
411  for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
412  max = FFMAX(0, max - 1);
413  while (max > s->tiling.log2_tile_cols) {
414  if (get_bits1(&s->gb))
415  s->tiling.log2_tile_cols++;
416  else
417  break;
418  }
419  s->tiling.log2_tile_rows = decode012(&s->gb);
420  s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
421  if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
422  s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
423  s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
424  sizeof(VP56RangeCoder) *
425  s->tiling.tile_cols);
426  if (!s->c_b) {
427  av_log(avctx, AV_LOG_ERROR,
428  "Ran out of memory during range coder init\n");
429  return AVERROR(ENOMEM);
430  }
431  }
432 
433  if (s->keyframe || s->errorres || s->intraonly) {
434  s->prob_ctx[0].p =
435  s->prob_ctx[1].p =
436  s->prob_ctx[2].p =
439  sizeof(ff_vp9_default_coef_probs));
441  sizeof(ff_vp9_default_coef_probs));
443  sizeof(ff_vp9_default_coef_probs));
445  sizeof(ff_vp9_default_coef_probs));
446  }
447 
448  // next 16 bits is size of the rest of the header (arith-coded)
449  size2 = get_bits(&s->gb, 16);
450  data2 = align_get_bits(&s->gb);
451  if (size2 > size - (data2 - data)) {
452  av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
453  return AVERROR_INVALIDDATA;
454  }
455  ff_vp56_init_range_decoder(&s->c, data2, size2);
456  if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
457  av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
458  return AVERROR_INVALIDDATA;
459  }
460 
461  if (s->keyframe || s->intraonly)
462  memset(s->counts.coef, 0,
463  sizeof(s->counts.coef) + sizeof(s->counts.eob));
464  else
465  memset(&s->counts, 0, sizeof(s->counts));
466 
467  /* FIXME is it faster to not copy here, but do it down in the fw updates
468  * as explicit copies if the fw update is missing (and skip the copy upon
469  * fw update)? */
470  s->prob.p = s->prob_ctx[c].p;
471 
472  // txfm updates
473  if (s->lossless) {
474  s->txfmmode = TX_4X4;
475  } else {
476  s->txfmmode = vp8_rac_get_uint(&s->c, 2);
477  if (s->txfmmode == 3)
478  s->txfmmode += vp8_rac_get(&s->c);
479 
480  if (s->txfmmode == TX_SWITCHABLE) {
481  for (i = 0; i < 2; i++)
482  if (vp56_rac_get_prob_branchy(&s->c, 252))
483  s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
484  for (i = 0; i < 2; i++)
485  for (j = 0; j < 2; j++)
486  if (vp56_rac_get_prob_branchy(&s->c, 252))
487  s->prob.p.tx16p[i][j] =
488  update_prob(&s->c, s->prob.p.tx16p[i][j]);
489  for (i = 0; i < 2; i++)
490  for (j = 0; j < 3; j++)
491  if (vp56_rac_get_prob_branchy(&s->c, 252))
492  s->prob.p.tx32p[i][j] =
493  update_prob(&s->c, s->prob.p.tx32p[i][j]);
494  }
495  }
496 
497  // coef updates
498  for (i = 0; i < 4; i++) {
499  uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
500  if (vp8_rac_get(&s->c)) {
501  for (j = 0; j < 2; j++)
502  for (k = 0; k < 2; k++)
503  for (l = 0; l < 6; l++)
504  for (m = 0; m < 6; m++) {
505  uint8_t *p = s->prob.coef[i][j][k][l][m];
506  uint8_t *r = ref[j][k][l][m];
507  if (m >= 3 && l == 0) // dc only has 3 pt
508  break;
509  for (n = 0; n < 3; n++) {
510  if (vp56_rac_get_prob_branchy(&s->c, 252))
511  p[n] = update_prob(&s->c, r[n]);
512  else
513  p[n] = r[n];
514  }
515  p[3] = 0;
516  }
517  } else {
518  for (j = 0; j < 2; j++)
519  for (k = 0; k < 2; k++)
520  for (l = 0; l < 6; l++)
521  for (m = 0; m < 6; m++) {
522  uint8_t *p = s->prob.coef[i][j][k][l][m];
523  uint8_t *r = ref[j][k][l][m];
524  if (m > 3 && l == 0) // dc only has 3 pt
525  break;
526  memcpy(p, r, 3);
527  p[3] = 0;
528  }
529  }
530  if (s->txfmmode == i)
531  break;
532  }
533 
534  // mode updates
535  for (i = 0; i < 3; i++)
536  if (vp56_rac_get_prob_branchy(&s->c, 252))
537  s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
538  if (!s->keyframe && !s->intraonly) {
539  for (i = 0; i < 7; i++)
540  for (j = 0; j < 3; j++)
541  if (vp56_rac_get_prob_branchy(&s->c, 252))
542  s->prob.p.mv_mode[i][j] =
543  update_prob(&s->c, s->prob.p.mv_mode[i][j]);
544 
545  if (s->filtermode == FILTER_SWITCHABLE)
546  for (i = 0; i < 4; i++)
547  for (j = 0; j < 2; j++)
548  if (vp56_rac_get_prob_branchy(&s->c, 252))
549  s->prob.p.filter[i][j] =
550  update_prob(&s->c, s->prob.p.filter[i][j]);
551 
552  for (i = 0; i < 4; i++)
553  if (vp56_rac_get_prob_branchy(&s->c, 252))
554  s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
555 
556  if (s->allowcompinter) {
557  s->comppredmode = vp8_rac_get(&s->c);
558  if (s->comppredmode)
559  s->comppredmode += vp8_rac_get(&s->c);
560  if (s->comppredmode == PRED_SWITCHABLE)
561  for (i = 0; i < 5; i++)
562  if (vp56_rac_get_prob_branchy(&s->c, 252))
563  s->prob.p.comp[i] =
564  update_prob(&s->c, s->prob.p.comp[i]);
565  } else {
567  }
568 
569  if (s->comppredmode != PRED_COMPREF) {
570  for (i = 0; i < 5; i++) {
571  if (vp56_rac_get_prob_branchy(&s->c, 252))
572  s->prob.p.single_ref[i][0] =
573  update_prob(&s->c, s->prob.p.single_ref[i][0]);
574  if (vp56_rac_get_prob_branchy(&s->c, 252))
575  s->prob.p.single_ref[i][1] =
576  update_prob(&s->c, s->prob.p.single_ref[i][1]);
577  }
578  }
579 
580  if (s->comppredmode != PRED_SINGLEREF) {
581  for (i = 0; i < 5; i++)
582  if (vp56_rac_get_prob_branchy(&s->c, 252))
583  s->prob.p.comp_ref[i] =
584  update_prob(&s->c, s->prob.p.comp_ref[i]);
585  }
586 
587  for (i = 0; i < 4; i++)
588  for (j = 0; j < 9; j++)
589  if (vp56_rac_get_prob_branchy(&s->c, 252))
590  s->prob.p.y_mode[i][j] =
591  update_prob(&s->c, s->prob.p.y_mode[i][j]);
592 
593  for (i = 0; i < 4; i++)
594  for (j = 0; j < 4; j++)
595  for (k = 0; k < 3; k++)
596  if (vp56_rac_get_prob_branchy(&s->c, 252))
597  s->prob.p.partition[3 - i][j][k] =
598  update_prob(&s->c,
599  s->prob.p.partition[3 - i][j][k]);
600 
601  // mv fields don't use the update_prob subexp model for some reason
602  for (i = 0; i < 3; i++)
603  if (vp56_rac_get_prob_branchy(&s->c, 252))
604  s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
605 
606  for (i = 0; i < 2; i++) {
607  if (vp56_rac_get_prob_branchy(&s->c, 252))
608  s->prob.p.mv_comp[i].sign =
609  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
610 
611  for (j = 0; j < 10; j++)
612  if (vp56_rac_get_prob_branchy(&s->c, 252))
613  s->prob.p.mv_comp[i].classes[j] =
614  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
615 
616  if (vp56_rac_get_prob_branchy(&s->c, 252))
617  s->prob.p.mv_comp[i].class0 =
618  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
619 
620  for (j = 0; j < 10; j++)
621  if (vp56_rac_get_prob_branchy(&s->c, 252))
622  s->prob.p.mv_comp[i].bits[j] =
623  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
624  }
625 
626  for (i = 0; i < 2; i++) {
627  for (j = 0; j < 2; j++)
628  for (k = 0; k < 3; k++)
629  if (vp56_rac_get_prob_branchy(&s->c, 252))
630  s->prob.p.mv_comp[i].class0_fp[j][k] =
631  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
632 
633  for (j = 0; j < 3; j++)
634  if (vp56_rac_get_prob_branchy(&s->c, 252))
635  s->prob.p.mv_comp[i].fp[j] =
636  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
637  }
638 
639  if (s->highprecisionmvs) {
640  for (i = 0; i < 2; i++) {
641  if (vp56_rac_get_prob_branchy(&s->c, 252))
642  s->prob.p.mv_comp[i].class0_hp =
643  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
644 
645  if (vp56_rac_get_prob_branchy(&s->c, 252))
646  s->prob.p.mv_comp[i].hp =
647  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
648  }
649  }
650  }
651 
652  return (data2 - data) + size2;
653 }
654 
655 static int decode_subblock(AVCodecContext *avctx, int row, int col,
656  VP9Filter *lflvl,
657  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
658 {
659  VP9Context *s = avctx->priv_data;
660  int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
661  (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
662  int ret;
663  const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
664  : s->prob.p.partition[bl][c];
665  enum BlockPartition bp;
666  ptrdiff_t hbs = 4 >> bl;
667 
668  if (bl == BL_8X8) {
670  ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
671  } else if (col + hbs < s->cols) {
672  if (row + hbs < s->rows) {
674  switch (bp) {
675  case PARTITION_NONE:
676  ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
677  bl, bp);
678  break;
679  case PARTITION_H:
680  ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
681  bl, bp);
682  if (!ret) {
683  yoff += hbs * 8 * s->cur_frame->linesize[0];
684  uvoff += hbs * 4 * s->cur_frame->linesize[1];
685  ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
686  yoff, uvoff, bl, bp);
687  }
688  break;
689  case PARTITION_V:
690  ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
691  bl, bp);
692  if (!ret) {
693  yoff += hbs * 8;
694  uvoff += hbs * 4;
695  ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
696  yoff, uvoff, bl, bp);
697  }
698  break;
699  case PARTITION_SPLIT:
700  ret = decode_subblock(avctx, row, col, lflvl,
701  yoff, uvoff, bl + 1);
702  if (!ret) {
703  ret = decode_subblock(avctx, row, col + hbs, lflvl,
704  yoff + 8 * hbs, uvoff + 4 * hbs,
705  bl + 1);
706  if (!ret) {
707  yoff += hbs * 8 * s->cur_frame->linesize[0];
708  uvoff += hbs * 4 * s->cur_frame->linesize[1];
709  ret = decode_subblock(avctx, row + hbs, col, lflvl,
710  yoff, uvoff, bl + 1);
711  if (!ret) {
712  ret = decode_subblock(avctx, row + hbs, col + hbs,
713  lflvl, yoff + 8 * hbs,
714  uvoff + 4 * hbs, bl + 1);
715  }
716  }
717  }
718  break;
719  default:
720  av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
721  return AVERROR_INVALIDDATA;
722  }
723  } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
724  bp = PARTITION_SPLIT;
725  ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
726  if (!ret)
727  ret = decode_subblock(avctx, row, col + hbs, lflvl,
728  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
729  } else {
730  bp = PARTITION_H;
731  ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
732  bl, bp);
733  }
734  } else if (row + hbs < s->rows) {
735  if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
736  bp = PARTITION_SPLIT;
737  ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
738  if (!ret) {
739  yoff += hbs * 8 * s->cur_frame->linesize[0];
740  uvoff += hbs * 4 * s->cur_frame->linesize[1];
741  ret = decode_subblock(avctx, row + hbs, col, lflvl,
742  yoff, uvoff, bl + 1);
743  }
744  } else {
745  bp = PARTITION_V;
746  ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
747  bl, bp);
748  }
749  } else {
750  bp = PARTITION_SPLIT;
751  ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
752  }
753  s->counts.partition[bl][c][bp]++;
754 
755  return ret;
756 }
757 
/*
 * Apply the in-loop deblocking filter to one 64x64 superblock at (row, col).
 * lflvl holds the per-edge filter levels and four sets of edge masks
 * (mask[plane][dir][y]) built during block decoding. For each plane the
 * column (vertical) edges are filtered first, then the row (horizontal)
 * edges. Per edge: L = filter level, H = high-edge-variance threshold
 * (level >> 4), E/I = mblim/lim LUT values for that level. When two
 * adjacent edges share a filter call, the second edge's L/E/I/H are packed
 * into bits 8+ and a mix2 function handles both at once.
 */
static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                int row, int col,
                                ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *dst = s->cur_frame->data[0] + yoff, *lvl = lflvl->level;
    ptrdiff_t ls_y = s->cur_frame->linesize[0], ls_uv = s->cur_frame->linesize[1];
    int y, x, p;

    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
     * if you think of them as acting on a 8x8 block max, we can interleave
     * each v/h within the single x loop, but that only works if we work on
     * 8 pixel blocks, and we won't always do that (we want at least 16px
     * to use SSE2 optimizations, perhaps 32 for AVX2). */

    // filter edges between columns, Y plane (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        // two mask rows per iteration so vertically-adjacent edges can pair
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) { // never filter the frame's left border
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // pack the second edge's parameters into bits 8+
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            if (hm13 & x) { // 4px sub-edges (mask index 3)
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = s->cur_frame->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        // two bits per 8px column: step x by 2 bits, ptr by 16px
        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) { // never filter the frame's top border
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            if (vm3 & x) { // 4px sub-edges
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes (half resolution: level array is
    // sampled at every other entry, offsets are halved)
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                // advance the level pointer only on odd bit positions
                // (two mask bits per level entry at chroma resolution)
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = s->cur_frame->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            // chroma level rows advance every second y iteration
            if (y & 1)
                lvl += 16;
        }
    }
}
998 
/* Compute the 8-pixel-block range [*start, *end) covered by tile |idx|
 * when the picture's |n| 64x64 superblocks are split into 2^log2_n tiles.
 * Both bounds are clamped to the picture edge (n superblocks) and then
 * converted from superblock units to 8-pixel-block units (x8). */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int last  = ((idx + 1) * n) >> log2_n;

    if (first > n)
        first = n;
    if (last > n)
        last = n;

    *start = first << 3;
    *end   = last << 3;
}
1006 
/* Decode one raw VP9 frame from |data|/|size| into |frame|.
 * Returns 0 on success (setting *got_frame when a displayable frame was
 * produced) or a negative AVERROR code on failure.
 * NOTE(review): several interior lines of this listing were lost during
 * extraction (doc lines 1035, 1040-1041, 1055, 1085, 1175); compare against
 * upstream libavcodec/vp9.c before relying on the elided statements. */
1007 static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
1008  int *got_frame, const uint8_t *data, int size)
1009 {
1010  VP9Context *s = avctx->priv_data;
1011  int ret, tile_row, tile_col, i, ref = -1, row, col;
1012  ptrdiff_t yoff = 0, uvoff = 0;
1013 
 // Parse the frame header.  A return of 0 (not an error) means this is a
 // "show existing frame" packet that just re-displays reference |ref|.
1014  ret = decode_frame_header(avctx, data, size, &ref);
1015  if (ret < 0) {
1016  return ret;
1017  } else if (!ret) {
1018  if (!s->refs[ref]->buf[0]) {
1019  av_log(avctx, AV_LOG_ERROR,
1020  "Requested reference %d not available\n", ref);
1021  return AVERROR_INVALIDDATA;
1022  }
1023 
1024  ret = av_frame_ref(frame, s->refs[ref]);
1025  if (ret < 0)
1026  return ret;
1027  *got_frame = 1;
1028  return 0;
1029  }
 // Advance past the header bytes consumed by decode_frame_header().
1030  data += ret;
1031  size -= ret;
1032 
1033  s->cur_frame = frame;
1034 
 // NOTE(review): doc line 1035 lost here — likely av_frame_unref(s->cur_frame).
1036  if ((ret = ff_get_buffer(avctx, s->cur_frame,
1037  s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1038  return ret;
1039  s->cur_frame->key_frame = s->keyframe;
 // NOTE(review): doc lines 1040-1041 lost here (presumably the pict_type
 // assignment) — confirm against upstream.
1042 
1043  // main tile decode loop
 // Reset the "above" (row-above) prediction contexts for the new frame.
1044  memset(s->above_partition_ctx, 0, s->cols);
1045  memset(s->above_skip_ctx, 0, s->cols);
1046  if (s->keyframe || s->intraonly)
1047  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1048  else
1049  memset(s->above_mode_ctx, NEARESTMV, s->cols);
1050  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1051  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
1052  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
1053  memset(s->above_segpred_ctx, 0, s->cols);
1054  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
 // NOTE(review): the first line of this set_tile_offset(...) call (doc
 // line 1055) was lost in extraction; only its trailing arguments remain.
1056  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
 // Read each tile's byte size and set up one range coder per tile column;
 // the last tile of the frame implicitly takes all remaining bytes.
1057  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
1058  int64_t tile_size;
1059 
1060  if (tile_col == s->tiling.tile_cols - 1 &&
1061  tile_row == s->tiling.tile_rows - 1) {
1062  tile_size = size;
1063  } else {
1064  tile_size = AV_RB32(data);
1065  data += 4;
1066  size -= 4;
1067  }
1068  if (tile_size > size)
1069  return AVERROR_INVALIDDATA;
1070  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
1071  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
1072  return AVERROR_INVALIDDATA;
1073  data += tile_size;
1074  size -= tile_size;
1075  }
1076 
1077  for (row = s->tiling.tile_row_start;
1078  row < s->tiling.tile_row_end;
1079  row += 8, yoff += s->cur_frame->linesize[0] * 64,
1080  uvoff += s->cur_frame->linesize[1] * 32) {
1081  VP9Filter *lflvl = s->lflvl;
1082  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1083 
1084  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
 // NOTE(review): the first line of this set_tile_offset(...) call (doc
 // line 1085) was lost in extraction.
1086  &s->tiling.tile_col_end,
1087  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
1088 
 // Reset the "left" (column-left) contexts at every tile boundary.
1089  memset(s->left_partition_ctx, 0, 8);
1090  memset(s->left_skip_ctx, 0, 8);
1091  if (s->keyframe || s->intraonly)
1092  memset(s->left_mode_ctx, DC_PRED, 16);
1093  else
1094  memset(s->left_mode_ctx, NEARESTMV, 8);
1095  memset(s->left_y_nnz_ctx, 0, 16);
1096  memset(s->left_uv_nnz_ctx, 0, 16);
1097  memset(s->left_segpred_ctx, 0, 8);
1098 
 // Resume this tile column's range coder, decode one row of 64x64
 // superblocks, then store the coder state back for the next sb row.
1099  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
1100  for (col = s->tiling.tile_col_start;
1101  col < s->tiling.tile_col_end;
1102  col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
1103  // FIXME integrate with lf code (i.e. zero after each
1104  // use, similar to invtxfm coefficients, or similar)
1105  memset(lflvl->mask, 0, sizeof(lflvl->mask));
1106 
1107  if ((ret = decode_subblock(avctx, row, col, lflvl,
1108  yoff2, uvoff2, BL_64X64)) < 0)
1109  return ret;
1110  }
1111  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
1112  }
1113 
1114  // backup pre-loopfilter reconstruction data for intra
1115  // prediction of next row of sb64s
1116  if (row + 8 < s->rows) {
1117  memcpy(s->intra_pred_data[0],
1118  s->cur_frame->data[0] + yoff +
1119  63 * s->cur_frame->linesize[0],
1120  8 * s->cols);
1121  memcpy(s->intra_pred_data[1],
1122  s->cur_frame->data[1] + uvoff +
1123  31 * s->cur_frame->linesize[1],
1124  4 * s->cols);
1125  memcpy(s->intra_pred_data[2],
1126  s->cur_frame->data[2] + uvoff +
1127  31 * s->cur_frame->linesize[2],
1128  4 * s->cols);
1129  }
1130 
1131  // loopfilter one row
1132  if (s->filter.level) {
1133  yoff2 = yoff;
1134  uvoff2 = uvoff;
1135  lflvl = s->lflvl;
1136  for (col = 0; col < s->cols;
1137  col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
1138  loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
1139  }
1140  }
1141  }
1142 
1143  // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
1144  // probability maintenance between frames)
1145  if (s->refreshctx) {
1146  if (s->parallelmode) {
 // Parallel mode: copy the probabilities used for this frame verbatim
 // into the selected frame context (no backward adaptation).
1147  int j, k, l, m;
1148  for (i = 0; i < 4; i++) {
1149  for (j = 0; j < 2; j++)
1150  for (k = 0; k < 2; k++)
1151  for (l = 0; l < 6; l++)
1152  for (m = 0; m < 6; m++)
1153  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
1154  s->prob.coef[i][j][k][l][m], 3);
1155  if (s->txfmmode == i)
1156  break;
1157  }
1158  s->prob_ctx[s->framectxid].p = s->prob.p;
1159  } else {
1160  ff_vp9_adapt_probs(s);
1161  }
1162  }
 // This frame's motion vectors become the "previous frame" MVs.
1163  FFSWAP(VP9MVRefPair *, s->mv[0], s->mv[1]);
1164 
1165  // ref frame setup
1166  for (i = 0; i < 8; i++)
1167  if (s->refreshrefmask & (1 << i)) {
1168  av_frame_unref(s->refs[i]);
1169  ret = av_frame_ref(s->refs[i], s->cur_frame);
1170  if (ret < 0)
1171  return ret;
1172  }
1173 
 // NOTE(review): doc line 1175 lost here — presumably "*got_frame = 0;".
1174  if (s->invisible)
1176  else
1177  *got_frame = 1;
1178 
1179  return 0;
1180 }
1181 
1182 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1183  int *got_frame, AVPacket *avpkt)
1184 {
1185  const uint8_t *data = avpkt->data;
1186  int size = avpkt->size;
1187  int marker, ret;
1188 
1189  /* Read superframe index - this is a collection of individual frames
1190  * that together lead to one visible frame */
1191  marker = data[size - 1];
1192  if ((marker & 0xe0) == 0xc0) {
1193  int nbytes = 1 + ((marker >> 3) & 0x3);
1194  int n_frames = 1 + (marker & 0x7);
1195  int idx_sz = 2 + n_frames * nbytes;
1196 
1197  if (size >= idx_sz && data[size - idx_sz] == marker) {
1198  const uint8_t *idx = data + size + 1 - idx_sz;
1199 
1200  while (n_frames--) {
1201  unsigned sz = AV_RL32(idx);
1202 
1203  if (nbytes < 4)
1204  sz &= (1 << (8 * nbytes)) - 1;
1205  idx += nbytes;
1206 
1207  if (sz > size) {
1208  av_log(avctx, AV_LOG_ERROR,
1209  "Superframe packet size too big: %u > %d\n",
1210  sz, size);
1211  return AVERROR_INVALIDDATA;
1212  }
1213 
1214  ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1215  if (ret < 0)
1216  return ret;
1217  data += sz;
1218  size -= sz;
1219  }
1220  return size;
1221  }
1222  }
1223 
1224  /* If we get here, there was no valid superframe index, i.e. this is just
1225  * one whole single frame. Decode it as such from the complete input buf. */
1226  if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
1227  return ret;
1228  return size;
1229 }
1230 
/* Free all per-decoder allocations; always returns 0.
 * NOTE(review): the signature line (doc line 1231, presumably
 * "static av_cold int vp9_decode_free(AVCodecContext *avctx)") and doc
 * line 1240 were lost in extraction; the name is grounded by the call in
 * the allocation-failure path of the init function below. */
1232 {
1233  VP9Context *s = avctx->priv_data;
1234  int i;
1235 
 // Drop every reference-frame buffer (av_frame_free handles NULL slots).
1236  for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1237  av_frame_free(&s->refs[i]);
1238 
 // Release the per-tile-column range-coder array.
1239  av_freep(&s->c_b);
1241 
1242  return 0;
1243 }
1244 
/* One-time decoder initialization: set up DSP tables and allocate the
 * reference-frame slots.  Returns 0 or AVERROR(ENOMEM).
 * NOTE(review): the signature line (doc line 1245, presumably
 * "static av_cold int vp9_decode_init(AVCodecContext *avctx)") was lost
 * in extraction; the name is grounded by the .init field in the AVCodec
 * table below. */
1246 {
1247  VP9Context *s = avctx->priv_data;
1248  int i;
1249 
 // Output is 8-bit YUV 4:2:0.
1250  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1251 
1252  ff_vp9dsp_init(&s->dsp);
1253  ff_videodsp_init(&s->vdsp, 8);
1254 
 // Allocate every reference-frame slot; on failure, free whatever was
 // already allocated before bailing out.
1255  for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1256  s->refs[i] = av_frame_alloc();
1257  if (!s->refs[i]) {
1258  vp9_decode_free(avctx);
1259  return AVERROR(ENOMEM);
1260  }
1261  }
1262 
 // -1 marks the loop-filter sharpness as "not yet set" — presumably so the
 // first frame header forces a filter-limit LUT rebuild; confirm against
 // decode_frame_header (not visible in this chunk).
1263  s->filter.sharpness = -1;
1264 
1265  return 0;
1266 }
1267 
/* Codec registration table for the VP9 decoder.
 * NOTE(review): the opening line (doc line 1268, presumably
 * "AVCodec ff_vp9_decoder = {") and doc lines 1275-1277 (likely the
 * .close, .decode and .flush callbacks) were lost in extraction. */
1269  .name = "vp9",
1270  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1271  .type = AVMEDIA_TYPE_VIDEO,
1272  .id = AV_CODEC_ID_VP9,
1273  .priv_data_size = sizeof(VP9Context),
1274  .init = vp9_decode_init,
1278  .capabilities = CODEC_CAP_DR1,
1279 };