35 #define VP9_SYNCCODE 0x498342
148 #define MAX_SEGMENT 8
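/* The three bytes 0x49 0x83 0x42 open every VP9 uncompressed frame header
 * (the sync code); VP9 segmentation supports at most 8 segments. */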
207 unsigned coef[4][2][2][6][6][3];
208 unsigned eob[4][2][2][6][6][2];
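/* Token counters gathered while decoding, used afterwards for backward
 * probability adaptation; indexed (presumably) by tx size, plane type,
 * intra vs inter, coefficient band, coefficient context and token bin,
 * with eob[] holding the matching end-of-block hit/miss counts. */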
258 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
259 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
261 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
262 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
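/* Block width/height for the 13 VP9 block sizes (BS_64x64 .. BS_4x4):
 * the first table is in 4x4-block units, the second in 8x8-block units
 * (sub-8x8 sizes clamp to 1). */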
329 s->cols = (w + 7) >> 3;
330 s->rows = (h + 7) >> 3;
332 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
409 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
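/* Inverse "recentering" for delta-coded probability updates: decoded
 * values alternate around the predicted value m, e.g. for m = 10,
 * v = 0..4 yields 10, 9, 11, 8, 12, while v > 2 * m passes through
 * unchanged. */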
415 static const int inv_map_table[255] = {
416 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
417 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
418 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
419 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
420 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
421 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
422 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
423 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
424 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
425 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
426 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
427 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
428 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
429 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
430 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
431 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
432 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
433 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
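/* Maps the decoded 8-bit probability-update index back to its value: the
 * first 20 entries are the coarse steps 7, 20, ..., 254; the remaining
 * entries list the values skipped by those steps in ascending order. */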
475 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
589 for (i = 0; i < 3; i++) {
593 if (refw == w && refh == h) {
596 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
598 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
602 s->mvscale[i][0] = (refw << 14) / w;
603 s->mvscale[i][1] = (refh << 14) / h;
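/* 14-bit fixed-point ratio between reference and current frame size
 * (1 << 14 == unscaled), used by the scaled motion-compensation path; the
 * checks above enforce the spec limits of at most 2x larger and at most
 * 16x smaller references. */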
624 for (i = 0; i < 4; i++)
627 for (i = 0; i < 2; i++)
646 for (i = 0; i < 7; i++)
650 for (i = 0; i < 3; i++)
659 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
666 for (i = 0; i < 8; i++) {
685 int qyac, qydc, quvac, quvdc, lflvl, sh;
695 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
698 qyac = av_clip_uintp2(qyac, 8);
716 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
717 for (j = 1; j < 4; j++) {
735 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
736 max = FFMAX(0, max - 1);
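/* Counts how often the superblock-column count can be halved while every
 * tile column keeps at least four superblocks; "max" thus bounds the
 * log2 number of tile columns accepted below. */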
771 if (size2 > size - (data2 - data)) {
800 for (i = 0; i < 2; i++)
803 for (i = 0; i < 2; i++)
804 for (j = 0; j < 2; j++)
808 for (i = 0; i < 2; i++)
809 for (j = 0; j < 3; j++)
817 for (i = 0; i < 4; i++) {
820 for (j = 0; j < 2; j++)
821 for (k = 0; k < 2; k++)
822 for (l = 0; l < 6; l++)
823 for (m = 0; m < 6; m++) {
826 if (m >= 3 && l == 0)
828 for (n = 0; n < 3; n++) {
838 for (j = 0; j < 2; j++)
839 for (k = 0; k < 2; k++)
840 for (l = 0; l < 6; l++)
841 for (m = 0; m < 6; m++) {
855 for (i = 0; i < 3; i++)
859 for (i = 0; i < 7; i++)
860 for (j = 0; j < 3; j++)
866 for (i = 0; i < 4; i++)
867 for (j = 0; j < 2; j++)
872 for (i = 0; i < 4; i++)
881 for (i = 0; i < 5; i++)
890 for (i = 0; i < 5; i++) {
901 for (i = 0; i < 5; i++)
907 for (i = 0; i < 4; i++)
908 for (j = 0; j < 9; j++)
913 for (i = 0; i < 4; i++)
914 for (j = 0; j < 4; j++)
915 for (k = 0; k < 3; k++)
921 for (i = 0; i < 3; i++)
925 for (i = 0; i < 2; i++) {
929 for (j = 0; j < 10; j++)
937 for (j = 0; j < 10; j++)
943 for (i = 0; i < 2; i++) {
944 for (j = 0; j < 2; j++)
945 for (k = 0; k < 3; k++)
950 for (j = 0; j < 3; j++)
957 for (i = 0; i < 2; i++) {
969 return (data2 - data) + size2;
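/* Header parsing returns the total header size: the uncompressed header
 * (data2 - data) plus the boolean-coded compressed header (size2). */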
980 VP56mv *pmv, int ref, int z, int idx, int sb)
982 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
983 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
984 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
985 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
986 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
987 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
988 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
989 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
990 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
991 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
992 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
993 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
994 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
995 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
996 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
997 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
998 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
999 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1000 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1001 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1002 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1003 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1004 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1005 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1006 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1007 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1008 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1011 int row = s->row, col = s->col, row7 = s->row7;
1012 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1013 #define INVALID_MV 0x80008000U
1017 #define RETURN_DIRECT_MV(mv) \
1019 uint32_t m = AV_RN32A(&mv); \
1023 } else if (mem == INVALID_MV) { \
1025 } else if (m != mem) { \
1032 if (sb == 2 || sb == 1) {
1034 } else if (sb == 3) {
1040 #define RETURN_MV(mv) \
1045 clamp_mv(&tmp, &mv, s); \
1046 m = AV_RN32A(&tmp); \
1050 } else if (mem == INVALID_MV) { \
1052 } else if (m != mem) { \
1057 uint32_t m = AV_RN32A(&mv); \
1059 clamp_mv(pmv, &mv, s); \
1061 } else if (mem == INVALID_MV) { \
1063 } else if (m != mem) { \
1064 clamp_mv(pmv, &mv, s); \
1072 if (mv->ref[0] == ref) {
1074 } else if (mv->ref[1] == ref) {
1080 if (mv->ref[0] == ref) {
1082 } else if (mv->ref[1] == ref) {
1092 for (; i < 8; i++) {
1093 int c = p[i][0] + col, r = p[i][1] + row;
1098 if (mv->ref[0] == ref) {
1100 } else if (mv->ref[1] == ref) {
1112 if (mv->ref[0] == ref) {
1114 } else if (mv->ref[1] == ref) {
1119 #define RETURN_SCALE_MV(mv, scale) \
1122 VP56mv mv_temp = { -mv.x, -mv.y }; \
1123 RETURN_MV(mv_temp); \
1130 for (i = 0; i < 8; i++) {
1131 int c = p[i][0] + col, r = p[i][1] + row;
1136 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1139 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1153 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1156 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1167 #undef RETURN_SCALE_MV
1181 for (n = 0, m = 0; m < c; m++) {
1207 n = (n << 3) | (bit << 1);
1220 return sign ? -(n + 1) : (n + 1);
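/* An MV component is coded as sign, magnitude class and extra bits; the
 * decoded magnitude is biased by 1, presumably because a zero component
 * is already signalled by the MV joint code. */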
1235 mode == NEWMV ? -1 : sb);
1237 if ((mode == NEWMV || sb == -1) &&
1252 if (mode == NEWMV) {
1266 mode == NEWMV ? -1 : sb);
1267 if ((mode == NEWMV || sb == -1) &&
1282 if (mode == NEWMV) {
1307 int v16 = v * 0x0101;
1315 uint32_t v32 = v * 0x01010101;
1324 uint64_t v64 = v * 0x0101010101010101ULL;
1330 uint32_t v32 = v * 0x01010101;
1345 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1348 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
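/* Per-block-size partition context patterns, one bit per 8x8 row (left)
 * or column (above); SET_CTXS below splats these into the left/above
 * partition context arrays. */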
1356 int row = s->row, col = s->col, row7 = s->row7;
1357 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1361 int vref, filter_id;
1378 for (y = 0; y < h4; y++) {
1379 int idx_base = (y + row) * 8 * s->sb_cols + col;
1380 for (x = 0; x < w4; x++)
1381 pred = FFMIN(pred, refsegmap[idx_base + x]);
1387 &refsegmap[idx_base], w4);
1426 if (have_a && have_l) {
1450 } else if (have_l) {
1498 l[0] = a[1] = b->mode[1];
1500 l[0] = a[1] = b->mode[1] = b->mode[0];
1508 l[1] = a[1] = b->mode[3];
1510 l[1] = a[1] = b->mode[3] = b->mode[2];
1514 l[1] = a[1] = b->mode[3] = b->mode[1];
1526 } else if (b->intra) {
1555 static const uint8_t size_group[10] = {
1556 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1558 int sz = size_group[b->bs];
1569 static const uint8_t inter_mode_ctx_lut[14][14] = {
1570 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1571 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1572 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1573 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1574 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1575 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1576 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1577 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1578 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1579 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1580 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1581 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1582 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1583 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1618 } else if (have_l) {
1649 if (refl == refa && refa == s->varcompref[1]) {
1656 c = (refa == refl) ? 3 : 1;
1673 c = (refl == refa) ? 4 : 2;
1685 } else if (have_l) {
1811 } else if (have_l) {
1825 b->ref[0] = 1 + bit;
1834 static const uint8_t off[10] = {
1835 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1931 #define SPLAT_CTX(var, val, n) \
1933 case 1: var = val; break; \
1934 case 2: AV_WN16A(&var, val * 0x0101); break; \
1935 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1936 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1938 uint64_t v64 = val * 0x0101010101010101ULL; \
1939 AV_WN64A( &var, v64); \
1940 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1945 #define SPLAT_CTX(var, val, n) \
1947 case 1: var = val; break; \
1948 case 2: AV_WN16A(&var, val * 0x0101); break; \
1949 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1951 uint32_t v32 = val * 0x01010101; \
1952 AV_WN32A( &var, v32); \
1953 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1957 uint32_t v32 = val * 0x01010101; \
1958 AV_WN32A( &var, v32); \
1959 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1960 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1961 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1968 #define SET_CTXS(dir, off, n) \
1970 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1971 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1972 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1973 if (!s->keyframe && !s->intraonly) { \
1974 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1975 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1976 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1978 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1979 if (s->filtermode == FILTER_SWITCHABLE) { \
1980 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1985 case 1: SET_CTXS(above, col, 1); break;
1986 case 2: SET_CTXS(above, col, 2); break;
1987 case 4: SET_CTXS(above, col, 4); break;
1988 case 8: SET_CTXS(above, col, 8); break;
1991 case 1: SET_CTXS(left, row7, 1); break;
1992 case 2: SET_CTXS(left, row7, 2); break;
1993 case 4: SET_CTXS(left, row7, 4); break;
1994 case 8: SET_CTXS(left, row7, 8); break;
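/* SPLAT_CTX replicates one context byte across 1/2/4/8/16 entries with a
 * single multiply by 0x0101...01 and an aligned store; SET_CTXS uses it to
 * stamp the block's skip/tx/partition contexts (plus intra, comp, mode,
 * ref and filter contexts on inter frames) into the above/left arrays. */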
2014 for (n = 0; n < w4 * 2; n++) {
2018 for (n = 0; n < h4 * 2; n++) {
2026 for (y = 0; y < h4; y++) {
2027 int x, o = (row + y) * s->sb_cols * 8 + col;
2031 for (x = 0; x < w4; x++) {
2035 } else if (b->comp) {
2036 for (x = 0; x < w4; x++) {
2037 mv[x].ref[0] = b->ref[0];
2038 mv[x].ref[1] = b->ref[1];
2043 for (x = 0; x < w4; x++) {
2044 mv[x].ref[0] = b->ref[0];
2055 int is_tx32x32, unsigned (*cnt)[6][3],
2056 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2057 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2058 const int16_t *band_counts, const int16_t *qmul)
2060 int i = 0, band = 0, band_left = band_counts[band];
2074 cnt[band][nnz][0]++;
2076 band_left = band_counts[++band];
2078 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2080 if (++i == n_coeffs)
2087 cnt[band][nnz][1]++;
2095 cnt[band][nnz][2]++;
2098 cache[rc] = val = 2;
2149 band_left = band_counts[++band];
2154 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2156 } while (++i < n_coeffs);
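/* Coefficient tokens are decoded in scan order; after each token the
 * context (nnz) for the next position is rebuilt from the two previously
 * decoded neighbors given by nb[], while band_left tracks how many
 * coefficients remain in the current probability band. */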
2162 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2163 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2164 const int16_t (*nb)[2], const int16_t *band_counts,
2165 const int16_t *qmul)
2168 nnz, scan, nb, band_counts, qmul);
2172 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2173 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2174 const int16_t (*nb)[2], const int16_t *band_counts,
2175 const int16_t *qmul)
2178 nnz, scan, nb, band_counts, qmul);
2185 int row = s->row, col = s->col;
2190 int end_x = FFMIN(2 * (s->cols - col), w4);
2191 int end_y = FFMIN(2 * (s->rows - row), h4);
2192 int n, pl, x, y, res;
2195 const int16_t *const *yscans = vp9_scans[tx];
2201 static const int16_t band_counts[4][8] = {
2202 { 1, 2, 3, 4, 3, 16 - 13 },
2203 { 1, 2, 3, 4, 11, 64 - 21 },
2204 { 1, 2, 3, 4, 11, 256 - 21 },
2205 { 1, 2, 3, 4, 11, 1024 - 21 },
2207 const int16_t *y_band_counts = band_counts[b->tx];
2208 const int16_t *uv_band_counts = band_counts[b->uvtx];
2210 #define MERGE(la, end, step, rd) \
2211 for (n = 0; n < end; n += step) \
2212 la[n] = !!rd(&la[n])
2213 #define MERGE_CTX(step, rd) \
2215 MERGE(l, end_y, step, rd); \
2216 MERGE(a, end_x, step, rd); \
2219 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2220 for (n = 0, y = 0; y < end_y; y += step) { \
2221 for (x = 0; x < end_x; x += step, n += step * step) { \
2222 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2223 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2224 c, e, p, a[x] + l[y], yscans[txtp], \
2225 ynbs[txtp], y_band_counts, qmul[0]); \
2226 a[x] = l[y] = !!res; \
2228 AV_WN16A(&s->eob[n], res); \
2235 #define SPLAT(la, end, step, cond) \
2237 for (n = 1; n < end; n += step) \
2238 la[n] = la[n - 1]; \
2239 } else if (step == 4) { \
2241 for (n = 0; n < end; n += step) \
2242 AV_WN32A(&la[n], la[n] * 0x01010101); \
2244 for (n = 0; n < end; n += step) \
2245 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2249 if (HAVE_FAST_64BIT) { \
2250 for (n = 0; n < end; n += step) \
2251 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2253 for (n = 0; n < end; n += step) { \
2254 uint32_t v32 = la[n] * 0x01010101; \
2255 AV_WN32A(&la[n], v32); \
2256 AV_WN32A(&la[n + 4], v32); \
2260 for (n = 0; n < end; n += step) \
2261 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2264 #define SPLAT_CTX(step) \
2266 SPLAT(a, end_x, step, end_x == w4); \
2267 SPLAT(l, end_y, step, end_y == h4); \
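/* The above/left non-zero contexts are kept per 4x4 unit: MERGE_CTX folds
 * them down to one flag per transform block before decoding, and SPLAT_CTX
 * writes the resulting flag back out across all units the transform block
 * covers. */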
2292 #define DECODE_UV_COEF_LOOP(step) \
2293 for (n = 0, y = 0; y < end_y; y += step) { \
2294 for (x = 0; x < end_x; x += step, n += step * step) { \
2295 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2296 16 * step * step, c, e, p, a[x] + l[y], \
2297 uvscan, uvnb, uv_band_counts, qmul[1]); \
2298 a[x] = l[y] = !!res; \
2300 AV_WN16A(&s->uveob[pl][n], res); \
2302 s->uveob[pl][n] = res; \
2314 for (pl = 0; pl < 2; pl++) {
2336 1024, c, e, p, a[0] + l[0],
2337 uvscan, uvnb, uv_band_counts, qmul[1]);
2338 a[0] = l[0] = !!res;
2347 uint8_t *dst_edge, ptrdiff_t stride_edge,
2348 uint8_t *dst_inner, ptrdiff_t stride_inner,
2349 uint8_t *l, int col, int x, int w,
2353 int have_top = row > 0 || y > 0;
2355 int have_right = x < w - 1;
2356 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2378 static const struct {
2387 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2390 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2391 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2393 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2394 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2403 mode = mode_conv[mode][have_left][have_top];
2404 if (edges[mode].needs_top) {
2406 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2407 int n_px_need_tr = 0;
2409 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2416 top = !(row & 7) && !y ?
2418 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2420 topleft = !(row & 7) && !y ?
2422 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2423 &dst_inner[-stride_inner];
2427 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2428 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2429 n_px_need + n_px_need_tr <= n_px_have) {
2433 if (n_px_need <= n_px_have) {
2434 memcpy(*a, top, n_px_need);
2436 memcpy(*a, top, n_px_have);
2437 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2438 n_px_need - n_px_have);
2441 memset(*a, 127, n_px_need);
2443 if (edges[mode].needs_topleft) {
2444 if (have_left && have_top) {
2445 (*a)[-1] = topleft[-1];
2447 (*a)[-1] = have_top ? 129 : 127;
2450 if (tx == TX_4X4 && edges[mode].needs_topright) {
2451 if (have_top && have_right &&
2452 n_px_need + n_px_need_tr <= n_px_have) {
2453 memcpy(&(*a)[4], &top[4], 4);
2455 memset(&(*a)[4], (*a)[3], 4);
2460 if (edges[mode].needs_left) {
2462 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2463 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2464 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2466 if (edges[mode].invert_left) {
2467 if (n_px_need <= n_px_have) {
2468 for (i = 0; i < n_px_need; i++)
2469 l[i] = dst[i * stride - 1];
2471 for (i = 0; i < n_px_have; i++)
2472 l[i] = dst[i * stride - 1];
2473 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2476 if (n_px_need <= n_px_have) {
2477 for (i = 0; i < n_px_need; i++)
2478 l[n_px_need - 1 - i] = dst[i * stride - 1];
2480 for (i = 0; i < n_px_have; i++)
2481 l[n_px_need - 1 - i] = dst[i * stride - 1];
2482 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2486 memset(l, 129, 4 << tx);
2497 int row = s->row, col = s->col;
2498 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2499 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2500 int end_x = FFMIN(2 * (s->cols - col), w4);
2501 int end_y = FFMIN(2 * (s->rows - row), h4);
2503 int uvstep1d = 1 << b->uvtx, p;
2508 for (n = 0, y = 0; y < end_y; y += step1d) {
2509 uint8_t *ptr = dst, *ptr_r = dst_r;
2510 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2511 ptr_r += 4 * step1d, n += step) {
2521 col, x, w4, row, y, b->tx, 0);
2535 step = 1 << (b->uvtx * 2);
2536 for (p = 0; p < 2; p++) {
2537 dst = s->dst[1 + p];
2539 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2540 uint8_t *ptr = dst, *ptr_r = dst_r;
2541 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2542 ptr_r += 4 * uvstep1d, n += step) {
2550 col, x, w4, row, y, b->uvtx, p + 1);
2563 uint8_t *dst, ptrdiff_t dst_stride,
2564 const uint8_t *ref, ptrdiff_t ref_stride,
2566 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2567 int bw, int bh, int w, int h,
2568 const uint16_t *scale, const uint8_t *step)
2570 #define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14)
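/* scale[] is s->mvscale[]: a 14-bit fixed-point size ratio, so the scaled
 * coordinate is shifted back down by 14; the cast to int64_t guards
 * against overflow for large frame positions. */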
2576 int refbw_m1, refbh_m1;
2581 ref += y * ref_stride + x;
2584 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2585 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2589 th = (y + refbh_m1 + 4 + 7) >> 6;
2591 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2593 ref - 3 * ref_stride - 3,
2595 refbw_m1 + 8, refbh_m1 + 8,
2596 x - 3, y - 3, w, h);
2600 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2605 ptrdiff_t dst_stride,
2606 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2607 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2609 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2610 int bw, int bh, int w, int h,
2611 const uint16_t *scale, const uint8_t *step)
2617 int refbw_m1, refbh_m1;
2622 ref_u += y * src_stride_u + x;
2623 ref_v += y * src_stride_v + x;
2626 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2627 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2631 th = (y + refbh_m1 + 4 + 7) >> 5;
2633 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2635 ref_u - 3 * src_stride_u - 3,
2637 refbw_m1 + 8, refbh_m1 + 8,
2638 x - 3, y - 3, w, h);
2640 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2643 ref_v - 3 * src_stride_v - 3,
2645 refbw_m1 + 8, refbh_m1 + 8,
2646 x - 3, y - 3, w, h);
2648 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2650 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2651 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2655 #define FN(x) x##_scaled
2656 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2657 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2658 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2659 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2660 row, col, mv, bw, bh, w, h, i) \
2661 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2662 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2665 #undef mc_chroma_dir
2669 uint8_t *dst, ptrdiff_t dst_stride,
2670 const uint8_t *ref, ptrdiff_t ref_stride,
2672 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2673 int bw, int bh, int w, int h)
2675 int mx = mv->x, my = mv->y, th;
2679 ref += y * ref_stride + x;
2685 th = (y + bh + 4 * !!my + 7) >> 6;
2687 if (x < !!mx * 3 || y < !!my * 3 ||
2688 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2690 ref - !!my * 3 * ref_stride - !!mx * 3,
2692 bw + !!mx * 7, bh + !!my * 7,
2693 x - !!mx * 3, y - !!my * 3, w, h);
2697 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
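/* Luma motion vectors are in 1/8-pel units while the subpel filters take
 * a 1/16-pel phase, hence mx << 1 / my << 1; chroma (below) is at half
 * resolution, so its phase is already 1/16-pel. The emulated-edge path
 * above is taken whenever the filter taps would read outside the frame. */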
2702 ptrdiff_t dst_stride,
2703 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2704 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2706 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2707 int bw, int bh, int w, int h)
2709 int mx = mv->x, my = mv->y, th;
2713 ref_u += y * src_stride_u + x;
2714 ref_v += y * src_stride_v + x;
2720 th = (y + bh + 4 * !!my + 7) >> 5;
2722 if (x < !!mx * 3 || y < !!my * 3 ||
2723 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2725 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2727 bw + !!mx * 7, bh + !!my * 7,
2728 x - !!mx * 3, y - !!my * 3, w, h);
2730 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2733 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2735 bw + !!mx * 7, bh + !!my * 7,
2736 x - !!mx * 3, y - !!my * 3, w, h);
2738 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2740 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2741 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2746 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2747 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2749 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2750 row, col, mv, bw, bh, w, h, i) \
2751 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2752 row, col, mv, bw, bh, w, h)
2754 #undef mc_luma_dir
2755 #undef mc_chroma_dir
2762 int row = s->row, col = s->col;
2765 inter_pred_scaled(ctx);
2772 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2773 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2774 int end_x = FFMIN(2 * (s->cols - col), w4);
2775 int end_y = FFMIN(2 * (s->rows - row), h4);
2777 int uvstep1d = 1 << b->uvtx, p;
2781 for (n = 0, y = 0; y < end_y; y += step1d) {
2783 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2796 step = 1 << (b->uvtx * 2);
2797 for (p = 0; p < 2; p++) {
2798 dst = s->dst[p + 1];
2799 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2801 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2815 int row_and_7, int col_and_7,
2816 int w, int h, int col_end, int row_end,
2829 if (tx == TX_4X4 && is_uv) {
2844 if (tx == TX_4X4 && !skip_inter) {
2845 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2846 int m_col_odd = (t << (w - 1)) - t;
2850 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2852 for (y = row_and_7; y < h + row_and_7; y++) {
2853 int col_mask_id = 2 - !(y & 7);
2855 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2856 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2867 if ((col_end & 1) && (y & 1)) {
2868 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2870 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2874 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2876 for (y = row_and_7; y < h + row_and_7; y++) {
2877 int col_mask_id = 2 - !(y & 3);
2879 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2880 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2881 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2882 lflvl->mask[is_uv][0][y][3] |= m_col;
2883 lflvl->mask[is_uv][1][y][3] |= m_col;
2887 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2890 int mask_id = (tx == TX_8X8);
2891 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2892 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2893 int m_row = m_col & masks[l2];
2897 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2898 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2899 int m_row_8 = m_row - m_row_16;
2901 for (y = row_and_7; y < h + row_and_7; y++) {
2902 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2903 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2906 for (y = row_and_7; y < h + row_and_7; y++)
2907 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2910 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2911 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2912 lflvl->mask[is_uv][1][y][0] |= m_col;
2913 if (y - row_and_7 == h - 1)
2914 lflvl->mask[is_uv][1][y][1] |= m_col;
2916 for (y = row_and_7; y < h + row_and_7; y += step1d)
2917 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2919 } else if (tx != TX_4X4) {
2922 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2923 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2924 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2925 for (y = row_and_7; y < h + row_and_7; y++)
2926 lflvl->mask[is_uv][0][y][mask_id] |= t;
2928 int t8 = t & 0x01, t4 = t - t8;
2930 for (y = row_and_7; y < h + row_and_7; y++) {
2931 lflvl->mask[is_uv][0][y][2] |= t4;
2932 lflvl->mask[is_uv][0][y][1] |= t8;
2934 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2936 int t8 = t & 0x11, t4 = t - t8;
2938 for (y = row_and_7; y < h + row_and_7; y++) {
2939 lflvl->mask[is_uv][0][y][2] |= t4;
2940 lflvl->mask[is_uv][0][y][1] |= t8;
2942 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2948 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2962 s->min_mv.x = -(128 + col * 64);
2963 s->min_mv.y = -(128 + row * 64);
2971 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2978 #define SPLAT_ZERO_CTX(v, n) \
2980 case 1: v = 0; break; \
2981 case 2: AV_ZERO16(&v); break; \
2982 case 4: AV_ZERO32(&v); break; \
2983 case 8: AV_ZERO64(&v); break; \
2984 case 16: AV_ZERO128(&v); break; \
2986 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2988 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2989 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2990 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3008 s->block += w4 * h4 * 64;
3009 s->uvblock[0] += w4 * h4 * 16;
3010 s->uvblock[1] += w4 * h4 * 16;
3011 s->eob += 4 * w4 * h4;
3012 s->uveob[0] += w4 * h4;
3013 s->uveob[1] += w4 * h4;
3022 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3023 (row + h4) > s->rows;
3024 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3025 (row + h4) > s->rows;
3030 s->dst[0] = f->data[0] + yoff;
3038 s->dst[1] = f->data[1] + uvoff;
3039 s->dst[2] = f->data[2] + uvoff;
3050 for (n = 0; o < w; n++) {
3056 s->tmp_y + o, 64, h, 0, 0);
3064 for (n = 1; o < w; n++) {
3070 s->tmp_uv[0] + o, 32, h, 0, 0);
3072 s->tmp_uv[1] + o, 32, h, 0, 0);
3086 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3087 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3090 b->uvtx, skip_inter);
3097 limit >>= (sharp + 3) >> 2;
3098 limit = FFMIN(limit, 9 - sharp);
3100 limit = FFMAX(limit, 1);
3109 s->block += w4 * h4 * 64;
3110 s->uvblock[0] += w4 * h4 * 16;
3111 s->uvblock[1] += w4 * h4 * 16;
3112 s->eob += 4 * w4 * h4;
3113 s->uveob[0] += w4 * h4;
3114 s->uveob[1] += w4 * h4;
3119 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3127 ptrdiff_t hbs = 4 >> bl;
3133 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3134 } else if (col + hbs < s->cols) {
3135 if (row + hbs < s->rows) {
3139 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3142 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3143 yoff += hbs * 8 * y_stride;
3144 uvoff += hbs * 4 * uv_stride;
3145 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3148 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3151 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3154 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3156 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3157 yoff += hbs * 8 * y_stride;
3158 uvoff += hbs * 4 * uv_stride;
3159 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3160 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3161 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3168 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3170 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3173 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3175 } else if (row + hbs < s->rows) {
3178 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3179 yoff += hbs * 8 * y_stride;
3180 uvoff += hbs * 4 * uv_stride;
3181 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3184 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3188 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3194 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3198 ptrdiff_t hbs = 4 >> bl;
3204 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3205 } else if (s->b->bl == bl) {
3206 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3208 yoff += hbs * 8 * y_stride;
3209 uvoff += hbs * 4 * uv_stride;
3210 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3214 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3218 if (col + hbs < s->cols) {
3219 if (row + hbs < s->rows) {
3221 uvoff + 4 * hbs, bl + 1);
3222 yoff += hbs * 8 * y_stride;
3223 uvoff += hbs * 4 * uv_stride;
3224 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3226 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3230 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3232 } else if (row + hbs < s->rows) {
3233 yoff += hbs * 8 * y_stride;
3234 uvoff += hbs * 4 * uv_stride;
3235 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3241 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3256 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3257 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3259 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3260 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3261 unsigned hm = hm1 | hm2 | hm13 | hm23;
3263 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3265 int L = *l, H = L >> 4;
3269 if (hmask1[0] & x) {
3270 if (hmask2[0] & x) {
3276 } else if (hm2 & x) {
3283 [0](ptr, ls_y, E, I, H);
3286 [0](ptr, ls_y, E, I, H);
3289 } else if (hm2 & x) {
3290 int L = l[8], H = L >> 4;
3295 [0](ptr + 8 * ls_y, ls_y, E, I, H);
3299 int L = *l, H = L >> 4;
3311 } else if (hm23 & x) {
3312 int L = l[8], H = L >> 4;
3323 dst = f->data[0] + yoff;
3325 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3326 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3327 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3329 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3332 int L = *l, H = L >> 4;
3336 if (vmask[0] & (x << 1)) {
3342 } else if (vm & (x << 1)) {
3348 [!!(vmask[1] & (x << 1))]
3349 [1](ptr, ls_y, E, I, H);
3352 [1](ptr, ls_y, E, I, H);
3354 } else if (vm & (x << 1)) {
3355 int L = l[1], H = L >> 4;
3359 [1](ptr + 8, ls_y, E, I, H);
3363 int L = *l, H = L >> 4;
3366 if (vm3 & (x << 1)) {
3375 } else if (vm3 & (x << 1)) {
3376 int L = l[1], H = L >> 4;
3385 for (p = 0; p < 2; p++) {
3387 dst = f->data[1 + p] + uvoff;
3388 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3389 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3391 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3392 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3394 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3397 int L = *l, H = L >> 4;
3400 if (hmask1[0] & x) {
3401 if (hmask2[0] & x) {
3407 } else if (hm2 & x) {
3414 [0](ptr, ls_uv, E, I, H);
3417 [0](ptr, ls_uv, E, I, H);
3419 } else if (hm2 & x) {
3420 int L = l[16], H = L >> 4;
3424 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3432 dst = f->data[1 + p] + uvoff;
3433 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3434 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3435 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3437 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3440 int L = *l, H = L >> 4;
3444 if (vmask[0] & (x << 2)) {
3450 } else if (vm & (x << 2)) {
3456 [!!(vmask[1] & (x << 2))]
3457 [1](ptr, ls_uv, E, I, H);
3460 [1](ptr, ls_uv, E, I, H);
3462 } else if (vm & (x << 2)) {
3463 int L = l[2], H = L >> 4;
3467 [1](ptr + 8, ls_uv, E, I, H);
3479 int sb_start = ( idx * n) >> log2_n;
3480 int sb_end = ((idx + 1) * n) >> log2_n;
3481 *start = FFMIN(sb_start, n) << 3;
3482 *end = FFMIN(sb_end, n) << 3;
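/* Maps tile index idx out of 1 << log2_n tiles to its first and last
 * superblock, then converts to 8x8-block units (<< 3); the FFMIN clamps
 * the last tile to the frame edge. */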
3486 int max_count, int update_factor)
3488 unsigned ct = ct0 + ct1, p2, p1;
3494 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3495 p2 = av_clip(p2, 1, 255);
3496 ct = FFMIN(ct, max_count);
3497 update_factor = FASTDIV(update_factor * ct, max_count);
3500 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
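/* Backward adaptation: p2 is this frame's observed probability for the
 * branch (ct0 successes out of ct0 + ct1, rounded and clipped to 1..255);
 * the stored probability p1 moves toward p2 by update_factor / 256, with
 * the factor scaled down when fewer than max_count events were counted. */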
3510 for (i = 0; i < 4; i++)
3511 for (j = 0; j < 2; j++)
3512 for (k = 0; k < 2; k++)
3513 for (l = 0; l < 6; l++)
3514 for (m = 0; m < 6; m++) {
3519 if (l == 0 && m >= 3)
3523 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3536 for (i = 0; i < 3; i++)
3540 for (i = 0; i < 4; i++)
3545 for (i = 0; i < 5; i++)
3551 for (i = 0; i < 5; i++)
3557 for (i = 0; i < 5; i++) {
3561 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3562 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3567 for (i = 0; i < 4; i++)
3568 for (j = 0; j < 4; j++) {
3572 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3573 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3579 for (i = 0; i < 2; i++) {
3585 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3593 for (i = 0; i < 4; i++) {
3597 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3603 for (i = 0; i < 7; i++) {
3607 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3608 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3617 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3618 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3623 for (i = 0; i < 2; i++) {
3625 unsigned *c, (*c2)[2], sum;
3632 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3637 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3640 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3644 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3652 for (j = 0; j < 10; j++)
3653 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3655 for (j = 0; j < 2; j++) {
3658 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3659 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3664 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3665 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3677 for (i = 0; i < 4; i++) {
3681 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3701 for (i = 0; i < 10; i++) {
3705 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3737 for (i = 0; i < 2; i++) {
3742 for (i = 0; i < 8; i++) {
3764 int res, tile_row, tile_col, i, ref, row, col;
3765 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3770 } else if (res == 0) {
3799 for (i = 0; i < 8; i++) {
3839 "Failed to allocate block buffers\n");
3845 for (i = 0; i < 4; i++) {
3846 for (j = 0; j < 2; j++)
3847 for (k = 0; k < 2; k++)
3848 for (l = 0; l < 6; l++)
3849 for (m = 0; m < 6; m++)
3886 if (tile_size > size) {
3901 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3903 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3921 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3925 col < s->tiling.tile_col_end;
3926 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3930 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3942 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3952 if (row + 8 < s->rows) {
3954 f->data[0] + yoff + 63 * ls_y,
3957 f->data[1] + uvoff + 31 * ls_uv,
3960 f->data[2] + uvoff + 31 * ls_uv,
3968 lflvl_ptr = s->lflvl;
3969 for (col = 0; col < s->cols;
3970 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3986 } while (s->pass++ == 1);
3990 for (i = 0; i < 8; i++) {
4010 for (i = 0; i < 2; i++)
4012 for (i = 0; i < 8; i++)
4021 for (i = 0; i < 2; i++) {
4029 for (i = 0; i < 8; i++) {
4067 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4071 for (i = 0; i < 2; i++) {
4074 if (ssrc->frames[i].tf.f->data[0]) {
4079 for (i = 0; i < 8; i++) {
4082 if (ssrc->next_refs[i].f->data[0]) {
4093 if (ssrc->segmentation.enabled) {