FFmpeg  2.6.9
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
postprocess.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * postprocessing.
26  */
27 
28 /*
29  C MMX MMX2 3DNow AltiVec
30 isVertDC Ec Ec Ec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
34 isHorizDC Ec Ec Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
39 deRing E e e* Ecp
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
42 Vertical X1# a E E
43 Horizontal X1# a E E
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
49 
50 * I do not have a 3DNow! CPU, so this is untested; nobody has reported a problem, so it is assumed to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58 
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66  (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73 
74 //Changelog: use git log
75 
76 #include "config.h"
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include "libavutil/intreadwrite.h"
80 #include <inttypes.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 //#undef HAVE_MMXEXT_INLINE
85 //#define HAVE_AMD3DNOW_INLINE
86 //#undef HAVE_MMX_INLINE
87 //#undef ARCH_X86
88 //#define DEBUG_BRIGHTNESS
89 #include "postprocess.h"
90 #include "postprocess_internal.h"
91 #include "libavutil/avstring.h"
92 
93 #include "libavutil/ffversion.h"
94 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
95 
96 unsigned postproc_version(void)
97 {
100 }
101 
102 const char *postproc_configuration(void)
103 {
104  return FFMPEG_CONFIGURATION;
105 }
106 
107 const char *postproc_license(void)
108 {
109 #define LICENSE_PREFIX "libpostproc license: "
110  return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111 }
112 
113 #if HAVE_ALTIVEC_H
114 #include <altivec.h>
115 #endif
116 
/* Size in bytes of the scratch buffer in which a mode string is copied and
 * tokenized by pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of options the generic option parser did
 * not recognize (one slot is reserved for the NULL terminator). */
#define OPTIONS_ARRAY_SIZE 10
/* Row/column count used as the loop bound by the C block filters below. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

#if ARCH_X86 && HAVE_INLINE_ASM
/* 64-bit constants holding one value replicated over four 16-bit words (wNN)
 * or over eight bytes (bNN); NN is the replicated value in hexadecimal.
 * NOTE(review): presumably operands for the inline-asm code in
 * postprocess_template.c - nothing in this file reads them. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* NOTE(review): presumably the threshold of the deringing filter in the
 * template code; it is not referenced in this file. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
135 
136 
/*
 * Table of the filters known to the mode-string parser.
 *
 * pp_get_mode_by_name_and_quality() compares a requested filter name against
 * both strings of a row and, on a match, clears and then conditionally sets
 * the row's mask (last column) in lumMode / chromMode.
 * NOTE(review): the three integers appear to be chromDefault, minLumQuality
 * and minChromQuality, in that order - confirm against struct PPFilter.
 * The table ends with an all-NULL/zero row.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering", 1, 5, 6, DERING},
    {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
    {"be", "bitexact", 1, 0, 0, BITEXACT},
    {"vi", "visualize", 1, 0, 0, VISUALIZE},
    {NULL, NULL,0,0,0,0} //End Marker
};
161 
/*
 * Alias table, stored as consecutive (alias, expansion) string pairs.
 *
 * When a filter name in a mode string equals an alias, the parser splices the
 * expansion into its working buffer so that the expansion is parsed next.
 * A NULL in an alias slot terminates the table.
 */
static const char * const replaceTable[]=
{
    "default", "hb:a,vb:a,dr:a",
    "de", "hb:a,vb:a,dr:a",
    "fast", "h1:a,v1:a,dr:a",
    "fa", "h1:a,v1:a,dr:a",
    "ac", "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
171 
172 
/* Thin wrappers around the x86 prefetch instructions; only compiled when
 * inline assembly for x86 is available. Each one emits a single instruction
 * taking the address p as its memory operand. */
#if ARCH_X86 && HAVE_INLINE_ASM
/* Emit "prefetchnta" for the address p. */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile( "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/* Emit "prefetcht0" for the address p. */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile( "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/* Emit "prefetcht1" for the address p. */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile( "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/* Emit "prefetcht2" for the address p. */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile( "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
202 
203 /* The horizontal functions exist only in C because the MMX
204  * code is faster with vertical filters and transposing. */
205 
206 /**
207  * Check if the given 8x8 Block is mostly "flat"
208  */
209 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
210 {
211  int numEq= 0;
212  int y;
213  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
214  const int dcThreshold= dcOffset*2 + 1;
215 
216  for(y=0; y<BLOCK_SIZE; y++){
217  numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
218  numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
219  numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
220  numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
221  numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
222  numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
223  numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
224  src+= stride;
225  }
226  return numEq > c->ppMode.flatnessThreshold;
227 }
228 
229 /**
230  * Check if the middle 8x8 Block in the given 8x16 block is flat
231  */
232 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
233 {
234  int numEq= 0;
235  int y;
236  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
237  const int dcThreshold= dcOffset*2 + 1;
238 
239  src+= stride*4; // src points to begin of the 8x8 Block
240  for(y=0; y<BLOCK_SIZE-1; y++){
241  numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
242  numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
243  numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
244  numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
245  numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
246  numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
247  numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
248  numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
249  src+= stride;
250  }
251  return numEq > c->ppMode.flatnessThreshold;
252 }
253 
/**
 * Sparse range check over 8 rows of a block.
 *
 * One pixel pair is probed per row (the column pairs repeat every 4 rows);
 * the check fails as soon as a probed difference d violates
 * -2*QP <= d <= 2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance between rows, in bytes
 * @param QP     quantizer used to scale the allowed range
 * @return 1 if every probed pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* column pair probed in row (r & 3) */
    static const uint8_t probe[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int r;

    for (r = 0; r < 8; r++) {
        const uint8_t *line = src + r * stride;
        const int a = line[probe[r & 3][0]];
        const int b = line[probe[r & 3][1]];

        if ((unsigned)(a - b + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
269 
270 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
271 {
272  int x;
273  src+= stride*4;
274  for(x=0; x<BLOCK_SIZE; x+=4){
275  if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
276  if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277  if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278  if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279  }
280  return 1;
281 }
282 
283 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
284 {
285  if( isHorizDC_C(src, stride, c) ){
286  return isHorizMinMaxOk_C(src, stride, c->QP);
287  }else{
288  return 2;
289  }
290 }
291 
292 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
293 {
294  if( isVertDC_C(src, stride, c) ){
295  return isVertMinMaxOk_C(src, stride, c->QP);
296  }else{
297  return 2;
298  }
299 }
300 
301 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
302 {
303  int y;
304  for(y=0; y<BLOCK_SIZE; y++){
305  const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
306 
307  if(FFABS(middleEnergy) < 8*c->QP){
308  const int q=(dst[3] - dst[4])/2;
309  const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
310  const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
311 
312  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
313  d= FFMAX(d, 0);
314 
315  d= (5*d + 32) >> 6;
316  d*= FFSIGN(-middleEnergy);
317 
318  if(q>0)
319  {
320  d = FFMAX(d, 0);
321  d = FFMIN(d, q);
322  }
323  else
324  {
325  d = FFMIN(d, 0);
326  d = FFMAX(d, q);
327  }
328 
329  dst[3]-= d;
330  dst[4]+= d;
331  }
332  dst+= stride;
333  }
334 }
335 
336 /**
337  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
338  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
339  */
340 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
341 {
342  int y;
343  for(y=0; y<BLOCK_SIZE; y++){
344  const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
345  const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
346 
347  int sums[10];
348  sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
349  sums[1] = sums[0] - first + dst[3];
350  sums[2] = sums[1] - first + dst[4];
351  sums[3] = sums[2] - first + dst[5];
352  sums[4] = sums[3] - first + dst[6];
353  sums[5] = sums[4] - dst[0] + dst[7];
354  sums[6] = sums[5] - dst[1] + last;
355  sums[7] = sums[6] - dst[2] + last;
356  sums[8] = sums[7] - dst[3] + last;
357  sums[9] = sums[8] - dst[4] + last;
358 
359  dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
360  dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
361  dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
362  dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
363  dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
364  dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
365  dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
366  dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
367 
368  dst+= stride;
369  }
370 }
371 
372 /**
373  * Experimental Filter 1 (Horizontal)
374  * will not damage linear gradients
375  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
376  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
377  * MMX2 version does correct clipping C version does not
378  * not identical with the vertical one
379  */
380 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
381 {
382  int y;
383  static uint64_t lut[256];
384  if(!lut[255])
385  {
386  int i;
387  for(i=0; i<256; i++)
388  {
389  int v= i < 128 ? 2*i : 2*(i-256);
390 /*
391 //Simulate 112242211 9-Tap filter
392  uint64_t a= (v/16) & 0xFF;
393  uint64_t b= (v/8) & 0xFF;
394  uint64_t c= (v/4) & 0xFF;
395  uint64_t d= (3*v/8) & 0xFF;
396 */
397 //Simulate piecewise linear interpolation
398  uint64_t a= (v/16) & 0xFF;
399  uint64_t b= (v*3/16) & 0xFF;
400  uint64_t c= (v*5/16) & 0xFF;
401  uint64_t d= (7*v/16) & 0xFF;
402  uint64_t A= (0x100 - a)&0xFF;
403  uint64_t B= (0x100 - b)&0xFF;
404  uint64_t C= (0x100 - c)&0xFF;
405  uint64_t D= (0x100 - c)&0xFF;
406 
407  lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
408  (D<<24) | (C<<16) | (B<<8) | (A);
409  //lut[i] = (v<<32) | (v<<24);
410  }
411  }
412 
413  for(y=0; y<BLOCK_SIZE; y++){
414  int a= src[1] - src[2];
415  int b= src[3] - src[4];
416  int c= src[5] - src[6];
417 
418  int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
419 
420  if(d < QP){
421  int v = d * FFSIGN(-b);
422 
423  src[1] +=v/8;
424  src[2] +=v/4;
425  src[3] +=3*v/8;
426  src[4] -=3*v/8;
427  src[5] -=v/4;
428  src[6] -=v/8;
429  }
430  src+=stride;
431  }
432 }
433 
434 /**
435  * accurate deblock filter
436  */
438  int stride, const PPContext *c, int mode)
439 {
440  int y;
441  const int QP= c->QP;
442  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
443  const int dcThreshold= dcOffset*2 + 1;
444 //START_TIMER
445  src+= step*4; // src points to begin of the 8x8 Block
446  for(y=0; y<8; y++){
447  int numEq= 0;
448 
449  numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
450  numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
451  numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
452  numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
453  numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
454  numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
455  numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
456  numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
457  numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
458  if(numEq > c->ppMode.flatnessThreshold){
459  int min, max, x;
460 
461  if(src[0] > src[step]){
462  max= src[0];
463  min= src[step];
464  }else{
465  max= src[step];
466  min= src[0];
467  }
468  for(x=2; x<8; x+=2){
469  if(src[x*step] > src[(x+1)*step]){
470  if(src[x *step] > max) max= src[ x *step];
471  if(src[(x+1)*step] < min) min= src[(x+1)*step];
472  }else{
473  if(src[(x+1)*step] > max) max= src[(x+1)*step];
474  if(src[ x *step] < min) min= src[ x *step];
475  }
476  }
477  if(max-min < 2*QP){
478  const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
479  const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
480 
481  int sums[10];
482  sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
483  sums[1] = sums[0] - first + src[3*step];
484  sums[2] = sums[1] - first + src[4*step];
485  sums[3] = sums[2] - first + src[5*step];
486  sums[4] = sums[3] - first + src[6*step];
487  sums[5] = sums[4] - src[0*step] + src[7*step];
488  sums[6] = sums[5] - src[1*step] + last;
489  sums[7] = sums[6] - src[2*step] + last;
490  sums[8] = sums[7] - src[3*step] + last;
491  sums[9] = sums[8] - src[4*step] + last;
492 
493  if (mode & VISUALIZE) {
494  src[0*step] =
495  src[1*step] =
496  src[2*step] =
497  src[3*step] =
498  src[4*step] =
499  src[5*step] =
500  src[6*step] =
501  src[7*step] = 128;
502  }
503  src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
504  src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
505  src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
506  src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
507  src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
508  src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
509  src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
510  src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
511  }
512  }else{
513  const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
514 
515  if(FFABS(middleEnergy) < 8*QP){
516  const int q=(src[3*step] - src[4*step])/2;
517  const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
518  const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
519 
520  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
521  d= FFMAX(d, 0);
522 
523  d= (5*d + 32) >> 6;
524  d*= FFSIGN(-middleEnergy);
525 
526  if(q>0){
527  d = FFMAX(d, 0);
528  d = FFMIN(d, q);
529  }else{
530  d = FFMIN(d, 0);
531  d = FFMAX(d, q);
532  }
533 
534  if ((mode & VISUALIZE) && d) {
535  d= (d < 0) ? 32 : -32;
536  src[3*step]= av_clip_uint8(src[3*step] - d);
537  src[4*step]= av_clip_uint8(src[4*step] + d);
538  d = 0;
539  }
540 
541  src[3*step]-= d;
542  src[4*step]+= d;
543  }
544  }
545 
546  src += stride;
547  }
548 /*if(step==16){
549  STOP_TIMER("step16")
550 }else{
551  STOP_TIMER("stepX")
552 }*/
553 }
554 
/*
 * Instantiate postprocess_template.c once per instruction set: a TEMPLATE_PP_*
 * macro is defined and the template is included again. The function names
 * selected in postProcess() (postProcess_C, postProcess_MMX, ...) come from
 * these inclusions.
 * NOTE(review): the macros are re-defined without an #undef here, so the
 * template presumably undefines them itself - confirm.
 */
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
//we always compile C for testing which needs bitexactness
#define TEMPLATE_PP_C 1
#include "postprocess_template.c"

#if HAVE_ALTIVEC
# define TEMPLATE_PP_ALTIVEC 1
/* NOTE(review): the listing skips a line here (562 -> 564); compare with the
 * upstream file before relying on this branch. */
# include "postprocess_template.c"
#endif

#if ARCH_X86 && HAVE_INLINE_ASM
/* with runtime CPU detection every x86 variant is built ... */
# if CONFIG_RUNTIME_CPUDETECT
# define TEMPLATE_PP_MMX 1
# include "postprocess_template.c"
# define TEMPLATE_PP_MMXEXT 1
# include "postprocess_template.c"
# define TEMPLATE_PP_3DNOW 1
# include "postprocess_template.c"
# define TEMPLATE_PP_SSE2 1
# include "postprocess_template.c"
# else
/* ... otherwise only the first variant enabled at build time */
# if HAVE_SSE2_INLINE
# define TEMPLATE_PP_SSE2 1
# include "postprocess_template.c"
# elif HAVE_MMXEXT_INLINE
# define TEMPLATE_PP_MMXEXT 1
# include "postprocess_template.c"
# elif HAVE_AMD3DNOW_INLINE
# define TEMPLATE_PP_3DNOW 1
# include "postprocess_template.c"
# elif HAVE_MMX_INLINE
# define TEMPLATE_PP_MMX 1
# include "postprocess_template.c"
# endif
# endif
#endif
593 
/* Signature shared by all per-instruction-set postProcess_* implementations. */
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                      const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);

/**
 * Filter one plane with the implementation suited to the CPU.
 *
 * The mode is copied into the context, then an implementation is chosen:
 * the C version when BITEXACT is set in the luma mode, otherwise a SIMD
 * version selected from c->cpuCaps (runtime detection builds) or from the
 * build configuration.
 *
 * @param isColor passed through unchanged; pp_postprocess() passes 0 for the
 *                first plane and 1 / 2 for the other two
 * @param vm      mode, used as a PPMode
 * @param vc      context, used as a PPContext
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
{
    pp_fn pp = postProcess_C;
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME

    /* BITEXACT keeps the C implementation selected above */
    if (!(ppMode->lumMode & BITEXACT)) {
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86 && HAVE_INLINE_ASM
        // ordered per speed fastest first
        if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
        else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
        else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
        else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
#elif HAVE_ALTIVEC
        if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
#endif
#else /* CONFIG_RUNTIME_CPUDETECT */
#if HAVE_SSE2_INLINE
        pp = postProcess_SSE2;
#elif HAVE_MMXEXT_INLINE
        pp = postProcess_MMX2;
#elif HAVE_AMD3DNOW_INLINE
        pp = postProcess_3DNow;
#elif HAVE_MMX_INLINE
        pp = postProcess_MMX;
#elif HAVE_ALTIVEC
        pp = postProcess_altivec;
#endif
#endif /* !CONFIG_RUNTIME_CPUDETECT */
    }

    pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
}
633 
/* -pp Command line Help
 *
 * Printed line by line when the mode name "help" is requested.
 * The long names shown here must be the ones the parser accepts: it compares
 * names case-sensitively against the filter table, which spells them
 * "ahdeblock", "avdeblock" and "forcequant". The text previously advertised
 * "hadeblock", "vadeblock" and "forceQuant", none of which is accepted.
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
679 
680 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
681 {
683  char *p= temp;
684  static const char filterDelimiters[] = ",/";
685  static const char optionDelimiters[] = ":|";
686  struct PPMode *ppMode;
687  char *filterToken;
688 
689  if (!name) {
690  av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
691  return NULL;
692  }
693 
694  if (!strcmp(name, "help")) {
695  const char *p;
696  for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
697  av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
698  av_log(NULL, AV_LOG_INFO, "%s", temp);
699  }
700  return NULL;
701  }
702 
703  ppMode= av_malloc(sizeof(PPMode));
704  if (!ppMode)
705  return NULL;
706 
707  ppMode->lumMode= 0;
708  ppMode->chromMode= 0;
709  ppMode->maxTmpNoise[0]= 700;
710  ppMode->maxTmpNoise[1]= 1500;
711  ppMode->maxTmpNoise[2]= 3000;
712  ppMode->maxAllowedY= 234;
713  ppMode->minAllowedY= 16;
714  ppMode->baseDcDiff= 256/8;
715  ppMode->flatnessThreshold= 56-16-1;
716  ppMode->maxClippedThreshold= 0.01;
717  ppMode->error=0;
718 
719  memset(temp, 0, GET_MODE_BUFFER_SIZE);
720  av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
721 
722  av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
723 
724  for(;;){
725  const char *filterName;
726  int q= 1000000; //PP_QUALITY_MAX;
727  int chrom=-1;
728  int luma=-1;
729  const char *option;
730  const char *options[OPTIONS_ARRAY_SIZE];
731  int i;
732  int filterNameOk=0;
733  int numOfUnknownOptions=0;
734  int enable=1; //does the user want us to enabled or disabled the filter
735  char *tokstate;
736 
737  filterToken= av_strtok(p, filterDelimiters, &tokstate);
738  if(!filterToken) break;
739  p+= strlen(filterToken) + 1; // p points to next filterToken
740  filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
741  if (!filterName) {
742  ppMode->error++;
743  break;
744  }
745  av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
746 
747  if(*filterName == '-'){
748  enable=0;
749  filterName++;
750  }
751 
752  for(;;){ //for all options
753  option= av_strtok(NULL, optionDelimiters, &tokstate);
754  if(!option) break;
755 
756  av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
757  if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
758  else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
759  else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
760  else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
761  else{
762  options[numOfUnknownOptions] = option;
763  numOfUnknownOptions++;
764  }
765  if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
766  }
767  options[numOfUnknownOptions] = NULL;
768 
769  /* replace stuff from the replace Table */
770  for(i=0; replaceTable[2*i]; i++){
771  if(!strcmp(replaceTable[2*i], filterName)){
772  int newlen= strlen(replaceTable[2*i + 1]);
773  int plen;
774  int spaceLeft;
775 
776  p--, *p=',';
777 
778  plen= strlen(p);
779  spaceLeft= p - temp + plen;
780  if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
781  ppMode->error++;
782  break;
783  }
784  memmove(p + newlen, p, plen+1);
785  memcpy(p, replaceTable[2*i + 1], newlen);
786  filterNameOk=1;
787  }
788  }
789 
790  for(i=0; filters[i].shortName; i++){
791  if( !strcmp(filters[i].longName, filterName)
792  || !strcmp(filters[i].shortName, filterName)){
793  ppMode->lumMode &= ~filters[i].mask;
794  ppMode->chromMode &= ~filters[i].mask;
795 
796  filterNameOk=1;
797  if(!enable) break; // user wants to disable it
798 
799  if(q >= filters[i].minLumQuality && luma)
800  ppMode->lumMode|= filters[i].mask;
801  if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
802  if(q >= filters[i].minChromQuality)
803  ppMode->chromMode|= filters[i].mask;
804 
805  if(filters[i].mask == LEVEL_FIX){
806  int o;
807  ppMode->minAllowedY= 16;
808  ppMode->maxAllowedY= 234;
809  for(o=0; options[o]; o++){
810  if( !strcmp(options[o],"fullyrange")
811  ||!strcmp(options[o],"f")){
812  ppMode->minAllowedY= 0;
813  ppMode->maxAllowedY= 255;
814  numOfUnknownOptions--;
815  }
816  }
817  }
818  else if(filters[i].mask == TEMP_NOISE_FILTER)
819  {
820  int o;
821  int numOfNoises=0;
822 
823  for(o=0; options[o]; o++){
824  char *tail;
825  ppMode->maxTmpNoise[numOfNoises]=
826  strtol(options[o], &tail, 0);
827  if(tail!=options[o]){
828  numOfNoises++;
829  numOfUnknownOptions--;
830  if(numOfNoises >= 3) break;
831  }
832  }
833  }
834  else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
835  || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
836  int o;
837 
838  for(o=0; options[o] && o<2; o++){
839  char *tail;
840  int val= strtol(options[o], &tail, 0);
841  if(tail==options[o]) break;
842 
843  numOfUnknownOptions--;
844  if(o==0) ppMode->baseDcDiff= val;
845  else ppMode->flatnessThreshold= val;
846  }
847  }
848  else if(filters[i].mask == FORCE_QUANT){
849  int o;
850  ppMode->forcedQuant= 15;
851 
852  for(o=0; options[o] && o<1; o++){
853  char *tail;
854  int val= strtol(options[o], &tail, 0);
855  if(tail==options[o]) break;
856 
857  numOfUnknownOptions--;
858  ppMode->forcedQuant= val;
859  }
860  }
861  }
862  }
863  if(!filterNameOk) ppMode->error++;
864  ppMode->error += numOfUnknownOptions;
865  }
866 
867  av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
868  if(ppMode->error){
869  av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
870  av_free(ppMode);
871  return NULL;
872  }
873  return ppMode;
874 }
875 
877  av_free(mode);
878 }
879 
/**
 * Replace the buffer *p by a new zero-initialized buffer of `size` bytes.
 *
 * The old contents are not preserved: the old buffer is freed first.
 * *p receives whatever av_mallocz() returns; the result is not checked here.
 */
static void reallocAlign(void **p, int size){
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
884 
885 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
886  int mbWidth = (width+15)>>4;
887  int mbHeight= (height+15)>>4;
888  int i;
889 
890  c->stride= stride;
891  c->qpStride= qpStride;
892 
893  reallocAlign((void **)&c->tempDst, stride*24+32);
894  reallocAlign((void **)&c->tempSrc, stride*24);
895  reallocAlign((void **)&c->tempBlocks, 2*16*8);
896  reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
897  for(i=0; i<256; i++)
898  c->yHistogram[i]= width*height/64*15/256;
899 
900  for(i=0; i<3; i++){
901  //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
902  reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
903  reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
904  }
905 
906  reallocAlign((void **)&c->deintTemp, 2*width+32);
907  reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
908  reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
909  reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
910 }
911 
/**
 * Item-name callback of the logging class below.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr) {
    const char *const label = "postproc";

    (void)ptr;
    return label;
}
915 
/* Logging class for postproc contexts: class name "Postproc", item names
 * supplied by context_to_name(). */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
917 
918 av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){
919  PPContext *c= av_mallocz(sizeof(PPContext));
920  int stride= FFALIGN(width, 16); //assumed / will realloc if needed
921  int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
922 
923  if (!c)
924  return NULL;
925 
927  if(cpuCaps&PP_FORMAT){
928  c->hChromaSubSample= cpuCaps&0x3;
929  c->vChromaSubSample= (cpuCaps>>4)&0x3;
930  }else{
931  c->hChromaSubSample= 1;
932  c->vChromaSubSample= 1;
933  }
934  if (cpuCaps & PP_CPU_CAPS_AUTO) {
935  c->cpuCaps = av_get_cpu_flags();
936  } else {
937  c->cpuCaps = 0;
938  if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
939  if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
940  if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
941  if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
942  }
943 
944  reallocBuffers(c, width, height, stride, qpStride);
945 
946  c->frameNum=-1;
947 
948  return c;
949 }
950 
951 av_cold void pp_free_context(void *vc){
952  PPContext *c = (PPContext*)vc;
953  int i;
954 
955  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
956  av_free(c->tempBlurred[i]);
957  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
958  av_free(c->tempBlurredPast[i]);
959 
960  av_free(c->tempBlocks);
961  av_free(c->yHistogram);
962  av_free(c->tempDst);
963  av_free(c->tempSrc);
964  av_free(c->deintTemp);
965  av_free(c->stdQPTable);
966  av_free(c->nonBQPTable);
968 
969  memset(c, 0, sizeof(PPContext));
970 
971  av_free(c);
972 }
973 
974 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
975  uint8_t * dst[3], const int dstStride[3],
976  int width, int height,
977  const QP_STORE_T *QP_store, int QPStride,
978  pp_mode *vm, void *vc, int pict_type)
979 {
980  int mbWidth = (width+15)>>4;
981  int mbHeight= (height+15)>>4;
982  PPMode *mode = vm;
983  PPContext *c = vc;
984  int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
985  int absQPStride = FFABS(QPStride);
986 
987  // c->stride and c->QPStride are always positive
988  if(c->stride < minStride || c->qpStride < absQPStride)
989  reallocBuffers(c, width, height,
990  FFMAX(minStride, c->stride),
991  FFMAX(c->qpStride, absQPStride));
992 
993  if(!QP_store || (mode->lumMode & FORCE_QUANT)){
994  int i;
995  QP_store= c->forcedQPTable;
996  absQPStride = QPStride = 0;
997  if(mode->lumMode & FORCE_QUANT)
998  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
999  else
1000  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1001  }
1002 
1003  if(pict_type & PP_PICT_TYPE_QP2){
1004  int i;
1005  const int count= FFMAX(mbHeight * absQPStride, mbWidth);
1006  for(i=0; i<(count>>2); i++){
1007  AV_WN32(c->stdQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) >> 1 & 0x7F7F7F7F);
1008  }
1009  for(i<<=2; i<count; i++){
1010  c->stdQPTable[i] = QP_store[i]>>1;
1011  }
1012  QP_store= c->stdQPTable;
1013  QPStride= absQPStride;
1014  }
1015 
1016  if(0){
1017  int x,y;
1018  for(y=0; y<mbHeight; y++){
1019  for(x=0; x<mbWidth; x++){
1020  av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1021  }
1022  av_log(c, AV_LOG_INFO, "\n");
1023  }
1024  av_log(c, AV_LOG_INFO, "\n");
1025  }
1026 
1027  if((pict_type&7)!=3){
1028  if (QPStride >= 0){
1029  int i;
1030  const int count= FFMAX(mbHeight * QPStride, mbWidth);
1031  for(i=0; i<(count>>2); i++){
1032  AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
1033  }
1034  for(i<<=2; i<count; i++){
1035  c->nonBQPTable[i] = QP_store[i] & 0x3F;
1036  }
1037  } else {
1038  int i,j;
1039  for(i=0; i<mbHeight; i++) {
1040  for(j=0; j<absQPStride; j++) {
1041  c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1042  }
1043  }
1044  }
1045  }
1046 
1047  av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1048  mode->lumMode, mode->chromMode);
1049 
1050  postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1051  width, height, QP_store, QPStride, 0, mode, c);
1052 
1053  if (!(src[1] && src[2] && dst[1] && dst[2]))
1054  return;
1055 
1056  width = (width )>>c->hChromaSubSample;
1057  height = (height)>>c->vChromaSubSample;
1058 
1059  if(mode->chromMode){
1060  postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1061  width, height, QP_store, QPStride, 1, mode, c);
1062  postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1063  width, height, QP_store, QPStride, 2, mode, c);
1064  }
1065  else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1066  linecpy(dst[1], src[1], height, srcStride[1]);
1067  linecpy(dst[2], src[2], height, srcStride[2]);
1068  }else{
1069  int y;
1070  for(y=0; y<height; y++){
1071  memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1072  memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1073  }
1074  }
1075 }