/* Prefix string placed in front of the library's license text. */
109 #define LICENSE_PREFIX "libpostproc license: "
/* Fixed sizes used elsewhere in the file. Presumably GET_MODE_BUFFER_SIZE
 * bounds the mode-string work buffer and OPTIONS_ARRAY_SIZE bounds the
 * per-filter option array; their use sites are not visible in this excerpt
 * -- TODO confirm. */
117 #define GET_MODE_BUFFER_SIZE 500
118 #define OPTIONS_ARRAY_SIZE 10
120 #define TEMP_STRIDE 8
/* Start of a section compiled only for x86 with inline assembly support. */
123 #if ARCH_X86 && HAVE_INLINE_ASM
/* Filter table entries. Each entry lists a short name, a long name, three
 * integers and a mode-flag constant. The name-lookup loop later in this file
 * reads the fields shortName, longName, minLumQuality, minChromQuality and
 * chromDefault; which integer maps to which field is not visible in this
 * excerpt -- TODO confirm against the struct declaration. */
147 {
"dr",
"dering", 1, 5, 6,
DERING},
148 {
"al",
"autolevels", 0, 1, 2,
LEVEL_FIX},
157 {
"be",
"bitexact", 1, 0, 0,
BITEXACT},
/* Alias pairs: each alias name is followed by the filter string it stands
 * for. "default"/"de" and "fast"/"fa" are long/short spellings of the same
 * expansion. The "de", "fa" and "ac" expansions are repeated in the help
 * text further down, so both places must be kept in sync. */
164 "default",
"hb:a,vb:a,dr:a",
165 "de",
"hb:a,vb:a,dr:a",
166 "fast",
"h1:a,v1:a,dr:a",
167 "fa",
"h1:a,v1:a,dr:a",
168 "ac",
"ha:a:128:7,va:a,dr:a",
/* Cache-prefetch helpers, built only for x86 with inline assembly.
 * Each helper takes an address p and issues one prefetch instruction whose
 * memory operand is asm operand %0; the four helpers differ only in the
 * instruction variant (nta, t0, t1, t2). The operand lists and closing
 * braces are not visible in this excerpt. */
173 #if ARCH_X86 && HAVE_INLINE_ASM
174 static inline void prefetchnta(
const void *p)
176 __asm__
volatile(
"prefetchnta (%0)\n\t"
/* Same pattern with the t0 variant. */
181 static inline void prefetcht0(
const void *p)
183 __asm__
volatile(
"prefetcht0 (%0)\n\t"
/* Same pattern with the t1 variant. */
188 static inline void prefetcht1(
const void *p)
190 __asm__
volatile(
"prefetcht1 (%0)\n\t"
/* Same pattern with the t2 variant. */
195 static inline void prefetcht2(
const void *p)
197 __asm__
volatile(
"prefetcht2 (%0)\n\t"
/* Flatness count over one row: for each of the seven horizontally adjacent
 * pairs in src[0..7], add 1 to numEq when the two samples differ by at most
 * dcOffset.
 * The cast to unsigned folds the two-sided test -dcOffset <= diff <= dcOffset
 * into a single comparison against dcThreshold = 2*dcOffset + 1: a negative
 * (diff + dcOffset) wraps to a large unsigned value and fails the test.
 * This relies on dcOffset being non-negative; its definition is not visible
 * in this excerpt -- TODO confirm. */
214 const int dcThreshold= dcOffset*2 + 1;
217 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
218 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
219 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
220 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
221 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
222 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
223 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
/* Flatness count between two rows: for each of the eight columns 0..7, add 1
 * to numEq when the sample and the one directly below it (offset by stride)
 * differ by at most dcOffset.
 * The unsigned cast turns the two-sided range test into one comparison
 * against dcThreshold = 2*dcOffset + 1 (negative sums wrap to large values).
 * This relies on dcOffset being non-negative; its definition is not visible
 * in this excerpt -- TODO confirm. */
237 const int dcThreshold= dcOffset*2 + 1;
241 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
242 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
243 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
244 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
245 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
246 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
247 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
248 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
/* Range checks on sampled pairs that are five positions apart
 * (0/5, 2/7, 4/1, 6/3). Each check returns 0 as soon as the pair differs by
 * more than 2*QP in either direction: (unsigned)(diff + 2*QP) > 4*QP holds
 * when diff > 2*QP, and also when diff < -2*QP because the negative sum
 * wraps to a large unsigned value. This relies on QP being non-negative --
 * not verifiable from this excerpt.
 * Statements between the checks (if any) are not visible here. */
258 if((
unsigned)(src[0] - src[5] + 2*QP) > 4*QP)
return 0;
260 if((
unsigned)(src[2] - src[7] + 2*QP) > 4*QP)
return 0;
262 if((
unsigned)(src[4] - src[1] + 2*QP) > 4*QP)
return 0;
264 if((
unsigned)(src[6] - src[3] + 2*QP) > 4*QP)
return 0;
/* Range checks on pairs that sit in the same column but five rows apart.
 * Columns x, x+1, x+2, x+3 each test a different row pair
 * (0/5, 2/7, 4/1, 6/3). Each check returns 0 when the pair differs by more
 * than 2*QP in either direction; the unsigned cast makes a difference below
 * -2*QP wrap to a large value so one comparison covers both sides. This
 * relies on QP being non-negative -- not verifiable from this excerpt.
 * How x advances is not visible here. */
275 if((
unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP)
return 0;
276 if((
unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP)
return 0;
277 if((
unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP)
return 0;
278 if((
unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP)
return 0;
/* The three "energy" terms apply the same weights (2, -5, 5, -2) to four
 * consecutive samples: middleEnergy to dst[2..5], leftEnergy to dst[0..3]
 * and rightEnergy to dst[4..7]. */
305 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
/* The correction is only considered when |middleEnergy| is below 8 times the
 * context's QP. */
307 if(
FFABS(middleEnergy) < 8*c->
QP){
/* q is half of the step between dst[3] and dst[4]. */
308 const int q=(dst[3] - dst[4])/2;
309 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
310 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
/* Multiply d by the sign of -middleEnergy. How d is computed before this
 * point is not visible in this excerpt. */
316 d*=
FFSIGN(-middleEnergy);
/* Low-pass over dst[0..7] built from running sums.
 * first/last stand in for samples beyond the two ends: the outer neighbour
 * (dst[-1] or dst[8]) is used when it is within QP of the end sample,
 * otherwise the end sample itself is repeated. */
344 const int first=
FFABS(dst[-1] - dst[0]) < c->
QP ? dst[-1] : dst[0];
345 const int last=
FFABS(dst[8] - dst[7]) < c->
QP ? dst[8] : dst[7];
/* sums[0] starts with four copies of first, the first three samples and a
 * constant 4. Every later entry is the previous one minus the term leaving
 * the window plus the term entering it, so each entry still carries the +4. */
348 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
349 sums[1] = sums[0] - first + dst[3];
350 sums[2] = sums[1] - first + dst[4];
351 sums[3] = sums[2] - first + dst[5];
352 sums[4] = sums[3] - first + dst[6];
353 sums[5] = sums[4] - dst[0] + dst[7];
354 sums[6] = sums[5] - dst[1] + last;
355 sums[7] = sums[6] - dst[2] + last;
356 sums[8] = sums[7] - dst[3] + last;
357 sums[9] = sums[8] - dst[4] + last;
/* Each output combines two window sums two positions apart plus twice the
 * original sample, then divides by 16. The two +4 constants add up to +8,
 * which rounds the >>4 to nearest. All sums were computed before any dst[]
 * entry is overwritten, so later outputs do not see earlier results through
 * the sums; the 2*dst[i] term reads the still-unmodified dst[i]. */
359 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
360 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
361 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
362 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
363 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
364 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
365 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
366 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
/* 256-entry table of packed 64-bit values with static storage, so it keeps
 * its contents between calls. The guard that decides when it is filled is
 * not visible in this excerpt. */
383 static uint64_t lut[256];
/* Interpret index i as a signed 8-bit value (128..255 map to -128..-1) and
 * double it. */
389 int v= i < 128 ? 2*i : 2*(i-256);
/* a..d are v scaled by 1/16, 3/16, 5/16 and 7/16, each truncated to one
 * byte. */
398 uint64_t
a= (v/16) & 0xFF;
399 uint64_t
b= (v*3/16) & 0xFF;
400 uint64_t
c= (v*5/16) & 0xFF;
401 uint64_t d= (7*v/16) & 0xFF;
/* A..C are the byte-wise two's-complement negations of a..c. */
402 uint64_t
A= (0x100 -
a)&0xFF;
403 uint64_t
B= (0x100 -
b)&0xFF;
404 uint64_t
C= (0x100 -
c)&0xFF;
/* NOTE(review): D is derived from c, not d, so D always equals C. This
 * breaks the a/A, b/B, c/C pattern above -- confirm whether it is
 * intentional before changing it, since the table contents feed the filter
 * output. */
405 uint64_t
D= (0x100 -
c)&0xFF;
/* Pack the eight bytes into one word: a, b, c, d in the top four bytes
 * (a highest), then D, C, B, A in the bottom four (A lowest). */
407 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
408 (D<<24) | (C<<16) | (B<<8) | (A);
/* Differences across the sample pairs 1/2, 3/4 and 5/6. How they are used
 * is not visible in this excerpt. */
414 int a= src[1] - src[2];
415 int b= src[3] - src[4];
416 int c= src[5] - src[6];
/* Flatness count along a line of samples spaced step apart: nine adjacent
 * pairs from src[-1*step] to src[8*step] are tested, and numEq is
 * incremented for each pair whose difference is at most dcOffset in
 * magnitude. The unsigned cast makes a negative (diff + dcOffset) wrap to a
 * large value, so one comparison against dcThreshold covers both sides
 * (relies on dcOffset >= 0; its definition is not visible here). */
443 const int dcThreshold= dcOffset*2 + 1;
449 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
450 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
451 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
452 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
453 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
454 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
455 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
456 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
457 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
/* Min/max search done pairwise: the two samples of a pair are compared with
 * each other first, so only the larger is tested against max and only the
 * smaller against min. The loop header and the initial assignments are not
 * visible in this excerpt. */
461 if(src[0] > src[step]){
469 if(src[x*step] > src[(x+1)*step]){
470 if(src[x *step] > max) max= src[ x *step];
471 if(src[(x+1)*step] <
min) min= src[(x+1)*step];
473 if(src[(x+1)*step] > max) max= src[(x+1)*step];
474 if(src[ x *step] < min) min= src[ x *step];
/* Stand-ins for samples beyond the two ends: use the outer neighbour when it
 * is within QP of the end sample, otherwise repeat the end sample. */
478 const int first=
FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
479 const int last=
FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* Running window sums; each entry is derived from the previous one, so every
 * entry carries the +4 added in sums[0]. */
482 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
483 sums[1] = sums[0] - first + src[3*step];
484 sums[2] = sums[1] - first + src[4*step];
485 sums[3] = sums[2] - first + src[5*step];
486 sums[4] = sums[3] - first + src[6*step];
487 sums[5] = sums[4] - src[0*step] + src[7*step];
488 sums[6] = sums[5] - src[1*step] + last;
489 sums[7] = sums[6] - src[2*step] + last;
490 sums[8] = sums[7] - src[3*step] + last;
491 sums[9] = sums[8] - src[4*step] + last;
/* Write the filtered samples back in place: two window sums plus twice the
 * original sample, divided by 16 (the two +4 terms give +8 for rounding). */
503 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
504 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
505 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
506 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
507 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
508 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
509 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
510 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* The three "energy" terms apply weights (2, -5, 5, -2) to four consecutive
 * samples: positions 2..5 (middle), 0..3 (left) and 4..7 (right). The
 * branch structure that selects between this part and the low-pass above is
 * not visible in this excerpt. */
513 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
515 if(
FFABS(middleEnergy) < 8*
QP){
/* q is half of the step between positions 3 and 4. */
516 const int q=(src[3*step] - src[4*step])/2;
517 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
518 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
/* Multiply d by the sign of -middleEnergy; the computation of d before this
 * point is not visible here. */
524 d*=
FFSIGN(-middleEnergy);
/* Replace d by a fixed magnitude of 32 with inverted sign and apply it to
 * positions 3 and 4 with clipping to the 8-bit range. The condition guarding
 * these lines is not visible in this excerpt. */
535 d= (d < 0) ? 32 : -32;
536 src[3*step]= av_clip_uint8(src[3*step] - d);
537 src[4*step]= av_clip_uint8(src[4*step] + d);
/* Template selection macros. TEMPLATE_PP_C is defined here; the directives
 * that guard TEMPLATE_PP_ALTIVEC are not visible in this excerpt. */
558 #define TEMPLATE_PP_C 1
562 # define TEMPLATE_PP_ALTIVEC 1
/* x86 with inline asm: with runtime CPU detection, the MMX, MMXEXT, 3DNow
 * and SSE2 templates are all enabled. */
567 #if ARCH_X86 && HAVE_INLINE_ASM
568 # if CONFIG_RUNTIME_CPUDETECT
569 # define TEMPLATE_PP_MMX 1
571 # define TEMPLATE_PP_MMXEXT 1
573 # define TEMPLATE_PP_3DNOW 1
575 # define TEMPLATE_PP_SSE2 1
/* Otherwise (presumably the #else branch, which is not visible here) only
 * the first available one is enabled, tried in the order SSE2, MMXEXT,
 * 3DNow, MMX. */
578 # if HAVE_SSE2_INLINE
579 # define TEMPLATE_PP_SSE2 1
581 # elif HAVE_MMXEXT_INLINE
582 # define TEMPLATE_PP_MMXEXT 1
584 # elif HAVE_AMD3DNOW_INLINE
585 # define TEMPLATE_PP_3DNOW 1
587 # elif HAVE_MMX_INLINE
588 # define TEMPLATE_PP_MMX 1
/* Implementation dispatch: pp starts out as the plain C implementation and
 * may be overwritten below with a CPU-specific one. */
600 pp_fn pp = postProcess_C;
/* With runtime CPU detection the choice is made at run time; the lines that
 * test the CPU capabilities are not visible in this excerpt. */
606 #if CONFIG_RUNTIME_CPUDETECT
607 #if ARCH_X86 && HAVE_INLINE_ASM
/* Compile-time selection, tried in the order SSE2, MMXEXT, 3DNow, MMX. The
 * opening #if of this chain and the guard around the altivec assignment are
 * not visible here. */
618 pp = postProcess_SSE2;
619 #elif HAVE_MMXEXT_INLINE
620 pp = postProcess_MMX2;
621 #elif HAVE_AMD3DNOW_INLINE
622 pp = postProcess_3DNow;
623 #elif HAVE_MMX_INLINE
624 pp = postProcess_MMX;
626 pp = postProcess_altivec;
/* Forward all arguments to the selected implementation. */
631 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
/* User-facing help text, built from adjacent string literals. It lists each
 * filter's short and long name with its options, then the filter-string
 * grammar and two examples. The "de", "fa" and "ac" rows repeat the alias
 * expansions defined earlier in the file and must be kept in sync with them.
 * This text is emitted at run time, so its wording and column layout are
 * left untouched. */
637 "Available postprocessing filters:\n"
639 "short long name short long option Description\n"
640 "* * a autoq CPU power dependent enabler\n"
641 " c chrom chrominance filtering enabled\n"
642 " y nochrom chrominance filtering disabled\n"
643 " n noluma luma filtering disabled\n"
644 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
645 " 1. difference factor: default=32, higher -> more deblocking\n"
646 " 2. flatness threshold: default=39, lower -> more deblocking\n"
647 " the h & v deblocking filters share these\n"
648 " so you can't set different thresholds for h / v\n"
649 "vb vdeblock (2 threshold) vertical deblocking filter\n"
650 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
651 "va vadeblock (2 threshold) vertical deblocking filter\n"
652 "h1 x1hdeblock experimental h deblock filter 1\n"
653 "v1 x1vdeblock experimental v deblock filter 1\n"
654 "dr dering deringing filter\n"
655 "al autolevels automatic brightness / contrast\n"
656 " f fullyrange stretch luminance to (0..255)\n"
657 "lb linblenddeint linear blend deinterlacer\n"
658 "li linipoldeint linear interpolating deinterlace\n"
659 "ci cubicipoldeint cubic interpolating deinterlacer\n"
660 "md mediandeint median deinterlacer\n"
661 "fd ffmpegdeint ffmpeg deinterlacer\n"
662 "l5 lowpass5 FIR lowpass deinterlacer\n"
663 "de default hb:a,vb:a,dr:a\n"
664 "fa fast h1:a,v1:a,dr:a\n"
665 "ac ha:a:128:7,va:a,dr:a\n"
666 "tn tmpnoise (3 threshold) temporal noise reducer\n"
667 " 1. <= 2. <= 3. larger -> stronger filtering\n"
668 "fq forceQuant <quantizer> force quantizer\n"
670 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
671 "long form example:\n"
672 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
673 "short form example:\n"
674 "vb:a/hb:a/lb de,-vb\n"
/* Mode-string parser fragments.
 * Filters are separated by ',' or '/'; a filter name is separated from its
 * options, and options from each other, by ':' or '|'. */
684 static const char filterDelimiters[] =
",/";
685 static const char optionDelimiters[] =
":|";
/* Special name "help": walk the help text one line at a time, advancing p
 * to the character after each '\n'. The loop body is not visible in this
 * excerpt. */
694 if (!strcmp(name,
"help")) {
696 for (p =
pp_help; strchr(p,
'\n'); p = strchr(p,
'\n') + 1) {
725 const char *filterName;
/* Counts options not recognised by the generic option loop; filter-specific
 * code further down decrements it for every option it consumes. */
733 int numOfUnknownOptions=0;
/* Split off the next filter token; stop when there is none left. */
737 filterToken=
av_strtok(p, filterDelimiters, &tokstate);
738 if(!filterToken)
break;
/* Advance p past this token and the single delimiter that followed it. */
739 p+= strlen(filterToken) + 1;
/* The first piece of the token, up to an option delimiter, is the filter
 * name. */
740 filterName=
av_strtok(filterToken, optionDelimiters, &tokstate);
/* A leading '-' on the name (the "[-]" in the help text grammar); the
 * handling inside this branch is not visible in this excerpt. */
747 if(*filterName ==
'-'){
/* Generic options: autoq/a uses the caller's quality, nochrom/y and chrom/c
 * set the chroma choice explicitly, noluma/n turns luma off. */
757 if(!strcmp(
"autoq", option) || !strcmp(
"a", option)) q= quality;
758 else if(!strcmp(
"nochrom", option) || !strcmp(
"y", option)) chrom=0;
759 else if(!strcmp(
"chrom", option) || !strcmp(
"c", option)) chrom=1;
760 else if(!strcmp(
"noluma", option) || !strcmp(
"n", option)) luma=0;
/* Anything else is stored for the filter-specific handling below. The bound
 * check on the options[] index is not visible in this excerpt -- TODO
 * confirm it keeps room for the terminator. */
762 options[numOfUnknownOptions] =
option;
763 numOfUnknownOptions++;
/* NULL-terminate the list so the loops below can stop on options[o]. */
767 options[numOfUnknownOptions] =
NULL;
/* Offset of p inside temp plus the length of the text at p. The check that
 * uses this value is not visible in this excerpt. */
779 spaceLeft= p - temp + plen;
/* Shift the text at p (including its terminating NUL, hence plen+1) right by
 * newlen bytes; memmove is needed because source and destination overlap. */
784 memmove(p + newlen, p, plen+1);
/* Match the name against both the long and the short name of each filter. */
791 if( !strcmp(filters[i].longName, filterName)
792 || !strcmp(filters[i].shortName, filterName)){
/* Luma needs sufficient quality and luma not switched off. Chroma needs
 * either an explicit chrom=1, or no explicit choice (-1) together with the
 * filter's chroma default, and then sufficient quality. The statements these
 * conditions guard are not visible in this excerpt. */
799 if(q >= filters[i].minLumQuality && luma)
801 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
802 if(q >= filters[i].minChromQuality)
/* "fullyrange"/"f" is a recognised option, so it no longer counts as
 * unknown. */
809 for(o=0; options[o]; o++){
810 if( !strcmp(options[o],
"fullyrange")
811 ||!strcmp(options[o],
"f")){
814 numOfUnknownOptions--;
/* Numeric options, parsed with base 0 (strtol then accepts decimal, octal
 * and hex prefixes). tail != options[o] means at least one character was
 * consumed; parsing stops after three values. */
823 for(o=0; options[o]; o++){
826 strtol(options[o], &tail, 0);
827 if(tail!=options[o]){
829 numOfUnknownOptions--;
830 if(numOfNoises >= 3)
break;
/* Up to two numeric options; stop at the first one that does not start with
 * a number. The long result of strtol is narrowed to int. */
838 for(o=0; options[o] && o<2; o++){
840 int val= strtol(options[o], &tail, 0);
841 if(tail==options[o])
break;
843 numOfUnknownOptions--;
/* At most one numeric option. */
852 for(o=0; options[o] && o<1; o++){
854 int val= strtol(options[o], &tail, 0);
855 if(tail==options[o])
break;
857 numOfUnknownOptions--;
/* Error accounting: one error for an unrecognised filter name, plus one for
 * every option nobody consumed. */
863 if(!filterNameOk) ppMode->
error++;
864 ppMode->
error += numOfUnknownOptions;
/* Picture size in units of 16 pixels, rounded up. */
886 int mbWidth = (width+15)>>4;
887 int mbHeight= (height+15)>>4;
/* Width in units of 16 pixels, rounded up, plus 2. What the two extra
 * entries are for is not visible in this excerpt. */
921 int qpStride= (width+15)/16 + 2;
/* Tail of a parameter list: three destination plane pointers with their
 * strides, the mode and context handles, and the picture type. The start of
 * the signature is not visible in this excerpt. */
975 uint8_t * dst[3],
const int dstStride[3],
978 pp_mode *vm,
void *vc,
int pict_type)
/* Picture size in units of 16 pixels, rounded up. */
980 int mbWidth = (width+15)>>4;
981 int mbHeight= (height+15)>>4;
/* QPStride may be negative; keep its magnitude separately. */
985 int absQPStride =
FFABS(QPStride);
/* Both strides are forced to 0 here; the condition under which this runs is
 * not visible in this excerpt. */
996 absQPStride = QPStride = 0;
/* Number of QP entries to convert: the whole table, but at least one row of
 * mbWidth entries (covers the stride-0 case). The first loop runs count/4
 * times, so it appears to handle four entries per iteration; i<<=2 then
 * converts the group counter back into an entry index so the second loop
 * can finish the remaining count%4 entries. The loop bodies are not visible
 * here. */
1005 const int count=
FFMAX(mbHeight * absQPStride, mbWidth);
1006 for(i=0; i<(count>>2); i++){
1009 for(i<<=2; i<
count; i++){
/* From here on the stride is treated as non-negative. */
1013 QPStride= absQPStride;
/* Walk every entry, row by row; the loop body is not visible here. */
1018 for(y=0; y<mbHeight; y++){
1019 for(x=0; x<mbWidth; x++){
/* Only when the low three bits of pict_type are not 3. */
1027 if((pict_type&7)!=3){
/* Same group-of-four plus remainder loop shape as above, this time using
 * the signed QPStride. */
1030 const int count=
FFMAX(mbHeight * QPStride, mbWidth);
1031 for(i=0; i<(count>>2); i++){
1034 for(i<<=2; i<
count; i++){
/* Row-by-row copy: the destination is indexed with the stride's magnitude,
 * the source with the signed stride, and each value is masked to its low
 * six bits. */
1039 for(i=0; i<mbHeight; i++) {
1040 for(j=0; j<absQPStride; j++) {
1041 c->
nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
/* Process plane 0. */
1050 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1051 width, height, QP_store, QPStride, 0, mode, c);
/* True when any of the plane 1/2 source or destination pointers is NULL;
 * the statement it guards is not visible in this excerpt. */
1053 if (!(src[1] && src[2] && dst[1] && dst[2]))
/* Process planes 1 and 2 with the same QP table. */
1060 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1061 width, height, QP_store, QPStride, 1, mode, c);
1062 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1063 width, height, QP_store, QPStride, 2, mode, c);
/* Otherwise copy planes 1 and 2 unchanged: through linecpy when source and
 * destination strides match, else one row of width bytes at a time. */
1065 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1066 linecpy(dst[1], src[1], height, srcStride[1]);
1067 linecpy(dst[2], src[2], height, srcStride[2]);
1071 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1072 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);