Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52867 views
1
/*
2
* Copyright (C) 2001-2003 Michael Niedermayer ([email protected])
3
*
4
* AltiVec optimizations (C) 2004 Romain Dolbeau <[email protected]>
5
*
6
* This file is part of FFmpeg.
7
*
8
* FFmpeg is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
12
*
13
* FFmpeg is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
17
*
18
* You should have received a copy of the GNU General Public License
19
* along with FFmpeg; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
*/
22
23
/**
24
* @file
25
* postprocessing.
26
*/
27
28
/*
29
C MMX MMX2 3DNow AltiVec
30
isVertDC Ec Ec Ec
31
isVertMinMaxOk Ec Ec Ec
32
doVertLowPass E e e Ec
33
doVertDefFilter Ec Ec e e Ec
34
isHorizDC Ec Ec Ec
35
isHorizMinMaxOk a E Ec
36
doHorizLowPass E e e Ec
37
doHorizDefFilter Ec Ec e e Ec
38
do_a_deblock Ec E Ec E
39
deRing E e e* Ecp
40
Vertical RKAlgo1 E a a
41
Horizontal RKAlgo1 a a
42
Vertical X1# a E E
43
Horizontal X1# a E E
44
LinIpolDeinterlace e E E*
45
CubicIpolDeinterlace a e e*
46
LinBlendDeinterlace e E E*
47
MedianDeinterlace# E Ec Ec
48
TempDeNoiser# E e e Ec
49
50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once)
66
(the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73
74
//Changelog: use git log
75
76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include "libavutil/avassert.h"
79
#include "libavutil/intreadwrite.h"
80
#include <inttypes.h>
81
#include <stdio.h>
82
#include <stdlib.h>
83
#include <string.h>
84
//#undef HAVE_MMXEXT_INLINE
85
//#define HAVE_AMD3DNOW_INLINE
86
//#undef HAVE_MMX_INLINE
87
//#undef ARCH_X86
88
//#define DEBUG_BRIGHTNESS
89
#include "postprocess.h"
90
#include "postprocess_internal.h"
91
#include "libavutil/avstring.h"
92
93
#include "libavutil/ffversion.h"
94
const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
95
96
unsigned postproc_version(void)
97
{
98
av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
99
return LIBPOSTPROC_VERSION_INT;
100
}
101
102
const char *postproc_configuration(void)
103
{
104
return FFMPEG_CONFIGURATION;
105
}
106
107
const char *postproc_license(void)
108
{
109
#define LICENSE_PREFIX "libpostproc license: "
110
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111
}
112
113
#if HAVE_ALTIVEC_H
114
#include <altivec.h>
115
#endif
116
117
#define GET_MODE_BUFFER_SIZE 500
118
#define OPTIONS_ARRAY_SIZE 10
119
#define BLOCK_SIZE 8
120
#define TEMP_STRIDE 8
121
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
122
123
#if ARCH_X86 && HAVE_INLINE_ASM
124
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
125
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
126
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
127
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
128
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
129
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
130
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
131
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
132
#endif
133
134
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
135
136
137
static const struct PPFilter filters[]=
138
{
139
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
140
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
141
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
142
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
143
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
144
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
145
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
146
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
147
{"dr", "dering", 1, 5, 6, DERING},
148
{"al", "autolevels", 0, 1, 2, LEVEL_FIX},
149
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
150
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
151
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
152
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
153
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
154
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
155
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
156
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
157
{"be", "bitexact", 1, 0, 0, BITEXACT},
158
{"vi", "visualize", 1, 0, 0, VISUALIZE},
159
{NULL, NULL,0,0,0,0} //End Marker
160
};
161
162
/*
 * Filter-name aliases, stored as consecutive (alias, expansion) pairs.
 * The mode-string parser splices the expansion into its work buffer whenever
 * a filter name equals the alias. A single NULL terminates the list.
 */
static const char * const replaceTable[] = {
    /* alias      expansion               */
    "default",    "hb:a,vb:a,dr:a",
    "de",         "hb:a,vb:a,dr:a",
    "fast",       "h1:a,v1:a,dr:a",
    "fa",         "h1:a,v1:a,dr:a",
    "ac",         "ha:a:128:7,va:a,dr:a",
    NULL          // end marker
};
171
172
/* The horizontal functions exist only in C because the MMX
173
* code is faster with vertical filters and transposing. */
174
175
/**
176
* Check if the given 8x8 Block is mostly "flat"
177
*/
178
static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
179
{
180
int numEq= 0;
181
int y;
182
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
183
const int dcThreshold= dcOffset*2 + 1;
184
185
for(y=0; y<BLOCK_SIZE; y++){
186
numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
187
numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
188
numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
189
numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
190
numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
191
numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
192
numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
193
src+= stride;
194
}
195
return numEq > c->ppMode.flatnessThreshold;
196
}
197
198
/**
199
* Check if the middle 8x8 Block in the given 8x16 block is flat
200
*/
201
static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
202
{
203
int numEq= 0;
204
int y;
205
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206
const int dcThreshold= dcOffset*2 + 1;
207
208
src+= stride*4; // src points to begin of the 8x8 Block
209
for(y=0; y<BLOCK_SIZE-1; y++){
210
numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
211
numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
212
numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
213
numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
214
numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
215
numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
216
numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
217
numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
218
src+= stride;
219
}
220
return numEq > c->ppMode.flatnessThreshold;
221
}
222
223
/**
 * Sparse range check of an 8x8 block in horizontal direction.
 *
 * One pixel pair per row is probed (the pair used cycles with a period of
 * four rows); the block fails as soon as one probed pair differs by more
 * than 2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance between two rows
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if all probed pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* column pair probed in row (n & 3) */
    static const uint8_t probe[4][2] = { { 0, 5 }, { 2, 7 }, { 4, 1 }, { 6, 3 } };
    const int bias = 2 * QP;
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const uint8_t *pair = probe[row & 3];

        /* unsigned compare: true when the difference is outside [-2*QP, 2*QP] */
        if ((unsigned)(src[pair[0]] - src[pair[1]] + bias) > 4 * QP)
            return 0;
    }
    return 1;
}
238
239
static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
240
{
241
int x;
242
src+= stride*4;
243
for(x=0; x<BLOCK_SIZE; x+=4){
244
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
245
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
246
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
247
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
248
}
249
return 1;
250
}
251
252
static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
253
{
254
if( isHorizDC_C(src, stride, c) ){
255
return isHorizMinMaxOk_C(src, stride, c->QP);
256
}else{
257
return 2;
258
}
259
}
260
261
static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
262
{
263
if( isVertDC_C(src, stride, c) ){
264
return isVertMinMaxOk_C(src, stride, c->QP);
265
}else{
266
return 2;
267
}
268
}
269
270
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
271
{
272
int y;
273
for(y=0; y<BLOCK_SIZE; y++){
274
const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
275
276
if(FFABS(middleEnergy) < 8*c->QP){
277
const int q=(dst[3] - dst[4])/2;
278
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
279
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
280
281
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
282
d= FFMAX(d, 0);
283
284
d= (5*d + 32) >> 6;
285
d*= FFSIGN(-middleEnergy);
286
287
if(q>0)
288
{
289
d = FFMAX(d, 0);
290
d = FFMIN(d, q);
291
}
292
else
293
{
294
d = FFMIN(d, 0);
295
d = FFMAX(d, q);
296
}
297
298
dst[3]-= d;
299
dst[4]+= d;
300
}
301
dst+= stride;
302
}
303
}
304
305
/**
306
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
307
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
308
*/
309
static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
310
{
311
int y;
312
for(y=0; y<BLOCK_SIZE; y++){
313
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
314
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
315
316
int sums[10];
317
sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
318
sums[1] = sums[0] - first + dst[3];
319
sums[2] = sums[1] - first + dst[4];
320
sums[3] = sums[2] - first + dst[5];
321
sums[4] = sums[3] - first + dst[6];
322
sums[5] = sums[4] - dst[0] + dst[7];
323
sums[6] = sums[5] - dst[1] + last;
324
sums[7] = sums[6] - dst[2] + last;
325
sums[8] = sums[7] - dst[3] + last;
326
sums[9] = sums[8] - dst[4] + last;
327
328
dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
329
dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
330
dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
331
dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
332
dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
333
dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
334
dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
335
dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
336
337
dst+= stride;
338
}
339
}
340
341
/**
342
* Experimental Filter 1 (Horizontal)
343
* will not damage linear gradients
344
* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
345
* can only smooth blocks at the expected locations (it cannot smooth them if they did move)
346
* MMX2 version does correct clipping C version does not
347
* not identical with the vertical one
348
*/
349
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
350
{
351
int y;
352
static uint64_t lut[256];
353
if(!lut[255])
354
{
355
int i;
356
for(i=0; i<256; i++)
357
{
358
int v= i < 128 ? 2*i : 2*(i-256);
359
/*
360
//Simulate 112242211 9-Tap filter
361
uint64_t a= (v/16) & 0xFF;
362
uint64_t b= (v/8) & 0xFF;
363
uint64_t c= (v/4) & 0xFF;
364
uint64_t d= (3*v/8) & 0xFF;
365
*/
366
//Simulate piecewise linear interpolation
367
uint64_t a= (v/16) & 0xFF;
368
uint64_t b= (v*3/16) & 0xFF;
369
uint64_t c= (v*5/16) & 0xFF;
370
uint64_t d= (7*v/16) & 0xFF;
371
uint64_t A= (0x100 - a)&0xFF;
372
uint64_t B= (0x100 - b)&0xFF;
373
uint64_t C= (0x100 - c)&0xFF;
374
uint64_t D= (0x100 - c)&0xFF;
375
376
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
377
(D<<24) | (C<<16) | (B<<8) | (A);
378
//lut[i] = (v<<32) | (v<<24);
379
}
380
}
381
382
for(y=0; y<BLOCK_SIZE; y++){
383
int a= src[1] - src[2];
384
int b= src[3] - src[4];
385
int c= src[5] - src[6];
386
387
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
388
389
if(d < QP){
390
int v = d * FFSIGN(-b);
391
392
src[1] +=v/8;
393
src[2] +=v/4;
394
src[3] +=3*v/8;
395
src[4] -=3*v/8;
396
src[5] -=v/4;
397
src[6] -=v/8;
398
}
399
src+=stride;
400
}
401
}
402
403
/**
404
* accurate deblock filter
405
*/
406
static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
407
int stride, const PPContext *c, int mode)
408
{
409
int y;
410
const int QP= c->QP;
411
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
412
const int dcThreshold= dcOffset*2 + 1;
413
//START_TIMER
414
src+= step*4; // src points to begin of the 8x8 Block
415
for(y=0; y<8; y++){
416
int numEq= 0;
417
418
numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
419
numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
420
numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
421
numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
422
numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
423
numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
424
numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
425
numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
426
numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
427
if(numEq > c->ppMode.flatnessThreshold){
428
int min, max, x;
429
430
if(src[0] > src[step]){
431
max= src[0];
432
min= src[step];
433
}else{
434
max= src[step];
435
min= src[0];
436
}
437
for(x=2; x<8; x+=2){
438
if(src[x*step] > src[(x+1)*step]){
439
if(src[x *step] > max) max= src[ x *step];
440
if(src[(x+1)*step] < min) min= src[(x+1)*step];
441
}else{
442
if(src[(x+1)*step] > max) max= src[(x+1)*step];
443
if(src[ x *step] < min) min= src[ x *step];
444
}
445
}
446
if(max-min < 2*QP){
447
const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
448
const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
449
450
int sums[10];
451
sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
452
sums[1] = sums[0] - first + src[3*step];
453
sums[2] = sums[1] - first + src[4*step];
454
sums[3] = sums[2] - first + src[5*step];
455
sums[4] = sums[3] - first + src[6*step];
456
sums[5] = sums[4] - src[0*step] + src[7*step];
457
sums[6] = sums[5] - src[1*step] + last;
458
sums[7] = sums[6] - src[2*step] + last;
459
sums[8] = sums[7] - src[3*step] + last;
460
sums[9] = sums[8] - src[4*step] + last;
461
462
if (mode & VISUALIZE) {
463
src[0*step] =
464
src[1*step] =
465
src[2*step] =
466
src[3*step] =
467
src[4*step] =
468
src[5*step] =
469
src[6*step] =
470
src[7*step] = 128;
471
}
472
src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
473
src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
474
src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
475
src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
476
src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
477
src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
478
src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
479
src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
480
}
481
}else{
482
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
483
484
if(FFABS(middleEnergy) < 8*QP){
485
const int q=(src[3*step] - src[4*step])/2;
486
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
487
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
488
489
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
490
d= FFMAX(d, 0);
491
492
d= (5*d + 32) >> 6;
493
d*= FFSIGN(-middleEnergy);
494
495
if(q>0){
496
d = FFMAX(d, 0);
497
d = FFMIN(d, q);
498
}else{
499
d = FFMIN(d, 0);
500
d = FFMAX(d, q);
501
}
502
503
if ((mode & VISUALIZE) && d) {
504
d= (d < 0) ? 32 : -32;
505
src[3*step]= av_clip_uint8(src[3*step] - d);
506
src[4*step]= av_clip_uint8(src[4*step] + d);
507
d = 0;
508
}
509
510
src[3*step]-= d;
511
src[4*step]+= d;
512
}
513
}
514
515
src += stride;
516
}
517
/*if(step==16){
518
STOP_TIMER("step16")
519
}else{
520
STOP_TIMER("stepX")
521
}*/
522
}
523
524
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
525
//Plain C versions
526
//we always compile C for testing which needs bitexactness
527
#define TEMPLATE_PP_C 1
528
#include "postprocess_template.c"
529
530
#if HAVE_ALTIVEC
531
# define TEMPLATE_PP_ALTIVEC 1
532
# include "postprocess_altivec_template.c"
533
# include "postprocess_template.c"
534
#endif
535
536
#if ARCH_X86 && HAVE_INLINE_ASM
537
# if CONFIG_RUNTIME_CPUDETECT
538
# define TEMPLATE_PP_MMX 1
539
# include "postprocess_template.c"
540
# define TEMPLATE_PP_MMXEXT 1
541
# include "postprocess_template.c"
542
# define TEMPLATE_PP_3DNOW 1
543
# include "postprocess_template.c"
544
# define TEMPLATE_PP_SSE2 1
545
# include "postprocess_template.c"
546
# else
547
# if HAVE_SSE2_INLINE
548
# define TEMPLATE_PP_SSE2 1
549
# include "postprocess_template.c"
550
# elif HAVE_MMXEXT_INLINE
551
# define TEMPLATE_PP_MMXEXT 1
552
# include "postprocess_template.c"
553
# elif HAVE_AMD3DNOW_INLINE
554
# define TEMPLATE_PP_3DNOW 1
555
# include "postprocess_template.c"
556
# elif HAVE_MMX_INLINE
557
# define TEMPLATE_PP_MMX 1
558
# include "postprocess_template.c"
559
# endif
560
# endif
561
#endif
562
563
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
564
const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
565
566
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
567
const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
568
{
569
pp_fn pp = postProcess_C;
570
PPContext *c= (PPContext *)vc;
571
PPMode *ppMode= (PPMode *)vm;
572
c->ppMode= *ppMode; //FIXME
573
574
if (!(ppMode->lumMode & BITEXACT)) {
575
#if CONFIG_RUNTIME_CPUDETECT
576
#if ARCH_X86 && HAVE_INLINE_ASM
577
// ordered per speed fastest first
578
if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
579
else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
580
else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
581
else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
582
#elif HAVE_ALTIVEC
583
if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
584
#endif
585
#else /* CONFIG_RUNTIME_CPUDETECT */
586
#if HAVE_SSE2_INLINE
587
pp = postProcess_SSE2;
588
#elif HAVE_MMXEXT_INLINE
589
pp = postProcess_MMX2;
590
#elif HAVE_AMD3DNOW_INLINE
591
pp = postProcess_3DNow;
592
#elif HAVE_MMX_INLINE
593
pp = postProcess_MMX;
594
#elif HAVE_ALTIVEC
595
pp = postProcess_altivec;
596
#endif
597
#endif /* !CONFIG_RUNTIME_CPUDETECT */
598
}
599
600
pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
601
}
602
603
/* -pp Command line Help
 *
 * The filter names listed here must match the entries of the filters[] and
 * replaceTable[] tables, since those are what the mode-string parser accepts.
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
648
649
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
650
{
651
char temp[GET_MODE_BUFFER_SIZE];
652
char *p= temp;
653
static const char filterDelimiters[] = ",/";
654
static const char optionDelimiters[] = ":|";
655
struct PPMode *ppMode;
656
char *filterToken;
657
658
if (!name) {
659
av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
660
return NULL;
661
}
662
663
if (!strcmp(name, "help")) {
664
const char *p;
665
for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
666
av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
667
av_log(NULL, AV_LOG_INFO, "%s", temp);
668
}
669
return NULL;
670
}
671
672
ppMode= av_malloc(sizeof(PPMode));
673
if (!ppMode)
674
return NULL;
675
676
ppMode->lumMode= 0;
677
ppMode->chromMode= 0;
678
ppMode->maxTmpNoise[0]= 700;
679
ppMode->maxTmpNoise[1]= 1500;
680
ppMode->maxTmpNoise[2]= 3000;
681
ppMode->maxAllowedY= 234;
682
ppMode->minAllowedY= 16;
683
ppMode->baseDcDiff= 256/8;
684
ppMode->flatnessThreshold= 56-16-1;
685
ppMode->maxClippedThreshold= (AVRational){1,100};
686
ppMode->error=0;
687
688
memset(temp, 0, GET_MODE_BUFFER_SIZE);
689
av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
690
691
av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
692
693
for(;;){
694
const char *filterName;
695
int q= 1000000; //PP_QUALITY_MAX;
696
int chrom=-1;
697
int luma=-1;
698
const char *option;
699
const char *options[OPTIONS_ARRAY_SIZE];
700
int i;
701
int filterNameOk=0;
702
int numOfUnknownOptions=0;
703
int enable=1; //does the user want us to enabled or disabled the filter
704
char *tokstate;
705
706
filterToken= av_strtok(p, filterDelimiters, &tokstate);
707
if(!filterToken) break;
708
p+= strlen(filterToken) + 1; // p points to next filterToken
709
filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
710
if (!filterName) {
711
ppMode->error++;
712
break;
713
}
714
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
715
716
if(*filterName == '-'){
717
enable=0;
718
filterName++;
719
}
720
721
for(;;){ //for all options
722
option= av_strtok(NULL, optionDelimiters, &tokstate);
723
if(!option) break;
724
725
av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
726
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
727
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
728
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
729
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
730
else{
731
options[numOfUnknownOptions] = option;
732
numOfUnknownOptions++;
733
}
734
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
735
}
736
options[numOfUnknownOptions] = NULL;
737
738
/* replace stuff from the replace Table */
739
for(i=0; replaceTable[2*i]; i++){
740
if(!strcmp(replaceTable[2*i], filterName)){
741
size_t newlen = strlen(replaceTable[2*i + 1]);
742
int plen;
743
int spaceLeft;
744
745
p--, *p=',';
746
747
plen= strlen(p);
748
spaceLeft= p - temp + plen;
749
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
750
ppMode->error++;
751
break;
752
}
753
memmove(p + newlen, p, plen+1);
754
memcpy(p, replaceTable[2*i + 1], newlen);
755
filterNameOk=1;
756
}
757
}
758
759
for(i=0; filters[i].shortName; i++){
760
if( !strcmp(filters[i].longName, filterName)
761
|| !strcmp(filters[i].shortName, filterName)){
762
ppMode->lumMode &= ~filters[i].mask;
763
ppMode->chromMode &= ~filters[i].mask;
764
765
filterNameOk=1;
766
if(!enable) break; // user wants to disable it
767
768
if(q >= filters[i].minLumQuality && luma)
769
ppMode->lumMode|= filters[i].mask;
770
if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
771
if(q >= filters[i].minChromQuality)
772
ppMode->chromMode|= filters[i].mask;
773
774
if(filters[i].mask == LEVEL_FIX){
775
int o;
776
ppMode->minAllowedY= 16;
777
ppMode->maxAllowedY= 234;
778
for(o=0; options[o]; o++){
779
if( !strcmp(options[o],"fullyrange")
780
||!strcmp(options[o],"f")){
781
ppMode->minAllowedY= 0;
782
ppMode->maxAllowedY= 255;
783
numOfUnknownOptions--;
784
}
785
}
786
}
787
else if(filters[i].mask == TEMP_NOISE_FILTER)
788
{
789
int o;
790
int numOfNoises=0;
791
792
for(o=0; options[o]; o++){
793
char *tail;
794
ppMode->maxTmpNoise[numOfNoises]=
795
strtol(options[o], &tail, 0);
796
if(tail!=options[o]){
797
numOfNoises++;
798
numOfUnknownOptions--;
799
if(numOfNoises >= 3) break;
800
}
801
}
802
}
803
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
804
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
805
int o;
806
807
for(o=0; options[o] && o<2; o++){
808
char *tail;
809
int val= strtol(options[o], &tail, 0);
810
if(tail==options[o]) break;
811
812
numOfUnknownOptions--;
813
if(o==0) ppMode->baseDcDiff= val;
814
else ppMode->flatnessThreshold= val;
815
}
816
}
817
else if(filters[i].mask == FORCE_QUANT){
818
int o;
819
ppMode->forcedQuant= 15;
820
821
for(o=0; options[o] && o<1; o++){
822
char *tail;
823
int val= strtol(options[o], &tail, 0);
824
if(tail==options[o]) break;
825
826
numOfUnknownOptions--;
827
ppMode->forcedQuant= val;
828
}
829
}
830
}
831
}
832
if(!filterNameOk) ppMode->error++;
833
ppMode->error += numOfUnknownOptions;
834
}
835
836
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
837
if(ppMode->error){
838
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
839
av_free(ppMode);
840
return NULL;
841
}
842
return ppMode;
843
}
844
845
/**
 * Release a mode; the pointer is handed straight to av_free().
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
848
849
/**
 * Replace the buffer at *p with a fresh, zero-filled one of the given size.
 *
 * The previous contents are discarded, not copied. *p is whatever
 * av_mallocz() returns.
 */
static void reallocAlign(void **p, int size)
{
    void *previous = *p;

    av_free(previous);
    *p = av_mallocz(size);
}
853
854
/**
 * (Re)allocate all working buffers of the context for the given geometry.
 *
 * Every buffer is freed and replaced by a zero-filled one, so previous
 * contents are lost. The new strides are stored in the context.
 * Allocation failures are not reported; the affected pointers are left as
 * returned by av_mallocz().
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, stride*24+32);
    reallocAlign((void **)&c->tempSrc, stride*24);
    reallocAlign((void **)&c->tempBlocks, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
    if (c->yHistogram) {
        /* Every bin starts with the same value; it is computed in 64 bit so
         * that width*height cannot overflow for very large pictures. */
        const uint64_t initialCount = (uint64_t)width*height/64*15/256;

        for(i=0; i<256; i++)
            c->yHistogram[i]= initialCount;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
}
880
881
static const char * context_to_name(void * ptr) {
882
return "postproc";
883
}
884
885
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
886
887
/**
 * Allocate and initialise a postprocessing context.
 *
 * @param width   picture width, used to size the buffers
 * @param height  picture height, used to size the buffers
 * @param cpuCaps combination of PP_CPU_CAPS_* flags and, if PP_FORMAT is set,
 *                the chroma subsampling shifts (bits 0-1 horizontal,
 *                bits 4-5 vertical); without PP_FORMAT both shifts are 1
 * @return the new context, or NULL if the context itself cannot be allocated
 */
av_cold pp_context *pp_get_context(int width, int height, int cpuCaps)
{
    const int hasFormat = cpuCaps & PP_FORMAT;
    int stride, qpStride;
    PPContext *c = av_mallocz(sizeof(PPContext));

    if (!c)
        return NULL;

    c->av_class = &av_codec_context_class;

    c->hChromaSubSample = hasFormat ?  cpuCaps       & 0x3 : 1;
    c->vChromaSubSample = hasFormat ? (cpuCaps >> 4) & 0x3 : 1;

    if (cpuCaps & PP_CPU_CAPS_AUTO) {
        c->cpuCaps = av_get_cpu_flags();
    } else {
        /* translate the public PP_CPU_CAPS_* bits into AV_CPU_FLAG_* bits */
        int flags = 0;

        if (cpuCaps & PP_CPU_CAPS_MMX)      flags |= AV_CPU_FLAG_MMX;
        if (cpuCaps & PP_CPU_CAPS_MMX2)     flags |= AV_CPU_FLAG_MMXEXT;
        if (cpuCaps & PP_CPU_CAPS_3DNOW)    flags |= AV_CPU_FLAG_3DNOW;
        if (cpuCaps & PP_CPU_CAPS_ALTIVEC)  flags |= AV_CPU_FLAG_ALTIVEC;
        c->cpuCaps = flags;
    }

    /* initial guesses; pp_postprocess() reallocates if they are too small */
    stride   = FFALIGN(width, 16);
    qpStride = (width + 15) / 16 + 2;
    reallocBuffers(c, width, height, stride, qpStride);

    c->frameNum = -1;

    return c;
}
919
920
av_cold void pp_free_context(void *vc){
921
PPContext *c = (PPContext*)vc;
922
int i;
923
924
for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
925
av_free(c->tempBlurred[i]);
926
for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
927
av_free(c->tempBlurredPast[i]);
928
929
av_free(c->tempBlocks);
930
av_free(c->yHistogram);
931
av_free(c->tempDst);
932
av_free(c->tempSrc);
933
av_free(c->deintTemp);
934
av_free(c->stdQPTable);
935
av_free(c->nonBQPTable);
936
av_free(c->forcedQPTable);
937
938
memset(c, 0, sizeof(PPContext));
939
940
av_free(c);
941
}
942
943
void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
944
uint8_t * dst[3], const int dstStride[3],
945
int width, int height,
946
const QP_STORE_T *QP_store, int QPStride,
947
pp_mode *vm, void *vc, int pict_type)
948
{
949
int mbWidth = (width+15)>>4;
950
int mbHeight= (height+15)>>4;
951
PPMode *mode = vm;
952
PPContext *c = vc;
953
int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
954
int absQPStride = FFABS(QPStride);
955
956
// c->stride and c->QPStride are always positive
957
if(c->stride < minStride || c->qpStride < absQPStride)
958
reallocBuffers(c, width, height,
959
FFMAX(minStride, c->stride),
960
FFMAX(c->qpStride, absQPStride));
961
962
if(!QP_store || (mode->lumMode & FORCE_QUANT)){
963
int i;
964
QP_store= c->forcedQPTable;
965
absQPStride = QPStride = 0;
966
if(mode->lumMode & FORCE_QUANT)
967
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
968
else
969
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
970
}
971
972
if(pict_type & PP_PICT_TYPE_QP2){
973
int i;
974
const int count= FFMAX(mbHeight * absQPStride, mbWidth);
975
for(i=0; i<(count>>2); i++){
976
((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
977
}
978
for(i<<=2; i<count; i++){
979
c->stdQPTable[i] = QP_store[i]>>1;
980
}
981
QP_store= c->stdQPTable;
982
QPStride= absQPStride;
983
}
984
985
if(0){
986
int x,y;
987
for(y=0; y<mbHeight; y++){
988
for(x=0; x<mbWidth; x++){
989
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
990
}
991
av_log(c, AV_LOG_INFO, "\n");
992
}
993
av_log(c, AV_LOG_INFO, "\n");
994
}
995
996
if((pict_type&7)!=3){
997
if (QPStride >= 0){
998
int i;
999
const int count= FFMAX(mbHeight * QPStride, mbWidth);
1000
for(i=0; i<(count>>2); i++){
1001
AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
1002
}
1003
for(i<<=2; i<count; i++){
1004
c->nonBQPTable[i] = QP_store[i] & 0x3F;
1005
}
1006
} else {
1007
int i,j;
1008
for(i=0; i<mbHeight; i++) {
1009
for(j=0; j<absQPStride; j++) {
1010
c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1011
}
1012
}
1013
}
1014
}
1015
1016
av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1017
mode->lumMode, mode->chromMode);
1018
1019
postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1020
width, height, QP_store, QPStride, 0, mode, c);
1021
1022
if (!(src[1] && src[2] && dst[1] && dst[2]))
1023
return;
1024
1025
width = (width )>>c->hChromaSubSample;
1026
height = (height)>>c->vChromaSubSample;
1027
1028
if(mode->chromMode){
1029
postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1030
width, height, QP_store, QPStride, 1, mode, c);
1031
postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1032
width, height, QP_store, QPStride, 2, mode, c);
1033
}
1034
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1035
linecpy(dst[1], src[1], height, srcStride[1]);
1036
linecpy(dst[2], src[2], height, srcStride[2]);
1037
}else{
1038
int y;
1039
for(y=0; y<height; y++){
1040
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1041
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1042
}
1043
}
1044
}
1045
1046