CoCalc -- postprocess.c

05. Matplotlib / ffmpeg-3.0 / libpostproc / postprocess.c
⁵²⁸⁶⁷ views
1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer ([email protected])
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <[email protected]>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

23
/**
24
 * @file
25
 * postprocessing.
26
 */
27

28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

74
//Changelog: use git log
75

76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include "libavutil/avassert.h"
79
#include "libavutil/intreadwrite.h"
80
#include <inttypes.h>
81
#include <stdio.h>
82
#include <stdlib.h>
83
#include <string.h>
84
//#undef HAVE_MMXEXT_INLINE
85
//#define HAVE_AMD3DNOW_INLINE
86
//#undef HAVE_MMX_INLINE
87
//#undef ARCH_X86
88
//#define DEBUG_BRIGHTNESS
89
#include "postprocess.h"
90
#include "postprocess_internal.h"
91
#include "libavutil/avstring.h"
92

93
#include "libavutil/ffversion.h"
94
const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
95

96
unsigned postproc_version(void)
97
{
98
    av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
99
    return LIBPOSTPROC_VERSION_INT;
100
}
101

102
const char *postproc_configuration(void)
103
{
104
    return FFMPEG_CONFIGURATION;
105
}
106

107
const char *postproc_license(void)
108
{
109
#define LICENSE_PREFIX "libpostproc license: "
110
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111
}
112

113
#if HAVE_ALTIVEC_H
114
#include <altivec.h>
115
#endif
116

117
/* size of the scratch buffer the mode string is copied into while parsing */
#define GET_MODE_BUFFER_SIZE 500
/* capacity of the per-filter option pointer array (including the NULL end marker) */
#define OPTIONS_ARRAY_SIZE   10
/* edge length of the blocks the filters operate on */
#define BLOCK_SIZE           8
#define TEMP_STRIDE          8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
122

123
#if ARCH_X86 && HAVE_INLINE_ASM
124
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
125
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
126
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
127
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
128
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
129
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
130
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
131
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
132
#endif
133

134
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
135

136

137
static const struct PPFilter filters[]=
138
{
139
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
140
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
141
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
142
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
143
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
144
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
145
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
146
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
147
    {"dr", "dering",                1, 5, 6, DERING},
148
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
149
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
150
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
151
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
152
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
153
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
154
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
155
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
156
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
157
    {"be", "bitexact",              1, 0, 0, BITEXACT},
158
    {"vi", "visualize",             1, 0, 0, VISUALIZE},
159
    {NULL, NULL,0,0,0,0} //End Marker
160
};
161

162
/*
 * Alias table used by the mode-string parser: entries come in pairs, the
 * alias name at even indices and the filter list it expands to right after it.
 * A NULL alias name ends the table.
 */
static const char * const replaceTable[] = {
    /* alias */     /* expansion */
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
171

172
/* The horizontal functions exist only in C because the MMX
173
 * code is faster with vertical filters and transposing. */
174

175
/**
176
 * Check if the given 8x8 Block is mostly "flat"
177
 */
178
static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
179
{
180
    int numEq= 0;
181
    int y;
182
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
183
    const int dcThreshold= dcOffset*2 + 1;
184

185
    for(y=0; y<BLOCK_SIZE; y++){
186
        numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
187
        numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
188
        numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
189
        numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
190
        numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
191
        numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
192
        numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
193
        src+= stride;
194
    }
195
    return numEq > c->ppMode.flatnessThreshold;
196
}
197

198
/**
199
 * Check if the middle 8x8 Block in the given 8x16 block is flat
200
 */
201
static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
202
{
203
    int numEq= 0;
204
    int y;
205
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206
    const int dcThreshold= dcOffset*2 + 1;
207

208
    src+= stride*4; // src points to begin of the 8x8 Block
209
    for(y=0; y<BLOCK_SIZE-1; y++){
210
        numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
211
        numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
212
        numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
213
        numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
214
        numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
215
        numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
216
        numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
217
        numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
218
        src+= stride;
219
    }
220
    return numEq > c->ppMode.flatnessThreshold;
221
}
222

223
/**
 * Sparse range check on an 8x8 block, one pixel pair per row.
 *
 * For every row a fixed pair of columns is compared; the pattern repeats
 * every 4 rows. The block passes if every sampled difference lies within
 * [-2*QP, 2*QP].
 *
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* column pair sampled in row (n % 4) */
    static const uint8_t samplePair[4][2] = {
        { 0, 5 }, { 2, 7 }, { 4, 1 }, { 6, 3 },
    };
    const int      bias  = 2 * QP;
    const unsigned limit = 4 * QP;
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row * stride;
        const int lhs = line[samplePair[row & 3][0]];
        const int rhs = line[samplePair[row & 3][1]];

        /* unsigned compare rejects both diff > 2*QP and diff < -2*QP */
        if ((unsigned)(lhs - rhs + bias) > limit)
            return 0;
    }
    return 1;
}
238

239
static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
240
{
241
    int x;
242
    src+= stride*4;
243
    for(x=0; x<BLOCK_SIZE; x+=4){
244
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
245
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
246
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
247
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
248
    }
249
    return 1;
250
}
251

252
static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
253
{
254
    if( isHorizDC_C(src, stride, c) ){
255
        return isHorizMinMaxOk_C(src, stride, c->QP);
256
    }else{
257
        return 2;
258
    }
259
}
260

261
static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
262
{
263
    if( isVertDC_C(src, stride, c) ){
264
        return isVertMinMaxOk_C(src, stride, c->QP);
265
    }else{
266
        return 2;
267
    }
268
}
269

270
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
271
{
272
    int y;
273
    for(y=0; y<BLOCK_SIZE; y++){
274
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
275

276
        if(FFABS(middleEnergy) < 8*c->QP){
277
            const int q=(dst[3] - dst[4])/2;
278
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
279
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
280

281
            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
282
            d= FFMAX(d, 0);
283

284
            d= (5*d + 32) >> 6;
285
            d*= FFSIGN(-middleEnergy);
286

287
            if(q>0)
288
            {
289
                d = FFMAX(d, 0);
290
                d = FFMIN(d, q);
291
            }
292
            else
293
            {
294
                d = FFMIN(d, 0);
295
                d = FFMAX(d, q);
296
            }
297

298
            dst[3]-= d;
299
            dst[4]+= d;
300
        }
301
        dst+= stride;
302
    }
303
}
304

305
/**
306
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
307
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
308
 */
309
static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
310
{
311
    int y;
312
    for(y=0; y<BLOCK_SIZE; y++){
313
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
314
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
315

316
        int sums[10];
317
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
318
        sums[1] = sums[0] - first  + dst[3];
319
        sums[2] = sums[1] - first  + dst[4];
320
        sums[3] = sums[2] - first  + dst[5];
321
        sums[4] = sums[3] - first  + dst[6];
322
        sums[5] = sums[4] - dst[0] + dst[7];
323
        sums[6] = sums[5] - dst[1] + last;
324
        sums[7] = sums[6] - dst[2] + last;
325
        sums[8] = sums[7] - dst[3] + last;
326
        sums[9] = sums[8] - dst[4] + last;
327

328
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
329
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
330
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
331
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
332
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
333
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
334
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
335
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
336

337
        dst+= stride;
338
    }
339
}
340

341
/**
342
 * Experimental Filter 1 (Horizontal)
343
 * will not damage linear gradients
344
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
345
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
346
 * MMX2 version does correct clipping C version does not
347
 * not identical with the vertical one
348
 */
349
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
350
{
351
    int y;
352
    static uint64_t lut[256];
353
    if(!lut[255])
354
    {
355
        int i;
356
        for(i=0; i<256; i++)
357
        {
358
            int v= i < 128 ? 2*i : 2*(i-256);
359
/*
360
//Simulate 112242211 9-Tap filter
361
            uint64_t a= (v/16)  & 0xFF;
362
            uint64_t b= (v/8)   & 0xFF;
363
            uint64_t c= (v/4)   & 0xFF;
364
            uint64_t d= (3*v/8) & 0xFF;
365
*/
366
//Simulate piecewise linear interpolation
367
            uint64_t a= (v/16)   & 0xFF;
368
            uint64_t b= (v*3/16) & 0xFF;
369
            uint64_t c= (v*5/16) & 0xFF;
370
            uint64_t d= (7*v/16) & 0xFF;
371
            uint64_t A= (0x100 - a)&0xFF;
372
            uint64_t B= (0x100 - b)&0xFF;
373
            uint64_t C= (0x100 - c)&0xFF;
374
            uint64_t D= (0x100 - c)&0xFF;
375

376
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
377
                       (D<<24) | (C<<16) | (B<<8)  | (A);
378
            //lut[i] = (v<<32) | (v<<24);
379
        }
380
    }
381

382
    for(y=0; y<BLOCK_SIZE; y++){
383
        int a= src[1] - src[2];
384
        int b= src[3] - src[4];
385
        int c= src[5] - src[6];
386

387
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
388

389
        if(d < QP){
390
            int v = d * FFSIGN(-b);
391

392
            src[1] +=v/8;
393
            src[2] +=v/4;
394
            src[3] +=3*v/8;
395
            src[4] -=3*v/8;
396
            src[5] -=v/4;
397
            src[6] -=v/8;
398
        }
399
        src+=stride;
400
    }
401
}
402

403
/**
404
 * accurate deblock filter
405
 */
406
static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
407
                                            int stride, const PPContext *c, int mode)
408
{
409
    int y;
410
    const int QP= c->QP;
411
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
412
    const int dcThreshold= dcOffset*2 + 1;
413
//START_TIMER
414
    src+= step*4; // src points to begin of the 8x8 Block
415
    for(y=0; y<8; y++){
416
        int numEq= 0;
417

418
        numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
419
        numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
420
        numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
421
        numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
422
        numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
423
        numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
424
        numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
425
        numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
426
        numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
427
        if(numEq > c->ppMode.flatnessThreshold){
428
            int min, max, x;
429

430
            if(src[0] > src[step]){
431
                max= src[0];
432
                min= src[step];
433
            }else{
434
                max= src[step];
435
                min= src[0];
436
            }
437
            for(x=2; x<8; x+=2){
438
                if(src[x*step] > src[(x+1)*step]){
439
                        if(src[x    *step] > max) max= src[ x   *step];
440
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
441
                }else{
442
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
443
                        if(src[ x   *step] < min) min= src[ x   *step];
444
                }
445
            }
446
            if(max-min < 2*QP){
447
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
448
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
449

450
                int sums[10];
451
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
452
                sums[1] = sums[0] - first       + src[3*step];
453
                sums[2] = sums[1] - first       + src[4*step];
454
                sums[3] = sums[2] - first       + src[5*step];
455
                sums[4] = sums[3] - first       + src[6*step];
456
                sums[5] = sums[4] - src[0*step] + src[7*step];
457
                sums[6] = sums[5] - src[1*step] + last;
458
                sums[7] = sums[6] - src[2*step] + last;
459
                sums[8] = sums[7] - src[3*step] + last;
460
                sums[9] = sums[8] - src[4*step] + last;
461

462
                if (mode & VISUALIZE) {
463
                    src[0*step] =
464
                    src[1*step] =
465
                    src[2*step] =
466
                    src[3*step] =
467
                    src[4*step] =
468
                    src[5*step] =
469
                    src[6*step] =
470
                    src[7*step] = 128;
471
                }
472
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
473
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
474
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
475
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
476
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
477
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
478
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
479
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
480
            }
481
        }else{
482
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
483

484
            if(FFABS(middleEnergy) < 8*QP){
485
                const int q=(src[3*step] - src[4*step])/2;
486
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
487
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
488

489
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
490
                d= FFMAX(d, 0);
491

492
                d= (5*d + 32) >> 6;
493
                d*= FFSIGN(-middleEnergy);
494

495
                if(q>0){
496
                    d = FFMAX(d, 0);
497
                    d = FFMIN(d, q);
498
                }else{
499
                    d = FFMIN(d, 0);
500
                    d = FFMAX(d, q);
501
                }
502

503
                if ((mode & VISUALIZE) && d) {
504
                    d= (d < 0) ? 32 : -32;
505
                    src[3*step]= av_clip_uint8(src[3*step] - d);
506
                    src[4*step]= av_clip_uint8(src[4*step] + d);
507
                    d = 0;
508
                }
509

510
                src[3*step]-= d;
511
                src[4*step]+= d;
512
            }
513
        }
514

515
        src += stride;
516
    }
517
/*if(step==16){
518
    STOP_TIMER("step16")
519
}else{
520
    STOP_TIMER("stepX")
521
}*/
522
}
523

524
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
//we always compile C for testing which needs bitexactness
#define TEMPLATE_PP_C 1
#include "postprocess_template.c"

#if HAVE_ALTIVEC
#define TEMPLATE_PP_ALTIVEC 1
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

#if ARCH_X86 && HAVE_INLINE_ASM
#if CONFIG_RUNTIME_CPUDETECT
/* runtime detection: build every x86 variant, postProcess() picks one */
#define TEMPLATE_PP_MMX 1
#include "postprocess_template.c"
#define TEMPLATE_PP_MMXEXT 1
#include "postprocess_template.c"
#define TEMPLATE_PP_3DNOW 1
#include "postprocess_template.c"
#define TEMPLATE_PP_SSE2 1
#include "postprocess_template.c"
#else
/* no runtime detection: build only the first variant that is enabled */
#if HAVE_SSE2_INLINE
#define TEMPLATE_PP_SSE2 1
#include "postprocess_template.c"
#elif HAVE_MMXEXT_INLINE
#define TEMPLATE_PP_MMXEXT 1
#include "postprocess_template.c"
#elif HAVE_AMD3DNOW_INLINE
#define TEMPLATE_PP_3DNOW 1
#include "postprocess_template.c"
#elif HAVE_MMX_INLINE
#define TEMPLATE_PP_MMX 1
#include "postprocess_template.c"
#endif
#endif
#endif
562

563
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
564
                      const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
565

566
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
567
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
568
{
569
    pp_fn pp = postProcess_C;
570
    PPContext *c= (PPContext *)vc;
571
    PPMode *ppMode= (PPMode *)vm;
572
    c->ppMode= *ppMode; //FIXME
573

574
    if (!(ppMode->lumMode & BITEXACT)) {
575
#if CONFIG_RUNTIME_CPUDETECT
576
#if ARCH_X86 && HAVE_INLINE_ASM
577
        // ordered per speed fastest first
578
        if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
579
        else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
580
        else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
581
        else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
582
#elif HAVE_ALTIVEC
583
        if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
584
#endif
585
#else /* CONFIG_RUNTIME_CPUDETECT */
586
#if     HAVE_SSE2_INLINE
587
        pp = postProcess_SSE2;
588
#elif   HAVE_MMXEXT_INLINE
589
        pp = postProcess_MMX2;
590
#elif HAVE_AMD3DNOW_INLINE
591
        pp = postProcess_3DNow;
592
#elif HAVE_MMX_INLINE
593
        pp = postProcess_MMX;
594
#elif HAVE_ALTIVEC
595
        pp = postProcess_altivec;
596
#endif
597
#endif /* !CONFIG_RUNTIME_CPUDETECT */
598
    }
599

600
    pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
601
}
602

603
/* -pp Command line Help
 *
 * The filter names listed here must match the names in the filters[] table
 * exactly; the parser compares them with a case-sensitive strcmp().
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
648

649
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
650
{
651
    char temp[GET_MODE_BUFFER_SIZE];
652
    char *p= temp;
653
    static const char filterDelimiters[] = ",/";
654
    static const char optionDelimiters[] = ":|";
655
    struct PPMode *ppMode;
656
    char *filterToken;
657

658
    if (!name)  {
659
        av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
660
        return NULL;
661
    }
662

663
    if (!strcmp(name, "help")) {
664
        const char *p;
665
        for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
666
            av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
667
            av_log(NULL, AV_LOG_INFO, "%s", temp);
668
        }
669
        return NULL;
670
    }
671

672
    ppMode= av_malloc(sizeof(PPMode));
673
    if (!ppMode)
674
        return NULL;
675

676
    ppMode->lumMode= 0;
677
    ppMode->chromMode= 0;
678
    ppMode->maxTmpNoise[0]= 700;
679
    ppMode->maxTmpNoise[1]= 1500;
680
    ppMode->maxTmpNoise[2]= 3000;
681
    ppMode->maxAllowedY= 234;
682
    ppMode->minAllowedY= 16;
683
    ppMode->baseDcDiff= 256/8;
684
    ppMode->flatnessThreshold= 56-16-1;
685
    ppMode->maxClippedThreshold= (AVRational){1,100};
686
    ppMode->error=0;
687

688
    memset(temp, 0, GET_MODE_BUFFER_SIZE);
689
    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
690

691
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
692

693
    for(;;){
694
        const char *filterName;
695
        int q= 1000000; //PP_QUALITY_MAX;
696
        int chrom=-1;
697
        int luma=-1;
698
        const char *option;
699
        const char *options[OPTIONS_ARRAY_SIZE];
700
        int i;
701
        int filterNameOk=0;
702
        int numOfUnknownOptions=0;
703
        int enable=1; //does the user want us to enabled or disabled the filter
704
        char *tokstate;
705

706
        filterToken= av_strtok(p, filterDelimiters, &tokstate);
707
        if(!filterToken) break;
708
        p+= strlen(filterToken) + 1; // p points to next filterToken
709
        filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
710
        if (!filterName) {
711
            ppMode->error++;
712
            break;
713
        }
714
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
715

716
        if(*filterName == '-'){
717
            enable=0;
718
            filterName++;
719
        }
720

721
        for(;;){ //for all options
722
            option= av_strtok(NULL, optionDelimiters, &tokstate);
723
            if(!option) break;
724

725
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
726
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
727
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
728
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
729
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
730
            else{
731
                options[numOfUnknownOptions] = option;
732
                numOfUnknownOptions++;
733
            }
734
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
735
        }
736
        options[numOfUnknownOptions] = NULL;
737

738
        /* replace stuff from the replace Table */
739
        for(i=0; replaceTable[2*i]; i++){
740
            if(!strcmp(replaceTable[2*i], filterName)){
741
                size_t newlen = strlen(replaceTable[2*i + 1]);
742
                int plen;
743
                int spaceLeft;
744

745
                p--, *p=',';
746

747
                plen= strlen(p);
748
                spaceLeft= p - temp + plen;
749
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
750
                    ppMode->error++;
751
                    break;
752
                }
753
                memmove(p + newlen, p, plen+1);
754
                memcpy(p, replaceTable[2*i + 1], newlen);
755
                filterNameOk=1;
756
            }
757
        }
758

759
        for(i=0; filters[i].shortName; i++){
760
            if(   !strcmp(filters[i].longName, filterName)
761
               || !strcmp(filters[i].shortName, filterName)){
762
                ppMode->lumMode &= ~filters[i].mask;
763
                ppMode->chromMode &= ~filters[i].mask;
764

765
                filterNameOk=1;
766
                if(!enable) break; // user wants to disable it
767

768
                if(q >= filters[i].minLumQuality && luma)
769
                    ppMode->lumMode|= filters[i].mask;
770
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
771
                    if(q >= filters[i].minChromQuality)
772
                            ppMode->chromMode|= filters[i].mask;
773

774
                if(filters[i].mask == LEVEL_FIX){
775
                    int o;
776
                    ppMode->minAllowedY= 16;
777
                    ppMode->maxAllowedY= 234;
778
                    for(o=0; options[o]; o++){
779
                        if(  !strcmp(options[o],"fullyrange")
780
                           ||!strcmp(options[o],"f")){
781
                            ppMode->minAllowedY= 0;
782
                            ppMode->maxAllowedY= 255;
783
                            numOfUnknownOptions--;
784
                        }
785
                    }
786
                }
787
                else if(filters[i].mask == TEMP_NOISE_FILTER)
788
                {
789
                    int o;
790
                    int numOfNoises=0;
791

792
                    for(o=0; options[o]; o++){
793
                        char *tail;
794
                        ppMode->maxTmpNoise[numOfNoises]=
795
                            strtol(options[o], &tail, 0);
796
                        if(tail!=options[o]){
797
                            numOfNoises++;
798
                            numOfUnknownOptions--;
799
                            if(numOfNoises >= 3) break;
800
                        }
801
                    }
802
                }
803
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
804
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
805
                    int o;
806

807
                    for(o=0; options[o] && o<2; o++){
808
                        char *tail;
809
                        int val= strtol(options[o], &tail, 0);
810
                        if(tail==options[o]) break;
811

812
                        numOfUnknownOptions--;
813
                        if(o==0) ppMode->baseDcDiff= val;
814
                        else ppMode->flatnessThreshold= val;
815
                    }
816
                }
817
                else if(filters[i].mask == FORCE_QUANT){
818
                    int o;
819
                    ppMode->forcedQuant= 15;
820

821
                    for(o=0; options[o] && o<1; o++){
822
                        char *tail;
823
                        int val= strtol(options[o], &tail, 0);
824
                        if(tail==options[o]) break;
825

826
                        numOfUnknownOptions--;
827
                        ppMode->forcedQuant= val;
828
                    }
829
                }
830
            }
831
        }
832
        if(!filterNameOk) ppMode->error++;
833
        ppMode->error += numOfUnknownOptions;
834
    }
835

836
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
837
    if(ppMode->error){
838
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
839
        av_free(ppMode);
840
        return NULL;
841
    }
842
    return ppMode;
843
}
844

845
/**
 * Release a mode object.
 *
 * The mode is handed straight to av_free(); nothing inside it is released
 * separately.
 *
 * @param mode mode to release
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
848

849
/**
 * Replace the buffer stored in *p with a fresh zero-filled one.
 *
 * The previous buffer is freed, not resized, so its contents are lost.
 * *p receives whatever av_mallocz() returns; the result is not checked here.
 *
 * @param p    address of the buffer pointer to replace
 * @param size size in bytes of the new buffer
 */
static void reallocAlign(void **p, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
853

854
/**
 * (Re)allocate every scratch buffer and QP table owned by the context.
 *
 * All previous buffers are discarded (see reallocAlign()); the new ones are
 * zero-filled, except yHistogram which is seeded with a size-dependent value.
 * The given stride and qpStride are recorded in the context.
 *
 * Allocation results are not reported to the caller (the function is void);
 * the only buffer written here, yHistogram, is checked before being touched
 * so that an allocation failure does not crash inside this function.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary picture buffers
 * @param qpStride line size used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;   // width in 16-pixel units, rounded up
    int mbHeight= (height+15)>>4;  // height in 16-pixel units, rounded up
    const int histSeed= width*height/64*15/256; // same value for every histogram bin
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, stride*24+32);
    reallocAlign((void **)&c->tempSrc, stride*24);
    reallocAlign((void **)&c->tempBlocks, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
    // av_mallocz() may have failed; do not write through a NULL pointer.
    if(c->yHistogram){
        for(i=0; i<256; i++)
            c->yHistogram[i]= histSeed;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
}
880

881
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr unused; the returned name does not depend on it
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}
884

885
/* AVClass installed into every PPContext by pp_get_context() (c->av_class).
 * Positional initializer: the string "Postproc", the context_to_name
 * callback, then NULL. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
886

887
/**
 * Allocate and initialise a postprocessing context.
 *
 * @param width   picture width; also used to guess the initial strides
 * @param height  picture height
 * @param cpuCaps bit field. With PP_FORMAT set, bits 0-1 give the horizontal
 *                and bits 4-5 the vertical chroma subsampling; otherwise both
 *                are 1. With PP_CPU_CAPS_AUTO set the CPU flags come from
 *                av_get_cpu_flags(); otherwise the PP_CPU_CAPS_* bits are
 *                translated to their AV_CPU_FLAG_* counterparts.
 * @return the new context, or NULL if the context itself could not be allocated
 */
av_cold pp_context *pp_get_context(int width, int height, int cpuCaps)
{
    PPContext *ctx = av_mallocz(sizeof(PPContext));
    /* Only initial guesses: pp_postprocess() reallocates the buffers when the
     * strides it is given turn out to be larger. */
    const int guessedStride   = FFALIGN(width, 16);
    const int guessedQpStride = (width + 15) / 16 + 2;
    int hSub  = 1;
    int vSub  = 1;
    int flags = 0;

    if (!ctx)
        return NULL;

    ctx->av_class = &av_codec_context_class;

    if (cpuCaps & PP_FORMAT) {
        hSub = cpuCaps & 0x3;
        vSub = (cpuCaps >> 4) & 0x3;
    }
    ctx->hChromaSubSample = hSub;
    ctx->vChromaSubSample = vSub;

    if (cpuCaps & PP_CPU_CAPS_AUTO) {
        flags = av_get_cpu_flags();
    } else {
        if (cpuCaps & PP_CPU_CAPS_MMX)
            flags |= AV_CPU_FLAG_MMX;
        if (cpuCaps & PP_CPU_CAPS_MMX2)
            flags |= AV_CPU_FLAG_MMXEXT;
        if (cpuCaps & PP_CPU_CAPS_3DNOW)
            flags |= AV_CPU_FLAG_3DNOW;
        if (cpuCaps & PP_CPU_CAPS_ALTIVEC)
            flags |= AV_CPU_FLAG_ALTIVEC;
    }
    ctx->cpuCaps = flags;

    reallocBuffers(ctx, width, height, guessedStride, guessedQpStride);

    ctx->frameNum = -1;

    return ctx;
}
919

920
av_cold void pp_free_context(void *vc){
921
    PPContext *c = (PPContext*)vc;
922
    int i;
923

924
    for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
925
        av_free(c->tempBlurred[i]);
926
    for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
927
        av_free(c->tempBlurredPast[i]);
928

929
    av_free(c->tempBlocks);
930
    av_free(c->yHistogram);
931
    av_free(c->tempDst);
932
    av_free(c->tempSrc);
933
    av_free(c->deintTemp);
934
    av_free(c->stdQPTable);
935
    av_free(c->nonBQPTable);
936
    av_free(c->forcedQPTable);
937

938
    memset(c, 0, sizeof(PPContext));
939

940
    av_free(c);
941
}
942

943
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
944
                     uint8_t * dst[3], const int dstStride[3],
945
                     int width, int height,
946
                     const QP_STORE_T *QP_store,  int QPStride,
947
                     pp_mode *vm,  void *vc, int pict_type)
948
{
949
    int mbWidth = (width+15)>>4;
950
    int mbHeight= (height+15)>>4;
951
    PPMode *mode = vm;
952
    PPContext *c = vc;
953
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
954
    int absQPStride = FFABS(QPStride);
955

956
    // c->stride and c->QPStride are always positive
957
    if(c->stride < minStride || c->qpStride < absQPStride)
958
        reallocBuffers(c, width, height,
959
                       FFMAX(minStride, c->stride),
960
                       FFMAX(c->qpStride, absQPStride));
961

962
    if(!QP_store || (mode->lumMode & FORCE_QUANT)){
963
        int i;
964
        QP_store= c->forcedQPTable;
965
        absQPStride = QPStride = 0;
966
        if(mode->lumMode & FORCE_QUANT)
967
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
968
        else
969
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
970
    }
971

972
    if(pict_type & PP_PICT_TYPE_QP2){
973
        int i;
974
        const int count= FFMAX(mbHeight * absQPStride, mbWidth);
975
        for(i=0; i<(count>>2); i++){
976
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
977
        }
978
        for(i<<=2; i<count; i++){
979
            c->stdQPTable[i] = QP_store[i]>>1;
980
        }
981
        QP_store= c->stdQPTable;
982
        QPStride= absQPStride;
983
    }
984

985
    if(0){
986
        int x,y;
987
        for(y=0; y<mbHeight; y++){
988
            for(x=0; x<mbWidth; x++){
989
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
990
            }
991
            av_log(c, AV_LOG_INFO, "\n");
992
        }
993
        av_log(c, AV_LOG_INFO, "\n");
994
    }
995

996
    if((pict_type&7)!=3){
997
        if (QPStride >= 0){
998
            int i;
999
            const int count= FFMAX(mbHeight * QPStride, mbWidth);
1000
            for(i=0; i<(count>>2); i++){
1001
                AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
1002
            }
1003
            for(i<<=2; i<count; i++){
1004
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1005
            }
1006
        } else {
1007
            int i,j;
1008
            for(i=0; i<mbHeight; i++) {
1009
                for(j=0; j<absQPStride; j++) {
1010
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1011
                }
1012
            }
1013
        }
1014
    }
1015

1016
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1017
           mode->lumMode, mode->chromMode);
1018

1019
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1020
                width, height, QP_store, QPStride, 0, mode, c);
1021

1022
    if (!(src[1] && src[2] && dst[1] && dst[2]))
1023
        return;
1024

1025
    width  = (width )>>c->hChromaSubSample;
1026
    height = (height)>>c->vChromaSubSample;
1027

1028
    if(mode->chromMode){
1029
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1030
                    width, height, QP_store, QPStride, 1, mode, c);
1031
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1032
                    width, height, QP_store, QPStride, 2, mode, c);
1033
    }
1034
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1035
        linecpy(dst[1], src[1], height, srcStride[1]);
1036
        linecpy(dst[2], src[2], height, srcStride[2]);
1037
    }else{
1038
        int y;
1039
        for(y=0; y<height; y++){
1040
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1041
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1042
        }
1043
    }
1044
}
1045

1046
Product

Resources

Company