/*****************************************************************************
 * quant.c: quantization and level-run
 *****************************************************************************
 * Copyright (C) 2005-2016 x264 project
 *
 * Authors: Loren Merritt <[email protected]>
 *          Fiona Glaser <[email protected]>
 *          Christian Heine <[email protected]>
 *          Henrik Gramner <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at [email protected].
 *****************************************************************************/
28
29
#include "common.h"
30
31
#if HAVE_MMX
32
#include "x86/quant.h"
33
#endif
34
#if ARCH_PPC
35
# include "ppc/quant.h"
36
#endif
37
#if ARCH_ARM
38
# include "arm/quant.h"
39
#endif
40
#if ARCH_AARCH64
41
# include "aarch64/quant.h"
42
#endif
43
#if ARCH_MIPS
44
# include "mips/quant.h"
45
#endif
46
47
/* Quantize one coefficient in place:
 *     coef = sign(coef) * ( (|coef| + f) * mf >> 16 )
 * and OR the result into a variable named `nz`, which must be in scope at the
 * expansion site, so the caller can tell whether anything survived.
 *
 * `coef` is evaluated several times and must be a side-effect-free lvalue.
 * Every parameter is parenthesized so expression arguments bind as intended,
 * and the body is wrapped in do/while(0) so the macro acts as one statement. */
#define QUANT_ONE( coef, mf, f ) \
do \
{ \
    if( (coef) > 0 ) \
        (coef) = ((f) + (coef)) * (mf) >> 16; \
    else \
        (coef) = - (((f) - (coef)) * (mf) >> 16); \
    nz |= (coef); \
} while( 0 )
55
56
/* Quantize all 64 coefficients of an 8x8 block in place, using the
 * per-position multiplier mf[i] and rounding bias bias[i].
 * Returns 1 if at least one quantized coefficient is nonzero, else 0. */
static int quant_8x8( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] )
{
    int nz = 0; /* accumulated by QUANT_ONE */
    int i = 0;

    while( i < 64 )
    {
        QUANT_ONE( dct[i], mf[i], bias[i] );
        i++;
    }
    return nz != 0;
}
63
64
/* Quantize all 16 coefficients of a 4x4 block in place, using the
 * per-position multiplier mf[i] and rounding bias bias[i].
 * Returns 1 if at least one quantized coefficient is nonzero, else 0. */
static int quant_4x4( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] )
{
    int nz = 0; /* accumulated by QUANT_ONE */
    int i = 0;

    while( i < 16 )
    {
        QUANT_ONE( dct[i], mf[i], bias[i] );
        i++;
    }
    return nz != 0;
}
71
72
/* Quantize four 4x4 blocks in place with a shared multiplier/bias table.
 * Returns a 4-bit mask: bit j is set when block j has at least one
 * nonzero quantized coefficient. */
static int quant_4x4x4( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] )
{
    int nza = 0;

    for( int blk = 0; blk < 4; blk++ )
    {
        dctcoef *cur = dct[blk];
        int nz = 0; /* per-block flag, accumulated by QUANT_ONE */

        for( int k = 0; k < 16; k++ )
            QUANT_ONE( cur[k], mf[k], bias[k] );

        if( nz )
            nza |= 1 << blk;
    }
    return nza;
}
84
85
/* Quantize 16 DC coefficients in place with one multiplier and one bias
 * shared by every position.
 * Returns 1 if at least one quantized coefficient is nonzero, else 0. */
static int quant_4x4_dc( dctcoef dct[16], int mf, int bias )
{
    int nz = 0; /* accumulated by QUANT_ONE */
    int i = 0;

    while( i < 16 )
    {
        QUANT_ONE( dct[i], mf, bias );
        i++;
    }
    return nz != 0;
}
92
93
/* Quantize the four DC coefficients of a 2x2 block in place with one
 * multiplier and one bias shared by every position.
 * Returns 1 if at least one quantized coefficient is nonzero, else 0. */
static int quant_2x2_dc( dctcoef dct[4], int mf, int bias )
{
    int nz = 0; /* accumulated by QUANT_ONE */

    for( int i = 0; i < 4; i++ )
        QUANT_ONE( dct[i], mf, bias );

    return nz != 0;
}
102
103
/* Dequantize dct[x] when i_qbits >= 0: scale by the table entry for this
 * position and by 2^i_qbits.  Expects dct, dequant_mf, i_mf and i_qbits to be
 * in scope at the expansion site.
 * The power of two is applied by multiplication rather than by shifting the
 * product, because left-shifting a negative value is undefined behavior. */
#define DEQUANT_SHL( x ) \
    dct[x] = ( dct[x] * dequant_mf[i_mf][x] ) * (1 << i_qbits)
105
106
/* Dequantize dct[x] when i_qbits < 0: scale by the table entry for this
 * position, add the rounding term f, then shift right by -i_qbits.
 * Expects dct, dequant_mf, i_mf, i_qbits and f in scope at the expansion site. */
#define DEQUANT_SHR( x ) \
    dct[x] = ( f + dct[x] * dequant_mf[i_mf][x] ) >> -i_qbits
108
109
/* Dequantize a 4x4 block in place.
 * The table row is selected by i_qp%6 and the power-of-two scale is
 * i_qp/6 - 4; a negative scale becomes a rounded right shift. */
static void dequant_4x4( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
{
    const int i_mf = i_qp%6;
    const int i_qbits = i_qp/6 - 4;

    if( i_qbits < 0 )
    {
        /* Rounding term: half of the divisor 2^(-i_qbits). */
        const int f = 1 << (-i_qbits-1);
        for( int i = 0; i < 16; i++ )
            DEQUANT_SHR( i );
        return;
    }

    for( int i = 0; i < 16; i++ )
        DEQUANT_SHL( i );
}
126
127
/* Dequantize an 8x8 block in place.
 * The table row is selected by i_qp%6 and the power-of-two scale is
 * i_qp/6 - 6; a negative scale becomes a rounded right shift. */
static void dequant_8x8( dctcoef dct[64], int dequant_mf[6][64], int i_qp )
{
    const int i_mf = i_qp%6;
    const int i_qbits = i_qp/6 - 6;

    if( i_qbits < 0 )
    {
        /* Rounding term: half of the divisor 2^(-i_qbits). */
        const int f = 1 << (-i_qbits-1);
        for( int i = 0; i < 64; i++ )
            DEQUANT_SHR( i );
        return;
    }

    for( int i = 0; i < 64; i++ )
        DEQUANT_SHL( i );
}
144
145
/* Dequantize 16 DC coefficients in place.
 * Every coefficient uses the same factor, entry [i_qp%6][0] of the table,
 * combined with a power-of-two scale of i_qp/6 - 6. */
static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
{
    const int shift = i_qp/6 - 6;
    const int scale = dequant_mf[i_qp%6][0];

    if( shift < 0 )
    {
        /* Negative scale: multiply, add half the divisor, shift right. */
        const int round = 1 << (-shift-1);
        for( int k = 0; k < 16; k++ )
            dct[k] = ( dct[k] * scale + round ) >> (-shift);
    }
    else
    {
        /* Fold the left shift into the factor once, outside the loop. */
        const int scaled = scale << shift;
        for( int k = 0; k < 16; k++ )
            dct[k] *= scaled;
    }
}
163
164
/* Shared first stages of the 2x4 DC transform.  Reads dct[0..7] from the
 * enclosing scope and declares a0..a7 (pairwise sums and differences) and
 * b0..b7 (sums and differences of those) for the caller to combine. */
#define IDCT_DEQUANT_2X4_START \
    int a0 = dct[0] + dct[1], a4 = dct[0] - dct[1]; \
    int a1 = dct[2] + dct[3], a5 = dct[2] - dct[3]; \
    int a2 = dct[4] + dct[5], a6 = dct[4] - dct[5]; \
    int a3 = dct[6] + dct[7], a7 = dct[6] - dct[7]; \
    int b0 = a0 + a1, b4 = a0 - a1; \
    int b1 = a2 + a3, b5 = a2 - a3; \
    int b2 = a4 + a5, b6 = a4 - a5; \
    int b3 = a6 + a7, b7 = a6 - a7;
181
182
/* Inverse-transform and dequantize eight DC coefficients, writing each
 * result to coefficient 0 of the corresponding block in dct4x4.
 * The factor is table entry [i_qp%6][0] shifted left by i_qp/6; each result
 * is rounded with +32 and shifted right by 6. */
static void idct_dequant_2x4_dc( dctcoef dct[8], dctcoef dct4x4[8][16], int dequant_mf[6][16], int i_qp )
{
    IDCT_DEQUANT_2X4_START
    const int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
    const int dc[8] =
    {
        b0 + b1, b2 + b3, b0 - b1, b2 - b3,
        b4 - b5, b6 - b7, b4 + b5, b6 + b7
    };

    for( int i = 0; i < 8; i++ )
        dct4x4[i][0] = (dc[i] * dmf + 32) >> 6;
}
195
196
/* Inverse-transform and dequantize eight DC coefficients in place.
 * All inputs are read by IDCT_DEQUANT_2X4_START before dct[] is overwritten.
 * The factor is table entry [i_qp%6][0] shifted left by i_qp/6; each result
 * is rounded with +32 and shifted right by 6. */
static void idct_dequant_2x4_dconly( dctcoef dct[8], int dequant_mf[6][16], int i_qp )
{
    IDCT_DEQUANT_2X4_START
    const int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
    const int dc[8] =
    {
        b0 + b1, b2 + b3, b0 - b1, b2 - b3,
        b4 - b5, b6 - b7, b4 + b5, b6 + b7
    };

    for( int i = 0; i < 8; i++ )
        dct[i] = (dc[i] * dmf + 32) >> 6;
}
209
210
/* 2x4 DC inverse transform and dequantization for the chroma DC optimizer.
 * Writes eight values to out[]; dct[] is only read.  Each value is
 * (combination * dmf + 2080) >> 6, where 2080 = 32 + (32<<6). */
static ALWAYS_INLINE void optimize_chroma_idct_dequant_2x4( dctcoef out[8], dctcoef dct[8], int dmf )
{
    IDCT_DEQUANT_2X4_START
    const int dc[8] =
    {
        b0 + b1, b2 + b3, b0 - b1, b2 - b3,
        b4 - b5, b6 - b7, b4 + b5, b6 + b7
    };

    for( int i = 0; i < 8; i++ )
        out[i] = (dc[i] * dmf + 2080) >> 6; /* 2080 = 32 + (32<<6) */
}
222
#undef IDCT_DEQUANT_2X4_START
223
224
/* 2x2 DC inverse transform and dequantization for the chroma DC optimizer.
 * Writes four values to out[]; dct[] is only read.  Each value is
 * (combination * dmf >> 5) + 32. */
static ALWAYS_INLINE void optimize_chroma_idct_dequant_2x2( dctcoef out[4], dctcoef dct[4], int dmf )
{
    const int top_sum  = dct[0] + dct[1];
    const int bot_sum  = dct[2] + dct[3];
    const int top_diff = dct[0] - dct[1];
    const int bot_diff = dct[2] - dct[3];

    out[0] = ((top_sum  + bot_sum ) * dmf >> 5) + 32;
    out[1] = ((top_sum  - bot_sum ) * dmf >> 5) + 32;
    out[2] = ((top_diff + bot_diff) * dmf >> 5) + 32;
    out[3] = ((top_diff - bot_diff) * dmf >> 5) + 32;
}
235
236
/* Reconstruct dct[] with the 2x4 (chroma422) or 2x2 transform and compare the
 * result against ref[].  Returns nonzero when some output differs from its
 * reference in any bit above the low six, i.e. (OR of all XORs) >> 6. */
static ALWAYS_INLINE int optimize_chroma_round( dctcoef *ref, dctcoef *dct, int dequant_mf, int chroma422 )
{
    dctcoef out[8];
    const int count = chroma422 ? 8 : 4;
    int diff = 0;

    if( chroma422 )
        optimize_chroma_idct_dequant_2x4( out, dct, dequant_mf );
    else
        optimize_chroma_idct_dequant_2x2( out, dct, dequant_mf );

    for( int i = 0; i < count; i++ )
        diff |= ref[i] ^ out[i];

    return diff >> 6;
}
250
251
/* Shrink the magnitude of each chroma DC coefficient in dct[] as far as
 * possible while optimize_chroma_round() still reports the reconstruction as
 * matching the one obtained from the unmodified coefficients.
 * chroma422 selects the 8-coefficient 2x4 layout instead of the 2x2 one.
 * Returns 1 if some coefficient could not be reduced all the way to zero,
 * otherwise 0; also 0 when the original reconstruction already has no bits
 * set above the low six. */
static ALWAYS_INLINE int optimize_chroma_dc_internal( dctcoef *dct, int dequant_mf, int chroma422 )
{
    /* dequant_mf = h->dequant4_mf[CQM_4IC + b_inter][i_qp%6][0] << i_qp/6, max 32*64 */
    dctcoef dct_orig[8];
    const int count = chroma422 ? 8 : 4;

    if( chroma422 )
        optimize_chroma_idct_dequant_2x4( dct_orig, dct, dequant_mf );
    else
        optimize_chroma_idct_dequant_2x2( dct_orig, dct, dequant_mf );

    /* If the DC coefficients already round to zero, terminate early. */
    int all_bits = 0;
    for( int i = 0; i < count; i++ )
        all_bits |= dct_orig[i];
    if( (all_bits >> 6) == 0 )
        return 0;

    /* Start with the highest frequency coefficient... is this the best option? */
    int nz = 0;
    for( int coeff = count - 1; coeff >= 0; coeff-- )
    {
        int level = dct[coeff];
        const int sign = level < 0 ? -1 : 1;

        /* Step toward zero one unit at a time. */
        for( ; level; level -= sign )
        {
            dct[coeff] = level - sign;
            if( optimize_chroma_round( dct_orig, dct, dequant_mf, chroma422 ) )
            {
                /* That step changed the output: restore and keep this level. */
                nz = 1;
                dct[coeff] = level;
                break;
            }
        }
    }

    return nz;
}
290
291
/* Chroma DC optimization for the 4-coefficient 2x2 layout.
 * Returns the result of optimize_chroma_dc_internal() with chroma422 = 0. */
static int optimize_chroma_2x2_dc( dctcoef dct[4], int dequant_mf )
{
    const int chroma422 = 0;
    return optimize_chroma_dc_internal( dct, dequant_mf, chroma422 );
}
295
296
/* Chroma DC optimization for the 8-coefficient 2x4 layout.
 * Returns the result of optimize_chroma_dc_internal() with chroma422 = 1. */
static int optimize_chroma_2x4_dc( dctcoef dct[8], int dequant_mf )
{
    const int chroma422 = 1;
    return optimize_chroma_dc_internal( dct, dequant_mf, chroma422 );
}
300
301
/* For each of `size` coefficients: add its magnitude to sum[i], subtract
 * offset[i] from the magnitude, and store the result back with the original
 * sign; a magnitude that would go negative is stored as 0. */
static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
{
    for( int i = 0; i < size; i++ )
    {
        const int coef = dct[i];
        const int sign = coef>>31;      /* sign mask of the coefficient */
        int mag = (coef+sign)^sign;     /* magnitude via the sign mask */

        sum[i] += mag;
        mag -= offset[i];

        if( mag < 0 )
            dct[i] = 0;
        else
            dct[i] = (mag^sign)-sign;   /* re-apply the original sign */
    }
}
313
314
/* (ref: JVT-B118)
 * x264_mb_decimate_score: given DCT coefficients, returns a score indicating
 * whether the whole block could be zeroed out (a low score means zero it).
 * Used in inter macroblocks (luma and chroma):
 *  luma: for an 8x8 block: if score < 4 -> null
 *        for the complete mb: if score < 6 -> null
 *  chroma: for the complete mb: if score < 7 -> null
 */
322
323
/* Score lookup for 4x4 blocks, indexed by zero-run length.
 * Entries 6..15 are zero and are left to implicit initialization. */
const uint8_t x264_decimate_table4[16] =
{
    3,          /* run 0    */
    2, 2,       /* runs 1-2 */
    1, 1, 1     /* runs 3-5 */
};
327
/* Score lookup for 8x8 blocks, indexed by zero-run length.
 * Entries 24..63 are zero and are left to implicit initialization. */
const uint8_t x264_decimate_table8[64] =
{
    3, 3, 3, 3,                 /* runs 0-3   */
    2, 2, 2, 2, 2, 2, 2, 2,     /* runs 4-11  */
    1, 1, 1, 1, 1, 1,           /* runs 12-17 */
    1, 1, 1, 1, 1, 1            /* runs 18-23 */
};
334
335
/* Compute the decimation score of the first i_max coefficients of dct[].
 * Scans from the last coefficient toward the first.  Any nonzero coefficient
 * other than +1 or -1 ends the scan with a score of 9.  Otherwise each
 * nonzero coefficient adds the table entry for the number of zeros found
 * immediately below it in index order.  The 8x8 table is used when
 * i_max == 64, the 4x4 table otherwise. */
static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
{
    const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
    int pos = i_max - 1;
    int score = 0;

    /* Skip the zeros at the end of the block. */
    for( ; pos >= 0 && dct[pos] == 0; pos-- )
        ;

    while( pos >= 0 )
    {
        const int level = dct[pos--];

        /* level + 1 is in 0..2 only for level in {-1, 0, 1}. */
        if( (unsigned)(level + 1) > 2 )
            return 9;

        int run = 0;
        for( ; pos >= 0 && dct[pos] == 0; pos-- )
            run++;

        score += ds_table[run];
    }

    return score;
}
361
362
/* Decimation score of coefficients 1..15 of the block; index 0 is skipped. */
static int x264_decimate_score15( dctcoef *dct )
{
    dctcoef *ac = &dct[1];
    return x264_decimate_score_internal( ac, 15 );
}
366
/* Decimation score of all 16 coefficients of the block. */
static int x264_decimate_score16( dctcoef *dct )
{
    const int i_max = 16;
    return x264_decimate_score_internal( dct, i_max );
}
370
/* Decimation score of all 64 coefficients of the block. */
static int x264_decimate_score64( dctcoef *dct )
{
    const int i_max = 64;
    return x264_decimate_score_internal( dct, i_max );
}
374
375
#define last(num)\
376
static int x264_coeff_last##num( dctcoef *l )\
377
{\
378
int i_last = num-1;\
379
while( i_last >= 0 && l[i_last] == 0 )\
380
i_last--;\
381
return i_last;\
382
}
383
384
last(4)
385
last(8)
386
last(15)
387
last(16)
388
last(64)
389
390
#define level_run(num)\
391
static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\
392
{\
393
int i_last = runlevel->last = x264_coeff_last##num(dct);\
394
int i_total = 0;\
395
int mask = 0;\
396
do\
397
{\
398
runlevel->level[i_total++] = dct[i_last];\
399
mask |= 1 << (i_last);\
400
while( --i_last >= 0 && dct[i_last] == 0 );\
401
} while( i_last >= 0 );\
402
runlevel->mask = mask;\
403
return i_total;\
404
}
405
406
level_run(4)
407
level_run(8)
408
level_run(15)
409
level_run(16)
410
411
/* INIT_TRELLIS(cpu): when ARCH_X86_64 is set, store the six
 * x264_trellis_cabac_*_<cpu> functions into pf (which must be in scope);
 * otherwise expand to nothing. */
#if !ARCH_X86_64
#define INIT_TRELLIS(...)
#else
#define INIT_TRELLIS(cpu)\
    pf->trellis_cabac_4x4           = x264_trellis_cabac_4x4_##cpu;\
    pf->trellis_cabac_4x4_psy       = x264_trellis_cabac_4x4_psy_##cpu;\
    pf->trellis_cabac_8x8           = x264_trellis_cabac_8x8_##cpu;\
    pf->trellis_cabac_8x8_psy       = x264_trellis_cabac_8x8_psy_##cpu;\
    pf->trellis_cabac_dc            = x264_trellis_cabac_dc_##cpu;\
    pf->trellis_cabac_chroma_422_dc = x264_trellis_cabac_chroma_422_dc_##cpu;
#endif
422
423
/* Fill the quantization function table *pf.
 * Every entry first receives the C implementation from this file; the
 * sections below then overwrite entries with architecture-specific versions
 * according to the build configuration and the capability bits in `cpu`.
 * Within each section later assignments override earlier ones, so the order
 * of the capability checks is significant.  h is consulted only for
 * h->param.i_cqm_preset when choosing between dequant variants. */
void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
{
    /* ---- C defaults ---- */
    pf->quant_8x8    = quant_8x8;
    pf->quant_4x4    = quant_4x4;
    pf->quant_4x4x4  = quant_4x4x4;
    pf->quant_4x4_dc = quant_4x4_dc;
    pf->quant_2x2_dc = quant_2x2_dc;

    pf->dequant_4x4    = dequant_4x4;
    pf->dequant_4x4_dc = dequant_4x4_dc;
    pf->dequant_8x8    = dequant_8x8;

    pf->idct_dequant_2x4_dc     = idct_dequant_2x4_dc;
    pf->idct_dequant_2x4_dconly = idct_dequant_2x4_dconly;

    pf->optimize_chroma_2x2_dc = optimize_chroma_2x2_dc;
    pf->optimize_chroma_2x4_dc = optimize_chroma_2x4_dc;

    pf->denoise_dct      = x264_denoise_dct;
    pf->decimate_score15 = x264_decimate_score15;
    pf->decimate_score16 = x264_decimate_score16;
    pf->decimate_score64 = x264_decimate_score64;

    pf->coeff_last4 = x264_coeff_last4;
    pf->coeff_last8 = x264_coeff_last8;
    pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15;
    pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16;
    pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64;
    pf->coeff_level_run4 = x264_coeff_level_run4;
    pf->coeff_level_run8 = x264_coeff_level_run8;
    pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15;
    pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16;

#if HIGH_BIT_DEPTH
#if HAVE_MMX
    /* ---- x86, high bit depth ---- */
    INIT_TRELLIS( sse2 );
    if( cpu & X264_CPU_MMX2 )
    {
#if ARCH_X86
        pf->denoise_dct      = x264_denoise_dct_mmx;
        pf->decimate_score15 = x264_decimate_score15_mmx2;
        pf->decimate_score16 = x264_decimate_score16_mmx2;
        pf->decimate_score64 = x264_decimate_score64_mmx2;
        pf->coeff_last8 = x264_coeff_last8_mmx2;
        pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_mmx2;
        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_mmx2;
        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_mmx2;
        pf->coeff_level_run8 = x264_coeff_level_run8_mmx2;
        pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_mmx2;
        pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_mmx2;
#endif
        pf->coeff_last4      = x264_coeff_last4_mmx2;
        pf->coeff_level_run4 = x264_coeff_level_run4_mmx2;
        if( cpu & X264_CPU_LZCNT )
            pf->coeff_level_run4 = x264_coeff_level_run4_mmx2_lzcnt;
    }
    if( cpu & X264_CPU_SSE2 )
    {
        pf->quant_4x4    = x264_quant_4x4_sse2;
        pf->quant_4x4x4  = x264_quant_4x4x4_sse2;
        pf->quant_8x8    = x264_quant_8x8_sse2;
        pf->quant_2x2_dc = x264_quant_2x2_dc_sse2;
        pf->quant_4x4_dc = x264_quant_4x4_dc_sse2;
        pf->dequant_4x4    = x264_dequant_4x4_sse2;
        pf->dequant_8x8    = x264_dequant_8x8_sse2;
        pf->dequant_4x4_dc = x264_dequant_4x4dc_sse2;
        pf->denoise_dct      = x264_denoise_dct_sse2;
        pf->decimate_score15 = x264_decimate_score15_sse2;
        pf->decimate_score16 = x264_decimate_score16_sse2;
        pf->decimate_score64 = x264_decimate_score64_sse2;
        pf->coeff_last8 = x264_coeff_last8_sse2;
        pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_sse2;
        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2;
        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2;
        pf->coeff_level_run8 = x264_coeff_level_run8_sse2;
        pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_sse2;
        pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2;
        if( cpu & X264_CPU_LZCNT )
        {
            pf->coeff_last4 = x264_coeff_last4_mmx2_lzcnt;
            pf->coeff_last8 = x264_coeff_last8_sse2_lzcnt;
            pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_sse2_lzcnt;
            pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2_lzcnt;
            pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2_lzcnt;
            pf->coeff_level_run8 = x264_coeff_level_run8_sse2_lzcnt;
            pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_sse2_lzcnt;
            pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2_lzcnt;
        }
    }
    if( cpu & X264_CPU_SSSE3 )
    {
        pf->quant_4x4    = x264_quant_4x4_ssse3;
        pf->quant_4x4x4  = x264_quant_4x4x4_ssse3;
        pf->quant_8x8    = x264_quant_8x8_ssse3;
        pf->quant_2x2_dc = x264_quant_2x2_dc_ssse3;
        pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3;
        pf->denoise_dct      = x264_denoise_dct_ssse3;
        pf->decimate_score15 = x264_decimate_score15_ssse3;
        pf->decimate_score16 = x264_decimate_score16_ssse3;
        pf->decimate_score64 = x264_decimate_score64_ssse3;
        INIT_TRELLIS( ssse3 );
    }
    if( cpu & X264_CPU_SSE4 )
    {
        pf->quant_2x2_dc = x264_quant_2x2_dc_sse4;
        pf->quant_4x4_dc = x264_quant_4x4_dc_sse4;
        pf->quant_4x4    = x264_quant_4x4_sse4;
        pf->quant_4x4x4  = x264_quant_4x4x4_sse4;
        pf->quant_8x8    = x264_quant_8x8_sse4;
    }
    if( cpu & X264_CPU_AVX )
    {
        pf->denoise_dct = x264_denoise_dct_avx;
    }
    if( cpu & X264_CPU_XOP )
    {
        pf->dequant_4x4_dc = x264_dequant_4x4dc_xop;
        if( h->param.i_cqm_preset != X264_CQM_FLAT )
        {
            pf->dequant_4x4 = x264_dequant_4x4_xop;
            pf->dequant_8x8 = x264_dequant_8x8_xop;
        }
    }
    if( cpu & X264_CPU_AVX2 )
    {
        pf->quant_4x4    = x264_quant_4x4_avx2;
        pf->quant_4x4_dc = x264_quant_4x4_dc_avx2;
        pf->quant_8x8    = x264_quant_8x8_avx2;
        pf->quant_4x4x4  = x264_quant_4x4x4_avx2;
        pf->dequant_4x4    = x264_dequant_4x4_avx2;
        pf->dequant_8x8    = x264_dequant_8x8_avx2;
        pf->dequant_4x4_dc = x264_dequant_4x4dc_avx2;
        pf->denoise_dct = x264_denoise_dct_avx2;
        if( cpu & X264_CPU_LZCNT )
            pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx2_lzcnt;
    }
#endif // HAVE_MMX
#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
    /* ---- x86, 8-bit depth ---- */
    INIT_TRELLIS( sse2 );
    if( cpu & X264_CPU_MMX )
    {
#if ARCH_X86
        pf->dequant_4x4    = x264_dequant_4x4_mmx;
        pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2;
        pf->dequant_8x8    = x264_dequant_8x8_mmx;
        if( h->param.i_cqm_preset == X264_CQM_FLAT )
        {
            pf->dequant_4x4 = x264_dequant_4x4_flat16_mmx;
            pf->dequant_8x8 = x264_dequant_8x8_flat16_mmx;
        }
        pf->denoise_dct = x264_denoise_dct_mmx;
#endif
    }

    if( cpu & X264_CPU_MMX2 )
    {
        pf->quant_2x2_dc = x264_quant_2x2_dc_mmx2;
#if ARCH_X86
        pf->quant_4x4    = x264_quant_4x4_mmx2;
        pf->quant_8x8    = x264_quant_8x8_mmx2;
        pf->quant_4x4_dc = x264_quant_4x4_dc_mmx2;
        pf->decimate_score15 = x264_decimate_score15_mmx2;
        pf->decimate_score16 = x264_decimate_score16_mmx2;
        pf->decimate_score64 = x264_decimate_score64_mmx2;
        pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_mmx2;
        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_mmx2;
        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_mmx2;
        pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_mmx2;
        pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_mmx2;
#endif
        pf->coeff_last4 = x264_coeff_last4_mmx2;
        pf->coeff_last8 = x264_coeff_last8_mmx2;
        pf->coeff_level_run4 = x264_coeff_level_run4_mmx2;
        pf->coeff_level_run8 = x264_coeff_level_run8_mmx2;
        if( cpu & X264_CPU_LZCNT )
        {
            pf->coeff_last4 = x264_coeff_last4_mmx2_lzcnt;
            pf->coeff_last8 = x264_coeff_last8_mmx2_lzcnt;
            pf->coeff_level_run4 = x264_coeff_level_run4_mmx2_lzcnt;
            pf->coeff_level_run8 = x264_coeff_level_run8_mmx2_lzcnt;
        }
    }

    if( cpu & X264_CPU_SSE2 )
    {
        pf->quant_4x4_dc = x264_quant_4x4_dc_sse2;
        pf->quant_4x4    = x264_quant_4x4_sse2;
        pf->quant_4x4x4  = x264_quant_4x4x4_sse2;
        pf->quant_8x8    = x264_quant_8x8_sse2;
        pf->dequant_4x4    = x264_dequant_4x4_sse2;
        pf->dequant_4x4_dc = x264_dequant_4x4dc_sse2;
        pf->dequant_8x8    = x264_dequant_8x8_sse2;
        if( h->param.i_cqm_preset == X264_CQM_FLAT )
        {
            pf->dequant_4x4 = x264_dequant_4x4_flat16_sse2;
            pf->dequant_8x8 = x264_dequant_8x8_flat16_sse2;
        }
        pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_sse2;
        pf->denoise_dct      = x264_denoise_dct_sse2;
        pf->decimate_score15 = x264_decimate_score15_sse2;
        pf->decimate_score16 = x264_decimate_score16_sse2;
        pf->decimate_score64 = x264_decimate_score64_sse2;
        pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_sse2;
        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2;
        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2;
        pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_sse2;
        pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2;
        if( cpu & X264_CPU_LZCNT )
        {
            pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_sse2_lzcnt;
            pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2_lzcnt;
            pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2_lzcnt;
            pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_sse2_lzcnt;
            pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2_lzcnt;
        }
    }

    if( cpu & X264_CPU_SSSE3 )
    {
        pf->quant_2x2_dc = x264_quant_2x2_dc_ssse3;
        pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3;
        pf->quant_4x4    = x264_quant_4x4_ssse3;
        pf->quant_4x4x4  = x264_quant_4x4x4_ssse3;
        pf->quant_8x8    = x264_quant_8x8_ssse3;
        pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_ssse3;
        pf->denoise_dct      = x264_denoise_dct_ssse3;
        pf->decimate_score15 = x264_decimate_score15_ssse3;
        pf->decimate_score16 = x264_decimate_score16_ssse3;
        pf->decimate_score64 = x264_decimate_score64_ssse3;
        INIT_TRELLIS( ssse3 );
        pf->coeff_level_run4 = x264_coeff_level_run4_ssse3;
        pf->coeff_level_run8 = x264_coeff_level_run8_ssse3;
        pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_ssse3;
        pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_ssse3;
        if( cpu & X264_CPU_LZCNT )
        {
            /* NOTE(review): the next two stores repeat the non-lzcnt values
             * assigned just above, while the 15/16 entries switch to _lzcnt
             * variants.  Possibly _ssse3_lzcnt was intended for run4/run8 as
             * well -- confirm against x86/quant.h before changing. */
            pf->coeff_level_run4 = x264_coeff_level_run4_ssse3;
            pf->coeff_level_run8 = x264_coeff_level_run8_ssse3;
            pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_ssse3_lzcnt;
            pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_ssse3_lzcnt;
        }
    }

    if( cpu & X264_CPU_SSE4 )
    {
        pf->quant_4x4_dc = x264_quant_4x4_dc_sse4;
        pf->quant_4x4    = x264_quant_4x4_sse4;
        pf->quant_8x8    = x264_quant_8x8_sse4;
        pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_sse4;
    }

    if( cpu & X264_CPU_AVX )
    {
        pf->dequant_4x4_dc = x264_dequant_4x4dc_avx;
        if( h->param.i_cqm_preset != X264_CQM_FLAT )
        {
            pf->dequant_4x4 = x264_dequant_4x4_avx;
            pf->dequant_8x8 = x264_dequant_8x8_avx;
        }
        pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_avx;
        pf->denoise_dct = x264_denoise_dct_avx;
    }

    if( cpu & X264_CPU_XOP )
    {
        if( h->param.i_cqm_preset != X264_CQM_FLAT )
        {
            pf->dequant_4x4 = x264_dequant_4x4_xop;
            pf->dequant_8x8 = x264_dequant_8x8_xop;
        }
    }

    if( cpu & X264_CPU_AVX2 )
    {
        pf->quant_4x4    = x264_quant_4x4_avx2;
        pf->quant_4x4_dc = x264_quant_4x4_dc_avx2;
        pf->quant_8x8    = x264_quant_8x8_avx2;
        pf->quant_4x4x4  = x264_quant_4x4x4_avx2;
        pf->dequant_4x4    = x264_dequant_4x4_avx2;
        pf->dequant_8x8    = x264_dequant_8x8_avx2;
        pf->dequant_4x4_dc = x264_dequant_4x4dc_avx2;
        if( h->param.i_cqm_preset == X264_CQM_FLAT )
        {
            pf->dequant_4x4 = x264_dequant_4x4_flat16_avx2;
            pf->dequant_8x8 = x264_dequant_8x8_flat16_avx2;
        }
        pf->decimate_score64 = x264_decimate_score64_avx2;
        pf->denoise_dct = x264_denoise_dct_avx2;
        if( cpu & X264_CPU_LZCNT )
        {
            pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx2_lzcnt;
            pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_avx2_lzcnt;
            pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_avx2_lzcnt;
        }
    }
#endif // HAVE_MMX

#if HAVE_ALTIVEC
    /* ---- PowerPC AltiVec ---- */
    if( cpu & X264_CPU_ALTIVEC )
    {
        pf->quant_2x2_dc = x264_quant_2x2_dc_altivec;
        pf->quant_4x4_dc = x264_quant_4x4_dc_altivec;
        pf->quant_4x4    = x264_quant_4x4_altivec;
        pf->quant_8x8    = x264_quant_8x8_altivec;

        pf->dequant_4x4 = x264_dequant_4x4_altivec;
        pf->dequant_8x8 = x264_dequant_8x8_altivec;
    }
#endif

#if HAVE_ARMV6
    /* ---- ARMv6 ---- */
    if( cpu & X264_CPU_ARMV6 )
    {
        pf->coeff_last4 = x264_coeff_last4_arm;
        pf->coeff_last8 = x264_coeff_last8_arm;
    }
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
    /* ---- NEON (ARM and AArch64) ---- */
    if( cpu & X264_CPU_NEON )
    {
        pf->quant_2x2_dc = x264_quant_2x2_dc_neon;
        pf->quant_4x4    = x264_quant_4x4_neon;
        pf->quant_4x4_dc = x264_quant_4x4_dc_neon;
        pf->quant_4x4x4  = x264_quant_4x4x4_neon;
        pf->quant_8x8    = x264_quant_8x8_neon;
        pf->dequant_4x4    = x264_dequant_4x4_neon;
        pf->dequant_4x4_dc = x264_dequant_4x4_dc_neon;
        pf->dequant_8x8    = x264_dequant_8x8_neon;
        pf->coeff_last[DCT_LUMA_AC]  = x264_coeff_last15_neon;
        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_neon;
        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
        pf->denoise_dct      = x264_denoise_dct_neon;
        pf->decimate_score15 = x264_decimate_score15_neon;
        pf->decimate_score16 = x264_decimate_score16_neon;
        pf->decimate_score64 = x264_decimate_score64_neon;
    }
#endif
#if ARCH_AARCH64
    /* ---- AArch64 only ---- */
    if( cpu & X264_CPU_ARMV8 )
    {
        pf->coeff_last4 = x264_coeff_last4_aarch64;
        pf->coeff_last8 = x264_coeff_last8_aarch64;
        pf->coeff_level_run4 = x264_coeff_level_run4_aarch64;
    }
    if( cpu & X264_CPU_NEON )
    {
        pf->coeff_level_run8 = x264_coeff_level_run8_neon;
        pf->coeff_level_run[DCT_LUMA_AC]  = x264_coeff_level_run15_neon;
        pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_neon;
    }
#endif

#if HAVE_MSA
    /* ---- MIPS MSA ---- */
    if( cpu & X264_CPU_MSA )
    {
        pf->quant_4x4    = x264_quant_4x4_msa;
        pf->quant_4x4_dc = x264_quant_4x4_dc_msa;
        pf->quant_4x4x4  = x264_quant_4x4x4_msa;
        pf->quant_8x8    = x264_quant_8x8_msa;
        pf->dequant_4x4    = x264_dequant_4x4_msa;
        pf->dequant_4x4_dc = x264_dequant_4x4_dc_msa;
        pf->dequant_8x8    = x264_dequant_8x8_msa;
        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_msa;
        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_msa;
    }
#endif
#endif // HIGH_BIT_DEPTH

    /* ---- Copy the luma entries to the other block categories ---- */
    pf->coeff_last[DCT_CHROMAV_4x4] = pf->coeff_last[DCT_LUMA_4x4];
    pf->coeff_last[DCT_CHROMAU_4x4] = pf->coeff_last[DCT_LUMA_4x4];
    pf->coeff_last[DCT_CHROMAV_DC]  = pf->coeff_last[DCT_LUMA_4x4];
    pf->coeff_last[DCT_CHROMAU_DC]  = pf->coeff_last[DCT_LUMA_4x4];
    pf->coeff_last[DCT_LUMA_DC]     = pf->coeff_last[DCT_LUMA_4x4];

    pf->coeff_last[DCT_CHROMAV_AC] = pf->coeff_last[DCT_LUMA_AC];
    pf->coeff_last[DCT_CHROMAU_AC] = pf->coeff_last[DCT_LUMA_AC];
    pf->coeff_last[DCT_CHROMA_AC]  = pf->coeff_last[DCT_LUMA_AC];

    pf->coeff_last[DCT_CHROMAV_8x8] = pf->coeff_last[DCT_LUMA_8x8];
    pf->coeff_last[DCT_CHROMAU_8x8] = pf->coeff_last[DCT_LUMA_8x8];

    pf->coeff_level_run[DCT_CHROMAV_4x4] = pf->coeff_level_run[DCT_LUMA_4x4];
    pf->coeff_level_run[DCT_CHROMAU_4x4] = pf->coeff_level_run[DCT_LUMA_4x4];
    pf->coeff_level_run[DCT_CHROMAV_DC]  = pf->coeff_level_run[DCT_LUMA_4x4];
    pf->coeff_level_run[DCT_CHROMAU_DC]  = pf->coeff_level_run[DCT_LUMA_4x4];
    pf->coeff_level_run[DCT_LUMA_DC]     = pf->coeff_level_run[DCT_LUMA_4x4];

    pf->coeff_level_run[DCT_CHROMAV_AC] = pf->coeff_level_run[DCT_LUMA_AC];
    pf->coeff_level_run[DCT_CHROMAU_AC] = pf->coeff_level_run[DCT_LUMA_AC];
    pf->coeff_level_run[DCT_CHROMA_AC]  = pf->coeff_level_run[DCT_LUMA_AC];
}
802
803