/*****************************************************************************
 * predict-c.c: intra prediction
 *****************************************************************************
 * Copyright (C) 2003-2016 x264 project
 *
 * Authors: Laurent Aimar <[email protected]>
 *          Loren Merritt <[email protected]>
 *          Fiona Glaser <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at [email protected].
 *****************************************************************************/

#include "common/common.h"
#include "predict.h"
#include "pixel.h"

#define PREDICT_16x16_DC(name)\
33
void x264_predict_16x16_dc_##name( pixel *src )\
34
{\
35
uint32_t dc = 16;\
36
for( int i = 0; i < 16; i += 2 )\
37
{\
38
dc += src[-1 + i * FDEC_STRIDE];\
39
dc += src[-1 + (i+1) * FDEC_STRIDE];\
40
}\
41
x264_predict_16x16_dc_core_##name( src, dc );\
42
}
43
44
PREDICT_16x16_DC( mmx2 )
45
PREDICT_16x16_DC( sse2 )
46
PREDICT_16x16_DC( avx2 )
47
48
#define PREDICT_16x16_DC_LEFT(name)\
49
static void x264_predict_16x16_dc_left_##name( pixel *src )\
50
{\
51
uint32_t dc = 8;\
52
for( int i = 0; i < 16; i += 2 )\
53
{\
54
dc += src[-1 + i * FDEC_STRIDE];\
55
dc += src[-1 + (i+1) * FDEC_STRIDE];\
56
}\
57
x264_predict_16x16_dc_left_core_##name( src, dc>>4 );\
58
}
59
60
PREDICT_16x16_DC_LEFT( mmx2 )
61
PREDICT_16x16_DC_LEFT( sse2 )
62
PREDICT_16x16_DC_LEFT( avx2 )
63
64
/* One term of the plane-prediction gradient sums.
 * Requires `src`, `H` and `V` to be in scope at the expansion site.
 *   j - centre index along the top row / left column
 *   i - distance from the centre, also used as the weight
 * H accumulates i * (top[j+i] - top[j-i]) from the row above the block;
 * V accumulates i * (left[j+i] - left[j-i]) from the column to its left. */
#define PREDICT_P_SUM(j,i)\
    H += i * ( src[j+i - FDEC_STRIDE ]  - src[j-i - FDEC_STRIDE ] );\
    V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );

#if HAVE_X86_INLINE_ASM
/* Weight tables used as memory operands by the inline-asm plane-prediction
 * macros in this file: 16-bit entries for high bit depth builds, 8-bit
 * entries otherwise. */
#if HIGH_BIT_DEPTH
ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#else // !HIGH_BIT_DEPTH
ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#endif // HIGH_BIT_DEPTH
#endif // HAVE_X86_INLINE_ASM

/* Plain-C gradient computation for 16x16 plane prediction.
 * Declares `H` and `V` (both zero-initialised) and accumulates the eight
 * PREDICT_P_SUM terms centred on index 7 with weights 1..8.
 * Requires `src` to be in scope. */
#define PREDICT_16x16_P_CORE\
    int H = 0;\
    int V = 0;\
    PREDICT_P_SUM(7,1)\
    PREDICT_P_SUM(7,2)\
    PREDICT_P_SUM(7,3)\
    PREDICT_P_SUM(7,4)\
    PREDICT_P_SUM(7,5)\
    PREDICT_P_SUM(7,6)\
    PREDICT_P_SUM(7,7)\
    PREDICT_P_SUM(7,8)

/* Common tail of the 16x16 plane predictors.
 * Requires `src`, `H` and `V` to be in scope.  Derives the plane parameters
 * a, b, c and the accumulator value i00 = a - 7*b - 7*c + 16, then calls
 * x264_predict_16x16_p_core_<name>( src, i00, b, c ).  When BIT_DEPTH > 8 and
 * any of i00, |b|, |c| exceeds the limits tested below, the C implementation
 * x264_predict_16x16_p_c() is used instead. */
#define PREDICT_16x16_P_END(name)\
    int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
    int b = ( 5 * H + 32 ) >> 6;\
    int c = ( 5 * V + 32 ) >> 6;\
    int i00 = a - b * 7 - c * 7 + 16;\
    /* b*15 + c*15 can overflow: it's easier to just branch away in this rare case
     * than to try to consider it in the asm. */\
    if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 1092 || abs(c) > 1092) )\
        x264_predict_16x16_p_c( src );\
    else\
        x264_predict_16x16_p_core_##name( src, i00, b, c );

/* Defines the file-local x264_predict_16x16_p_<name>( pixel *src ) using the
 * plain-C gradient sums (PREDICT_16x16_P_CORE) followed by the shared tail
 * (PREDICT_16x16_P_END), which dispatches to the <name2> asm core. */
#define PREDICT_16x16_P(name, name2)\
static void x264_predict_16x16_p_##name( pixel *src )\
{\
    PREDICT_16x16_P_CORE\
    PREDICT_16x16_P_END(name2)\
}

#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
/* Inline-asm computation of the horizontal gradient sum H for 16x16 plane
 * prediction (high bit depth).  Requires `src` and `H` in scope.
 * Loads eight words starting at src[-FDEC_STRIDE-1] (unaligned) and eight
 * starting at src[-FDEC_STRIDE+8] (aligned load), multiply-accumulates them
 * against pw_m87654321 and pw_12345678 respectively, and reduces the dword
 * partial sums into H.
 * xmm0 and xmm1 are used as scratch and are declared as clobbered so the
 * compiler does not keep live values in them across the statement. */
#define PREDICT_16x16_P_ASM\
    asm (\
        "movdqu           %1, %%xmm1 \n"\
        "movdqa           %2, %%xmm0 \n"\
        "pmaddwd          %3, %%xmm0 \n"\
        "pmaddwd          %4, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "movhlps      %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "pshuflw $14, %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "movd         %%xmm0, %0     \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
         "m"(*pw_12345678), "m"(*pw_m87654321)\
        :"xmm0", "xmm1"\
    );
#else // !HIGH_BIT_DEPTH
/* Inline-asm computation of the horizontal gradient sum H for 16x16 plane
 * prediction (8-bit pixels).  Requires `src` and `H` in scope.
 * palignr builds the eight bytes starting at src[-FDEC_STRIDE-1] from the
 * loads at src[-FDEC_STRIDE-8] and src[-FDEC_STRIDE]; those and the eight
 * bytes at src[-FDEC_STRIDE+8] are multiply-accumulated against
 * pb_m87654321 and pb_12345678, reduced to one 16-bit sum, and sign-extended
 * into H by the final movswl.
 * mm0 and mm1 are used as scratch and are declared as clobbered. */
#define PREDICT_16x16_P_ASM\
    asm (\
        "movq           %1, %%mm1 \n"\
        "movq           %2, %%mm0 \n"\
        "palignr $7,    %3, %%mm1 \n"\
        "pmaddubsw      %4, %%mm0 \n"\
        "pmaddubsw      %5, %%mm1 \n"\
        "paddw       %%mm1, %%mm0 \n"\
        "pshufw $14, %%mm0, %%mm1 \n"\
        "paddw       %%mm1, %%mm0 \n"\
        "pshufw  $1, %%mm0, %%mm1 \n"\
        "paddw       %%mm1, %%mm0 \n"\
        "movd        %%mm0, %0    \n"\
        "movswl        %w0, %0    \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
         "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
        :"mm0", "mm1"\
    );
#endif // HIGH_BIT_DEPTH

/* Gradient computation for 16x16 plane prediction with H taken from inline
 * asm (PREDICT_16x16_P_ASM) and V computed in C as a weighted sum of
 * differences along the left column: weight k (1..8) multiplies
 * left[7+k] - left[7-k], where left[r] is src[r*FDEC_STRIDE-1].
 * Declares `H` and `V`; requires `src` in scope. */
#define PREDICT_16x16_P_CORE_INLINE\
    int H, V;\
    PREDICT_16x16_P_ASM\
    V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )\
      + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )\
      + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )\
      + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )\
      + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )\
      + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )\
      + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )\
      + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );

#define PREDICT_16x16_P_INLINE(name, name2)\
163
static void x264_predict_16x16_p_##name( pixel *src )\
164
{\
165
PREDICT_16x16_P_CORE_INLINE\
166
PREDICT_16x16_P_END(name2)\
167
}
168
#else // !HAVE_X86_INLINE_ASM
169
#define PREDICT_16x16_P_INLINE(name, name2) PREDICT_16x16_P(name, name2)
170
#endif // HAVE_X86_INLINE_ASM
171
172
#if HIGH_BIT_DEPTH
173
PREDICT_16x16_P_INLINE( sse2, sse2 )
174
#else // !HIGH_BIT_DEPTH
175
#if !ARCH_X86_64
176
PREDICT_16x16_P( mmx2, mmx2 )
177
#endif // !ARCH_X86_64
178
PREDICT_16x16_P( sse2, sse2 )
179
#if HAVE_X86_INLINE_ASM
180
PREDICT_16x16_P_INLINE( ssse3, sse2 )
181
#endif // HAVE_X86_INLINE_ASM
182
PREDICT_16x16_P_INLINE( avx, avx )
183
#endif // HIGH_BIT_DEPTH
184
PREDICT_16x16_P_INLINE( avx2, avx2 )
185
186
/* Gradient computation for 8x16 chroma plane prediction.
 * Declares `H` and `V`; requires `src` in scope.
 * H uses weights 1..4 on differences of the row above the block, centred on
 * column 3; V uses weights 1..8 on differences of the column left of the
 * block, centred on row 7. */
#define PREDICT_8x16C_P_CORE\
    int H = 0, V = 0;\
    for( int i = 0; i < 4; i++ )\
        H += ( i + 1 ) * ( src[4 + i - FDEC_STRIDE] - src[2 - i - FDEC_STRIDE] );\
    for( int i = 0; i < 8; i++ )\
        V += ( i + 1 ) * ( src[-1 + (i+8)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] );

/* Common tail of the 8x16 chroma plane predictors.
 * Requires `src`, `H` and `V` in scope.  Derives a, b, c and calls
 * x264_predict_8x16c_p_core_<name>().  The high bit depth build passes `a`
 * to the core; the 8-bit build passes i00 = a - 3*b - 7*c + 16 instead. */
#if HIGH_BIT_DEPTH
#define PREDICT_8x16C_P_END(name)\
    int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 5 * V + 32 ) >> 6;\
    x264_predict_8x16c_p_core_##name( src, a, b, c );
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x16C_P_END(name)\
    int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 5 * V + 32 ) >> 6;\
    int i00 = a -3*b -7*c + 16;\
    x264_predict_8x16c_p_core_##name( src, i00, b, c );
#endif // HIGH_BIT_DEPTH

#define PREDICT_8x16C_P(name)\
209
static void x264_predict_8x16c_p_##name( pixel *src )\
210
{\
211
PREDICT_8x16C_P_CORE\
212
PREDICT_8x16C_P_END(name)\
213
}
214
215
#if !ARCH_X86_64 && !HIGH_BIT_DEPTH
216
PREDICT_8x16C_P( mmx2 )
217
#endif // !ARCH_X86_64 && !HIGH_BIT_DEPTH
218
PREDICT_8x16C_P( sse2 )
219
PREDICT_8x16C_P( avx )
220
PREDICT_8x16C_P( avx2 )
221
222
/* Plain-C gradient computation for 8x8 chroma plane prediction.
 * Declares `H` and `V` (both zero-initialised) and accumulates the four
 * PREDICT_P_SUM terms centred on index 3 with weights 1..4.
 * Requires `src` in scope. */
#define PREDICT_8x8C_P_CORE\
    int H = 0;\
    int V = 0;\
    PREDICT_P_SUM(3,1)\
    PREDICT_P_SUM(3,2)\
    PREDICT_P_SUM(3,3)\
    PREDICT_P_SUM(3,4)

/* Common tail of the 8x8 chroma plane predictors.
 * Requires `src`, `H` and `V` in scope.  Derives a, b, c and calls
 * x264_predict_8x8c_p_core_<name>().  The high bit depth build passes `a`
 * to the core; the 8-bit build passes i00 = a - 3*b - 3*c + 16 instead. */
#if HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_END(name)\
    int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 17 * V + 16 ) >> 5;\
    x264_predict_8x8c_p_core_##name( src, a, b, c );
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_END(name)\
    int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 17 * V + 16 ) >> 5;\
    int i00 = a -3*b -3*c + 16;\
    x264_predict_8x8c_p_core_##name( src, i00, b, c );
#endif // HIGH_BIT_DEPTH

/* Defines the file-local x264_predict_8x8c_p_<name>( pixel *src ) from the
 * plain-C gradient sums (PREDICT_8x8C_P_CORE) and the shared tail
 * (PREDICT_8x8C_P_END), which calls the <name2> asm core. */
#define PREDICT_8x8C_P(name, name2)\
static void x264_predict_8x8c_p_##name( pixel *src )\
{\
    PREDICT_8x8C_P_CORE\
    PREDICT_8x8C_P_END(name2)\
}

#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
/* Inline-asm partial horizontal gradient sum for 8x8 chroma plane prediction
 * (high bit depth).  Requires `src` and `H` in scope.
 * Multiply-accumulates the eight words at src[-FDEC_STRIDE] against
 * pw_m32101234 and reduces the result into H.  The table has no entry for
 * src[-FDEC_STRIDE-1]; PREDICT_8x8C_P_CORE_INLINE adds that term in C.
 * xmm0 and xmm1 are used as scratch and are declared as clobbered. */
#define PREDICT_8x8C_P_ASM\
    asm (\
        "movdqa           %1, %%xmm0 \n"\
        "pmaddwd          %2, %%xmm0 \n"\
        "movhlps      %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "pshuflw $14, %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "movd         %%xmm0, %0     \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
        :"xmm0", "xmm1"\
    );
#else // !HIGH_BIT_DEPTH
/* Inline-asm partial horizontal gradient sum for 8x8 chroma plane prediction
 * (8-bit pixels).  Requires `src` and `H` in scope.
 * Multiply-accumulates the eight bytes at src[-FDEC_STRIDE] against
 * pb_m32101234, reduces to one 16-bit sum and sign-extends it into H.
 * The table has no entry for src[-FDEC_STRIDE-1];
 * PREDICT_8x8C_P_CORE_INLINE adds that term in C.
 * mm0 and mm1 are used as scratch and are declared as clobbered. */
#define PREDICT_8x8C_P_ASM\
    asm (\
        "movq           %1, %%mm0 \n"\
        "pmaddubsw      %2, %%mm0 \n"\
        "pshufw $14, %%mm0, %%mm1 \n"\
        "paddw       %%mm1, %%mm0 \n"\
        "pshufw  $1, %%mm0, %%mm1 \n"\
        "paddw       %%mm1, %%mm0 \n"\
        "movd        %%mm0, %0    \n"\
        "movswl        %w0, %0    \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
        :"mm0", "mm1"\
    );
#endif // HIGH_BIT_DEPTH

/* Gradient computation for 8x8 chroma plane prediction with most of H taken
 * from inline asm.  Declares `H` and `V`; requires `src` in scope.
 * V is the weighted sum (weights 1..4) of differences along the left column.
 * The asm covers columns 0..7 of the row above the block, so the remaining
 * -4 * (top-left corner pixel) term of H is added here. */
#define PREDICT_8x8C_P_CORE_INLINE\
    int H, V;\
    PREDICT_8x8C_P_ASM\
    V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )\
      + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )\
      + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )\
      + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );\
    H += -4 * src[-1*FDEC_STRIDE -1];

#define PREDICT_8x8C_P_INLINE(name, name2)\
292
static void x264_predict_8x8c_p_##name( pixel *src )\
293
{\
294
PREDICT_8x8C_P_CORE_INLINE\
295
PREDICT_8x8C_P_END(name2)\
296
}
297
#else // !HAVE_X86_INLINE_ASM
298
#define PREDICT_8x8C_P_INLINE(name, name2) PREDICT_8x8C_P(name, name2)
299
#endif // HAVE_X86_INLINE_ASM
300
301
#if HIGH_BIT_DEPTH
302
PREDICT_8x8C_P_INLINE( sse2, sse2 )
303
#else //!HIGH_BIT_DEPTH
304
#if !ARCH_X86_64
305
PREDICT_8x8C_P( mmx2, mmx2 )
306
#endif // !ARCH_X86_64
307
PREDICT_8x8C_P( sse2, sse2 )
308
#if HAVE_X86_INLINE_ASM
309
PREDICT_8x8C_P_INLINE( ssse3, sse2 )
310
#endif // HAVE_X86_INLINE_ASM
311
#endif // HIGH_BIT_DEPTH
312
PREDICT_8x8C_P_INLINE( avx, avx )
313
PREDICT_8x8C_P_INLINE( avx2, avx2 )
314
315
#if ARCH_X86_64 && !HIGH_BIT_DEPTH
/* 8x8 chroma DC prediction from the left neighbours only (8-bit pixels).
 *
 * src points at the top-left pixel of the block; rows are FDEC_STRIDE apart.
 * Rows 0-3 are filled with the rounded mean of the four pixels left of rows
 * 0-3, and rows 4-7 with the rounded mean of the four pixels left of rows
 * 4-7.  Each row is written through M64 with the 8-bit mean replicated into
 * all eight bytes. */
static void x264_predict_8x8c_dc_left( uint8_t *src )
{
    uint32_t s0 = 0, s1 = 0;
    uint64_t dc0, dc1;

    /* Sum the left-neighbour column separately for the upper and lower half. */
    for( int y = 0; y < 4; y++ )
    {
        s0 += src[y * FDEC_STRIDE - 1];
        s1 += src[(y+4) * FDEC_STRIDE - 1];
    }
    /* Rounded mean, broadcast to every byte of a 64-bit word. */
    dc0 = (( s0 + 2 ) >> 2) * 0x0101010101010101ULL;
    dc1 = (( s1 + 2 ) >> 2) * 0x0101010101010101ULL;

    for( int y = 0; y < 4; y++ )
    {
        M64( src ) = dc0;
        src += FDEC_STRIDE;
    }
    for( int y = 0; y < 4; y++ )
    {
        M64( src ) = dc1;
        src += FDEC_STRIDE;
    }
}
#endif // ARCH_X86_64 && !HIGH_BIT_DEPTH

/****************************************************************************
 * Exported functions:
 ****************************************************************************/
/* Installs x86 implementations of the 16x16 intra predictors into pf[].
 *
 * cpu - bitmask of X264_CPU_* capability flags
 * pf  - predictor table indexed by I_PRED_16x16_*; only entries for which an
 *       implementation is selected are overwritten
 *
 * Assignments are ordered from the oldest instruction set to the newest, so a
 * later assignment replaces an earlier one; the function returns as soon as a
 * required capability is missing.  Nothing is installed without MMX2. */
void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
{
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_mmx2;
    pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_mmx2;
    pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx2;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_mmx2;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
    pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
    pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_sse2;
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_avx2;
#else
#if !ARCH_X86_64
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_mmx2;
#endif
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
    /* The remaining SSE2 and newer versions are skipped on CPUs flagged as
     * having slow SSE2. */
    if( cpu&X264_CPU_SSE2_IS_SLOW )
        return;
    pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
    pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    if( !(cpu&X264_CPU_SLOW_PSHUFB) )
        pf[I_PRED_16x16_H]       = x264_predict_16x16_h_ssse3;
#if HAVE_X86_INLINE_ASM
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_ssse3;
#endif
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_avx;
#endif // HIGH_BIT_DEPTH

    if( cpu&X264_CPU_AVX2 )
    {
        pf[I_PRED_16x16_P]       = x264_predict_16x16_p_avx2;
        pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_avx2;
        pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_avx2;
        pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_avx2;
    }
}

/* Installs x86 implementations of the 8x8 chroma intra predictors into pf[].
 *
 * cpu - bitmask of X264_CPU_* capability flags
 * pf  - predictor table indexed by I_PRED_CHROMA_*; only entries for which an
 *       implementation is selected are overwritten
 *
 * Assignments are ordered from the oldest instruction set to the newest, so a
 * later assignment replaces an earlier one; the function returns as soon as a
 * required capability is missing.  Nothing is installed without MMX. */
void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
{
    if( !(cpu&X264_CPU_MMX) )
        return;
#if HIGH_BIT_DEPTH
    pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_sse2;
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_sse2;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_sse2;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_avx2;
#else
#if ARCH_X86_64
    /* C implementation defined in this file (64-bit, 8-bit depth only). */
    pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
#endif
    pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_mmx2;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
#if !ARCH_X86_64
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_mmx2;
#endif
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_ssse3;
#if HAVE_X86_INLINE_ASM
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_ssse3;
#endif
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx;
#endif // HIGH_BIT_DEPTH

    if( cpu&X264_CPU_AVX2 )
    {
        pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx2;
    }
}

/* Installs x86 implementations of the 8x16 chroma intra predictors into pf[].
 *
 * cpu - bitmask of X264_CPU_* capability flags
 * pf  - predictor table indexed by I_PRED_CHROMA_*; only entries for which an
 *       implementation is selected are overwritten
 *
 * Assignments are ordered from the oldest instruction set to the newest, so a
 * later assignment replaces an earlier one; the function returns as soon as a
 * required capability is missing.  Nothing is installed without MMX. */
void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] )
{
    if( !(cpu&X264_CPU_MMX) )
        return;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_CHROMA_V]       = x264_predict_8x16c_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_sse2;
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_sse2;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_sse2;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_sse2;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_avx2;
#else
    pf[I_PRED_CHROMA_V]       = x264_predict_8x16c_v_mmx;
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_mmx2;
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
#if !ARCH_X86_64
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_mmx2;
#endif
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_ssse3;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx;
#endif // HIGH_BIT_DEPTH

    if( cpu&X264_CPU_AVX2 )
    {
        pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx2;
    }
}

/* Installs x86 implementations of the 8x8 luma intra predictors into pf[] and
 * selects the matching edge-filter function.
 *
 * cpu                - bitmask of X264_CPU_* capability flags
 * pf                 - predictor table indexed by I_PRED_8x8_*; only entries
 *                      for which an implementation is selected are overwritten
 * predict_8x8_filter - out: receives the filter implementation; left
 *                      untouched when no x86 version is selected
 *
 * Assignments are ordered from the oldest instruction set to the newest, so a
 * later assignment replaces an earlier one; the function returns as soon as a
 * required capability is missing.  Nothing is installed without MMX2. */
void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
{
    if( !(cpu&X264_CPU_MMX2) )
        return;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_8x8_V]      = x264_predict_8x8_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_8x8_H]      = x264_predict_8x8_h_sse2;
    pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_sse2;
    pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_sse2;
    pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_sse2;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_sse2;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_sse2;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_sse2;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_sse2;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_sse2;
    *predict_8x8_filter   = x264_predict_8x8_filter_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_ssse3;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_ssse3;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_ssse3;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_ssse3;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_ssse3;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_ssse3;
    *predict_8x8_filter   = x264_predict_8x8_filter_ssse3;
    if( cpu&X264_CPU_CACHELINE_64 )
    {
        pf[I_PRED_8x8_DDL]= x264_predict_8x8_ddl_ssse3_cache64;
        pf[I_PRED_8x8_DDR]= x264_predict_8x8_ddr_ssse3_cache64;
    }
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_avx;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_avx;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_avx;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_avx;
    *predict_8x8_filter   = x264_predict_8x8_filter_avx;
#else
    pf[I_PRED_8x8_V]      = x264_predict_8x8_v_mmx2;
    pf[I_PRED_8x8_H]      = x264_predict_8x8_h_mmx2;
    pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_mmx2;
    pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmx2;
    pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmx2;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_mmx2;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_mmx2;
    *predict_8x8_filter   = x264_predict_8x8_filter_mmx2;
#if ARCH_X86
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_mmx2;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_mmx2;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_mmx2;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_mmx2;
#endif
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_sse2;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_sse2;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_sse2;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_sse2;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_sse2;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    /* These two SSSE3 versions are only selected when palignr is not flagged
     * as slow on this CPU. */
    if( !(cpu&X264_CPU_SLOW_PALIGNR) )
    {
        pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_ssse3;
        pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_ssse3;
    }
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_ssse3;
    *predict_8x8_filter   = x264_predict_8x8_filter_ssse3;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_avx;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_avx;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_avx;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_avx;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_avx;
#endif // HIGH_BIT_DEPTH
}

/* Installs x86 implementations of the 4x4 luma intra predictors into pf[].
 *
 * cpu - bitmask of X264_CPU_* capability flags
 * pf  - predictor table indexed by I_PRED_4x4_*; only entries for which an
 *       implementation is selected are overwritten
 *
 * Assignments are ordered from the oldest instruction set to the newest, so a
 * later assignment replaces an earlier one; the function returns as soon as a
 * required capability is missing.  Nothing is installed without MMX2. */
void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
{
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_4x4_DC]  = x264_predict_4x4_dc_mmx2;
    pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmx2;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmx2;
    pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_mmx2;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_mmx2;
    pf[I_PRED_4x4_HU]  = x264_predict_4x4_hu_mmx2;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_sse2;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_sse2;
    pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_sse2;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_avx;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_avx;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_avx;
    pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_avx;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_4x4_H]   = x264_predict_4x4_h_avx2;
#else
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_mmx2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
    if( cpu&X264_CPU_CACHELINE_64 )
        pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3_cache64;
#endif // HIGH_BIT_DEPTH
}