/*****************************************************************************
 * predict-c.c: msa intra prediction
 *****************************************************************************
 * Copyright (C) 2015-2016 x264 project
 *
 * Authors: Mandar Sahastrabuddhe <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at [email protected].
 *****************************************************************************/

#include "common/common.h"
#include "macros.h"

#if !HIGH_BIT_DEPTH
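/* Vertical prediction: replicate the row of pixels directly above the block
 * into every row of the destination (4x4, 8x8 and 16x16 variants). */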
static void intra_predict_vert_4x4_msa( uint8_t *p_src, uint8_t *p_dst,
                                        int32_t i_dst_stride )
{
    uint32_t u_src_data;

    u_src_data = LW( p_src );

    SW4( u_src_data, u_src_data, u_src_data, u_src_data, p_dst, i_dst_stride );
}

static void intra_predict_vert_8x8_msa( uint8_t *p_src, uint8_t *p_dst,
                                        int32_t i_dst_stride )
{
    uint64_t u_out;

    u_out = LD( p_src );

    SD4( u_out, u_out, u_out, u_out, p_dst, i_dst_stride );
    p_dst += ( 4 * i_dst_stride );
    SD4( u_out, u_out, u_out, u_out, p_dst, i_dst_stride );
}

static void intra_predict_vert_16x16_msa( uint8_t *p_src, uint8_t *p_dst,
                                          int32_t i_dst_stride )
{
    v16u8 src0 = LD_UB( p_src );

    ST_UB8( src0, src0, src0, src0, src0, src0, src0, src0, p_dst,
            i_dst_stride );
    p_dst += ( 8 * i_dst_stride );
    ST_UB8( src0, src0, src0, src0, src0, src0, src0, src0, p_dst,
            i_dst_stride );
}

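/* Horizontal prediction: broadcast each pixel of the left neighbouring column
 * across its entire row (4x4, 8x8 and 16x16 variants). */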
static void intra_predict_horiz_4x4_msa( uint8_t *p_src, int32_t i_src_stride,
                                         uint8_t *p_dst, int32_t i_dst_stride )
{
    uint32_t u_out0, u_out1, u_out2, u_out3;

    u_out0 = p_src[0 * i_src_stride] * 0x01010101;
    u_out1 = p_src[1 * i_src_stride] * 0x01010101;
    u_out2 = p_src[2 * i_src_stride] * 0x01010101;
    u_out3 = p_src[3 * i_src_stride] * 0x01010101;

    SW4( u_out0, u_out1, u_out2, u_out3, p_dst, i_dst_stride );
}

static void intra_predict_horiz_8x8_msa( uint8_t *p_src, int32_t i_src_stride,
                                         uint8_t *p_dst, int32_t i_dst_stride )
{
    uint64_t u_out0, u_out1, u_out2, u_out3, u_out4, u_out5, u_out6, u_out7;

    u_out0 = p_src[0 * i_src_stride] * 0x0101010101010101ull;
    u_out1 = p_src[1 * i_src_stride] * 0x0101010101010101ull;
    u_out2 = p_src[2 * i_src_stride] * 0x0101010101010101ull;
    u_out3 = p_src[3 * i_src_stride] * 0x0101010101010101ull;
    u_out4 = p_src[4 * i_src_stride] * 0x0101010101010101ull;
    u_out5 = p_src[5 * i_src_stride] * 0x0101010101010101ull;
    u_out6 = p_src[6 * i_src_stride] * 0x0101010101010101ull;
    u_out7 = p_src[7 * i_src_stride] * 0x0101010101010101ull;

    SD4( u_out0, u_out1, u_out2, u_out3, p_dst, i_dst_stride );
    p_dst += ( 4 * i_dst_stride );
    SD4( u_out4, u_out5, u_out6, u_out7, p_dst, i_dst_stride );
}

static void intra_predict_horiz_16x16_msa( uint8_t *p_src, int32_t i_src_stride,
                                           uint8_t *p_dst,
                                           int32_t i_dst_stride )
{
    uint32_t u_row;
    uint8_t u_inp0, u_inp1, u_inp2, u_inp3;
    v16u8 src0, src1, src2, src3;

    for ( u_row = 4; u_row--; )
    {
        u_inp0 = p_src[0];
        p_src += i_src_stride;
        u_inp1 = p_src[0];
        p_src += i_src_stride;
        u_inp2 = p_src[0];
        p_src += i_src_stride;
        u_inp3 = p_src[0];
        p_src += i_src_stride;

        src0 = ( v16u8 ) __msa_fill_b( u_inp0 );
        src1 = ( v16u8 ) __msa_fill_b( u_inp1 );
        src2 = ( v16u8 ) __msa_fill_b( u_inp2 );
        src3 = ( v16u8 ) __msa_fill_b( u_inp3 );

        ST_UB4( src0, src1, src2, src3, p_dst, i_dst_stride );
        p_dst += ( 4 * i_dst_stride );
    }
}

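/* 4x4 DC prediction: average the available top and/or left neighbours with
 * rounding and fill the block with that value, or use 128 when neither edge
 * is available. */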
static void intra_predict_dc_4x4_msa( uint8_t *p_src_top, uint8_t *p_src_left,
                                      int32_t i_src_stride_left,
                                      uint8_t *p_dst, int32_t i_dst_stride,
                                      uint8_t is_above, uint8_t is_left )
{
    uint32_t u_row;
    uint32_t u_out, u_addition = 0;
    v16u8 src_above, store;
    v8u16 sum_above;
    v4u32 sum;

    if ( is_left && is_above )
    {
        src_above = LD_UB( p_src_top );

        sum_above = __msa_hadd_u_h( src_above, src_above );
        sum = __msa_hadd_u_w( sum_above, sum_above );
        u_addition = __msa_copy_u_w( ( v4i32 ) sum, 0 );

        for ( u_row = 0; u_row < 4; u_row++ )
        {
            u_addition += p_src_left[u_row * i_src_stride_left];
        }

        u_addition = ( u_addition + 4 ) >> 3;
        store = ( v16u8 ) __msa_fill_b( u_addition );
    }
    else if ( is_left )
    {
        for ( u_row = 0; u_row < 4; u_row++ )
        {
            u_addition += p_src_left[u_row * i_src_stride_left];
        }

        u_addition = ( u_addition + 2 ) >> 2;
        store = ( v16u8 ) __msa_fill_b( u_addition );
    }
    else if ( is_above )
    {
        src_above = LD_UB( p_src_top );

        sum_above = __msa_hadd_u_h( src_above, src_above );
        sum = __msa_hadd_u_w( sum_above, sum_above );
        sum = ( v4u32 ) __msa_srari_w( ( v4i32 ) sum, 2 );
        store = ( v16u8 ) __msa_splati_b( ( v16i8 ) sum, 0 );
    }
    else
    {
        store = ( v16u8 ) __msa_ldi_b( 128 );
    }

    u_out = __msa_copy_u_w( ( v4i32 ) store, 0 );

    SW4( u_out, u_out, u_out, u_out, p_dst, i_dst_stride );
}

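/* 8x8 DC prediction: sum the eight top and eight left neighbour pixels,
 * round, and fill the block with the resulting DC value. */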
static void intra_predict_dc_8x8_msa( uint8_t *p_src_top, uint8_t *p_src_left,
                                      uint8_t *p_dst, int32_t i_dst_stride )
{
    uint64_t u_val0, u_val1;
    v16i8 store;
    v16u8 src = { 0 };
    v8u16 sum_h;
    v4u32 sum_w;
    v2u64 sum_d;

    u_val0 = LD( p_src_top );
    u_val1 = LD( p_src_left );
    INSERT_D2_UB( u_val0, u_val1, src );
    sum_h = __msa_hadd_u_h( src, src );
    sum_w = __msa_hadd_u_w( sum_h, sum_h );
    sum_d = __msa_hadd_u_d( sum_w, sum_w );
    sum_w = ( v4u32 ) __msa_pckev_w( ( v4i32 ) sum_d, ( v4i32 ) sum_d );
    sum_d = __msa_hadd_u_d( sum_w, sum_w );
    sum_w = ( v4u32 ) __msa_srari_w( ( v4i32 ) sum_d, 4 );
    store = __msa_splati_b( ( v16i8 ) sum_w, 0 );
    u_val0 = __msa_copy_u_d( ( v2i64 ) store, 0 );

    SD4( u_val0, u_val0, u_val0, u_val0, p_dst, i_dst_stride );
    p_dst += ( 4 * i_dst_stride );
    SD4( u_val0, u_val0, u_val0, u_val0, p_dst, i_dst_stride );
}

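/* 16x16 DC prediction: same idea as the 4x4 version, but summing 16 top and
 * 16 left neighbours, with availability flags selecting which edges
 * contribute. */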
static void intra_predict_dc_16x16_msa( uint8_t *p_src_top, uint8_t *p_src_left,
                                        int32_t i_src_stride_left,
                                        uint8_t *p_dst, int32_t i_dst_stride,
                                        uint8_t is_above, uint8_t is_left )
{
    uint32_t u_row;
    uint32_t u_addition = 0;
    v16u8 src_above, store;
    v8u16 sum_above;
    v4u32 sum_top;
    v2u64 sum;

    if ( is_left && is_above )
    {
        src_above = LD_UB( p_src_top );

        sum_above = __msa_hadd_u_h( src_above, src_above );
        sum_top = __msa_hadd_u_w( sum_above, sum_above );
        sum = __msa_hadd_u_d( sum_top, sum_top );
        sum_top = ( v4u32 ) __msa_pckev_w( ( v4i32 ) sum, ( v4i32 ) sum );
        sum = __msa_hadd_u_d( sum_top, sum_top );
        u_addition = __msa_copy_u_w( ( v4i32 ) sum, 0 );

        for ( u_row = 0; u_row < 16; u_row++ )
        {
            u_addition += p_src_left[u_row * i_src_stride_left];
        }

        u_addition = ( u_addition + 16 ) >> 5;
        store = ( v16u8 ) __msa_fill_b( u_addition );
    }
    else if ( is_left )
    {
        for ( u_row = 0; u_row < 16; u_row++ )
        {
            u_addition += p_src_left[u_row * i_src_stride_left];
        }

        u_addition = ( u_addition + 8 ) >> 4;
        store = ( v16u8 ) __msa_fill_b( u_addition );
    }
    else if ( is_above )
    {
        src_above = LD_UB( p_src_top );

        sum_above = __msa_hadd_u_h( src_above, src_above );
        sum_top = __msa_hadd_u_w( sum_above, sum_above );
        sum = __msa_hadd_u_d( sum_top, sum_top );
        sum_top = ( v4u32 ) __msa_pckev_w( ( v4i32 ) sum, ( v4i32 ) sum );
        sum = __msa_hadd_u_d( sum_top, sum_top );
        sum = ( v2u64 ) __msa_srari_d( ( v2i64 ) sum, 4 );
        store = ( v16u8 ) __msa_splati_b( ( v16i8 ) sum, 0 );
    }
    else
    {
        store = ( v16u8 ) __msa_ldi_b( 128 );
    }

    ST_UB8( store, store, store, store, store, store, store, store, p_dst,
            i_dst_stride );
    p_dst += ( 8 * i_dst_stride );
    ST_UB8( store, store, store, store, store, store, store, store, p_dst,
            i_dst_stride );
}

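/* 8x8 plane (planar) prediction, computed in place on the reconstruction
 * buffer: horizontal and vertical gradients are derived from the border
 * pixels, then the plane is evaluated row by row with MSA vectors. */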
static void intra_predict_plane_8x8_msa( uint8_t *p_src, int32_t i_stride )
{
    uint8_t u_lpcnt;
    int32_t i_res, i_res0, i_res1, i_res2, i_res3;
    uint64_t u_out0, u_out1;
    v16i8 shf_mask = { 3, 5, 2, 6, 1, 7, 0, 8, 3, 5, 2, 6, 1, 7, 0, 8 };
    v8i16 short_multiplier = { 1, 2, 3, 4, 1, 2, 3, 4 };
    v4i32 int_multiplier = { 0, 1, 2, 3 };
    v16u8 p_src_top;
    v8i16 vec9, vec10, vec11;
    v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8;
    v2i64 sum;

    p_src_top = LD_UB( p_src - ( i_stride + 1 ) );
    p_src_top = ( v16u8 ) __msa_vshf_b( shf_mask, ( v16i8 ) p_src_top,
                                        ( v16i8 ) p_src_top );

    vec9 = __msa_hsub_u_h( p_src_top, p_src_top );
    vec9 *= short_multiplier;
    vec8 = __msa_hadd_s_w( vec9, vec9 );
    sum = __msa_hadd_s_d( vec8, vec8 );

    i_res0 = __msa_copy_s_w( ( v4i32 ) sum, 0 );

    i_res1 = ( p_src[4 * i_stride - 1] - p_src[2 * i_stride - 1] ) +
             2 * ( p_src[5 * i_stride - 1] - p_src[i_stride - 1] ) +
             3 * ( p_src[6 * i_stride - 1] - p_src[-1] ) +
             4 * ( p_src[7 * i_stride - 1] - p_src[-i_stride - 1] );

    i_res0 *= 17;
    i_res1 *= 17;
    i_res0 = ( i_res0 + 16 ) >> 5;
    i_res1 = ( i_res1 + 16 ) >> 5;

    i_res3 = 3 * ( i_res0 + i_res1 );
    i_res2 = 16 * ( p_src[7 * i_stride - 1] + p_src[-i_stride + 7] + 1 );
    i_res = i_res2 - i_res3;

    vec8 = __msa_fill_w( i_res0 );
    vec4 = __msa_fill_w( i_res );
    vec2 = __msa_fill_w( i_res1 );
    vec5 = vec8 * int_multiplier;
    vec3 = vec8 * 4;

    for ( u_lpcnt = 4; u_lpcnt--; )
    {
        vec0 = vec5;
        vec0 += vec4;
        vec1 = vec0 + vec3;
        vec6 = vec5;
        vec4 += vec2;
        vec6 += vec4;
        vec7 = vec6 + vec3;

        SRA_4V( vec0, vec1, vec6, vec7, 5 );
        PCKEV_H2_SH( vec1, vec0, vec7, vec6, vec10, vec11 );
        CLIP_SH2_0_255( vec10, vec11 );
        PCKEV_B2_SH( vec10, vec10, vec11, vec11, vec10, vec11 );

        u_out0 = __msa_copy_s_d( ( v2i64 ) vec10, 0 );
        u_out1 = __msa_copy_s_d( ( v2i64 ) vec11, 0 );
        SD( u_out0, p_src );
        p_src += i_stride;
        SD( u_out1, p_src );
        p_src += i_stride;

        vec4 += vec2;
    }
}

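/* 16x16 plane (planar) prediction, the 16x16 counterpart of the routine
 * above. */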
static void intra_predict_plane_16x16_msa( uint8_t *p_src, int32_t i_stride )
{
    uint8_t u_lpcnt;
    int32_t i_res0, i_res1, i_res2, i_res3;
    uint64_t u_load0, u_load1;
    v16i8 shf_mask = { 7, 8, 6, 9, 5, 10, 4, 11, 3, 12, 2, 13, 1, 14, 0, 15 };
    v8i16 short_multiplier = { 1, 2, 3, 4, 5, 6, 7, 8 };
    v4i32 int_multiplier = { 0, 1, 2, 3 };
    v16u8 p_src_top = { 0 };
    v8i16 vec9, vec10;
    v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, res_add;

    u_load0 = LD( p_src - ( i_stride + 1 ) );
    u_load1 = LD( p_src - ( i_stride + 1 ) + 9 );

    INSERT_D2_UB( u_load0, u_load1, p_src_top );

    p_src_top = ( v16u8 ) __msa_vshf_b( shf_mask, ( v16i8 ) p_src_top,
                                        ( v16i8 ) p_src_top );

    vec9 = __msa_hsub_u_h( p_src_top, p_src_top );
    vec9 *= short_multiplier;
    vec8 = __msa_hadd_s_w( vec9, vec9 );
    res_add = ( v4i32 ) __msa_hadd_s_d( vec8, vec8 );

    i_res0 = __msa_copy_s_w( res_add, 0 ) + __msa_copy_s_w( res_add, 2 );

    i_res1 = ( p_src[8 * i_stride - 1] - p_src[6 * i_stride - 1] ) +
             2 * ( p_src[9 * i_stride - 1] - p_src[5 * i_stride - 1] ) +
             3 * ( p_src[10 * i_stride - 1] - p_src[4 * i_stride - 1] ) +
             4 * ( p_src[11 * i_stride - 1] - p_src[3 * i_stride - 1] ) +
             5 * ( p_src[12 * i_stride - 1] - p_src[2 * i_stride - 1] ) +
             6 * ( p_src[13 * i_stride - 1] - p_src[i_stride - 1] ) +
             7 * ( p_src[14 * i_stride - 1] - p_src[-1] ) +
             8 * ( p_src[15 * i_stride - 1] - p_src[-1 * i_stride - 1] );

    i_res0 *= 5;
    i_res1 *= 5;
    i_res0 = ( i_res0 + 32 ) >> 6;
    i_res1 = ( i_res1 + 32 ) >> 6;

    i_res3 = 7 * ( i_res0 + i_res1 );
    i_res2 = 16 * ( p_src[15 * i_stride - 1] + p_src[-i_stride + 15] + 1 );
    i_res2 -= i_res3;

    vec8 = __msa_fill_w( i_res0 );
    vec4 = __msa_fill_w( i_res2 );
    vec5 = __msa_fill_w( i_res1 );
    vec6 = vec8 * 4;
    vec7 = vec8 * int_multiplier;

    for ( u_lpcnt = 16; u_lpcnt--; )
    {
        vec0 = vec7;
        vec0 += vec4;
        vec1 = vec0 + vec6;
        vec2 = vec1 + vec6;
        vec3 = vec2 + vec6;

        SRA_4V( vec0, vec1, vec2, vec3, 5 );
        PCKEV_H2_SH( vec1, vec0, vec3, vec2, vec9, vec10 );
        CLIP_SH2_0_255( vec9, vec10 );
        PCKEV_ST_SB( vec9, vec10, p_src );
        p_src += i_stride;

        vec4 += vec5;
    }
}

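/* DC prediction for the four 4x4 sub-blocks of an 8x8 block: each quadrant
 * gets its own DC value computed from the top and/or left neighbours that
 * border it (this matches the per-quadrant DC rule used for 8x8 chroma
 * blocks). */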
static void intra_predict_dc_4blk_8x8_msa( uint8_t *p_src, int32_t i_stride )
{
    uint8_t u_lp_cnt;
    uint32_t u_src0, u_src1, u_src3, u_src2 = 0;
    uint32_t u_out0, u_out1, u_out2, u_out3;
    v16u8 p_src_top;
    v8u16 add;
    v4u32 sum;

    p_src_top = LD_UB( p_src - i_stride );
    add = __msa_hadd_u_h( ( v16u8 ) p_src_top, ( v16u8 ) p_src_top );
    sum = __msa_hadd_u_w( add, add );
    u_src0 = __msa_copy_u_w( ( v4i32 ) sum, 0 );
    u_src1 = __msa_copy_u_w( ( v4i32 ) sum, 1 );

    for ( u_lp_cnt = 0; u_lp_cnt < 4; u_lp_cnt++ )
    {
        u_src0 += p_src[u_lp_cnt * i_stride - 1];
        u_src2 += p_src[( 4 + u_lp_cnt ) * i_stride - 1];
    }

    u_src0 = ( u_src0 + 4 ) >> 3;
    u_src3 = ( u_src1 + u_src2 + 4 ) >> 3;
    u_src1 = ( u_src1 + 2 ) >> 2;
    u_src2 = ( u_src2 + 2 ) >> 2;

    u_out0 = u_src0 * 0x01010101;
    u_out1 = u_src1 * 0x01010101;
    u_out2 = u_src2 * 0x01010101;
    u_out3 = u_src3 * 0x01010101;

    for ( u_lp_cnt = 4; u_lp_cnt--; )
    {
        SW( u_out0, p_src );
        SW( u_out1, ( p_src + 4 ) );
        SW( u_out2, ( p_src + 4 * i_stride ) );
        SW( u_out3, ( p_src + 4 * i_stride + 4 ) );
        p_src += i_stride;
    }
}

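/* 8x8 diagonal down-left prediction: each output pixel is the rounded
 * average ( a + 2*b + c + 2 ) >> 2 of three consecutive top neighbours, and
 * successive rows are produced by shifting the filtered row one byte to the
 * left. */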
static void intra_predict_ddl_8x8_msa( uint8_t *p_src, uint8_t *p_dst,
                                       int32_t i_dst_stride )
{
    uint8_t u_src_val = p_src[15];
    uint64_t u_out0, u_out1, u_out2, u_out3;
    v16u8 src, vec4, vec5, res0;
    v8u16 vec0, vec1, vec2, vec3;
    v2i64 res1, res2, res3;

    src = LD_UB( p_src );

    vec4 = ( v16u8 ) __msa_sldi_b( ( v16i8 ) src, ( v16i8 ) src, 1 );
    vec5 = ( v16u8 ) __msa_sldi_b( ( v16i8 ) src, ( v16i8 ) src, 2 );
    vec5 = ( v16u8 ) __msa_insert_b( ( v16i8 ) vec5, 14, u_src_val );
    ILVR_B2_UH( vec5, src, vec4, vec4, vec0, vec1 );
    ILVL_B2_UH( vec5, src, vec4, vec4, vec2, vec3 );
    HADD_UB4_UH( vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3 );

    vec0 += vec1;
    vec2 += vec3;
    vec0 = ( v8u16 ) __msa_srari_h( ( v8i16 ) vec0, 2 );
    vec2 = ( v8u16 ) __msa_srari_h( ( v8i16 ) vec2, 2 );

    res0 = ( v16u8 ) __msa_pckev_b( ( v16i8 ) vec2, ( v16i8 ) vec0 );
    res1 = ( v2i64 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 1 );
    res2 = ( v2i64 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 2 );
    res3 = ( v2i64 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 3 );

    u_out0 = __msa_copy_u_d( ( v2i64 ) res0, 0 );
    u_out1 = __msa_copy_u_d( res1, 0 );
    u_out2 = __msa_copy_u_d( res2, 0 );
    u_out3 = __msa_copy_u_d( res3, 0 );
    SD4( u_out0, u_out1, u_out2, u_out3, p_dst, i_dst_stride );
    p_dst += ( 4 * i_dst_stride );

    res0 = ( v16u8 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 4 );
    res1 = ( v2i64 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 1 );
    res2 = ( v2i64 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 2 );
    res3 = ( v2i64 ) __msa_sldi_b( ( v16i8 ) res0, ( v16i8 ) res0, 3 );

    u_out0 = __msa_copy_u_d( ( v2i64 ) res0, 0 );
    u_out1 = __msa_copy_u_d( res1, 0 );
    u_out2 = __msa_copy_u_d( res2, 0 );
    u_out3 = __msa_copy_u_d( res3, 0 );
    SD4( u_out0, u_out1, u_out2, u_out3, p_dst, i_dst_stride );
}

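/* Fill a 16x16 block with the constant value 128 (DC prediction with no
 * neighbours available). */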
static void intra_predict_128dc_16x16_msa( uint8_t *p_dst,
                                           int32_t i_dst_stride )
{
    v16u8 out = ( v16u8 ) __msa_ldi_b( 128 );

    ST_UB8( out, out, out, out, out, out, out, out, p_dst, i_dst_stride );
    p_dst += ( 8 * i_dst_stride );
    ST_UB8( out, out, out, out, out, out, out, out, p_dst, i_dst_stride );
}

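/* Exported wrappers binding the helpers above to x264's intra prediction
 * interface: p_src points into the reconstruction buffer with stride
 * FDEC_STRIDE, and pu_xyz is the filtered 8x8 edge array (top neighbours
 * starting at pu_xyz + 16, left neighbours at pu_xyz + 7..14). */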
void x264_intra_predict_dc_16x16_msa( uint8_t *p_src )
{
    intra_predict_dc_16x16_msa( ( p_src - FDEC_STRIDE ), ( p_src - 1 ),
                                FDEC_STRIDE, p_src, FDEC_STRIDE, 1, 1 );
}

void x264_intra_predict_dc_left_16x16_msa( uint8_t *p_src )
{
    intra_predict_dc_16x16_msa( ( p_src - FDEC_STRIDE ), ( p_src - 1 ),
                                FDEC_STRIDE, p_src, FDEC_STRIDE, 0, 1 );
}

void x264_intra_predict_dc_top_16x16_msa( uint8_t *p_src )
{
    intra_predict_dc_16x16_msa( ( p_src - FDEC_STRIDE ), ( p_src - 1 ),
                                FDEC_STRIDE, p_src, FDEC_STRIDE, 1, 0 );
}

void x264_intra_predict_dc_128_16x16_msa( uint8_t *p_src )
{
    intra_predict_128dc_16x16_msa( p_src, FDEC_STRIDE );
}

void x264_intra_predict_hor_16x16_msa( uint8_t *p_src )
{
    intra_predict_horiz_16x16_msa( ( p_src - 1 ), FDEC_STRIDE,
                                   p_src, FDEC_STRIDE );
}

void x264_intra_predict_vert_16x16_msa( uint8_t *p_src )
{
    intra_predict_vert_16x16_msa( ( p_src - FDEC_STRIDE ), p_src, FDEC_STRIDE );
}

void x264_intra_predict_plane_16x16_msa( uint8_t *p_src )
{
    intra_predict_plane_16x16_msa( p_src, FDEC_STRIDE );
}

void x264_intra_predict_dc_4blk_8x8_msa( uint8_t *p_src )
{
    intra_predict_dc_4blk_8x8_msa( p_src, FDEC_STRIDE );
}

void x264_intra_predict_hor_8x8_msa( uint8_t *p_src )
{
    intra_predict_horiz_8x8_msa( ( p_src - 1 ), FDEC_STRIDE,
                                 p_src, FDEC_STRIDE );
}

void x264_intra_predict_vert_8x8_msa( uint8_t *p_src )
{
    intra_predict_vert_8x8_msa( ( p_src - FDEC_STRIDE ), p_src, FDEC_STRIDE );
}

void x264_intra_predict_plane_8x8_msa( uint8_t *p_src )
{
    intra_predict_plane_8x8_msa( p_src, FDEC_STRIDE );
}

void x264_intra_predict_ddl_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] )
{
    intra_predict_ddl_8x8_msa( ( pu_xyz + 16 ), p_src, FDEC_STRIDE );
}

void x264_intra_predict_dc_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] )
{
    intra_predict_dc_8x8_msa( ( pu_xyz + 16 ), ( pu_xyz + 7 ),
                              p_src, FDEC_STRIDE );
}

void x264_intra_predict_h_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] )
{
    intra_predict_horiz_8x8_msa( ( pu_xyz + 14 ), -1, p_src, FDEC_STRIDE );
}

void x264_intra_predict_v_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] )
{
    intra_predict_vert_8x8_msa( ( pu_xyz + 16 ), p_src, FDEC_STRIDE );
}

void x264_intra_predict_dc_4x4_msa( uint8_t *p_src )
{
    intra_predict_dc_4x4_msa( ( p_src - FDEC_STRIDE ), ( p_src - 1 ),
                              FDEC_STRIDE, p_src, FDEC_STRIDE, 1, 1 );
}

void x264_intra_predict_hor_4x4_msa( uint8_t *p_src )
{
    intra_predict_horiz_4x4_msa( ( p_src - 1 ), FDEC_STRIDE,
                                 p_src, FDEC_STRIDE );
}

void x264_intra_predict_vert_4x4_msa( uint8_t *p_src )
{
    intra_predict_vert_4x4_msa( ( p_src - FDEC_STRIDE ), p_src, FDEC_STRIDE );
}
#endif