;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32
; Scale factors. Each is replicated 8 times (32 bytes) so a single load fills
; a whole vector register at any mmsize used in this file.
flt2pm31: times 8 dd 4.6566129e-10      ; 2^-31
flt2p31 : times 8 dd 2147483648.0       ; 2^31
flt2p15 : times 8 dd 32768.0            ; 2^15

; pshufb control mask: moves the even-indexed 16-bit words of a 16-byte
; register into its low 8 bytes and the odd-indexed words into its high 8.
word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15

SECTION .text

;------------------------------------------------------------------------------
; PACK_2CH to, from, a/u, log2_outsize, log2_insize, convert, init
;
; Emits pack_2ch_<from>_to_<to>_<a|u>, which interleaves two planar channels
; into one packed stream and applies a sample-format conversion on the way.
;   %1  destination format name (only used to build symbol names)
;   %2  source format name      (only used to build symbol names)
;   %3  a = aligned loads/stores (mova), u = unaligned (movu)
;   %4  log2 of the output sample size in bytes
;   %5  log2 of the input sample size in bytes
;   %6  conversion macro, invoked once per iteration with m0..m5
;   %7  setup macro, invoked once before the loop with m0..m5
;
; Generated function arguments: dst and src are arrays of channel pointers
; (src[0], src[1] and dst[0] are dereferenced), len is the loop count.
; The "a" variant jumps into the body of the matching "u" variant when any
; pointer is not mmsize-aligned, so the "u" variant must be instantiated for
; the same formats and instruction set.
; The loop is do-while shaped: the body runs once before len is tested.
;------------------------------------------------------------------------------
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov     src2q, [srcq+gprsize]       ; src[1]
    mov     srcq , [srcq]               ; src[0]
    mov     dstq , [dstq]               ; dst[0]
%ifidn %3, a
    ; Fall back to the unaligned implementation on any misaligned pointer.
    test    dstq, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Advance every pointer to the end of its buffer and index backwards with
    ; a negative counter that runs up towards zero.
    lea     srcq , [srcq  + (1<<%5)*lenq]
    lea     src2q, [src2q + (1<<%5)*lenq]
    lea     dstq , [dstq  + (2<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; Output samples are at least as wide as input: interleave first, then
    ; convert the interleaved data.
    mov%3   m0, [         srcq +(1<<%5)*lenq]
    mova    m1, m0
    mov%3   m2, [         src2q+(1<<%5)*lenq]
%if %5 == 1
    punpcklwd m0, m2
    punpckhwd m1, m2
%else
    punpckldq m0, m2
    punpckhdq m1, m2
%endif
    %6 m0,m1,m2,m3,m4,m5
%else
    ; Output samples are narrower: convert each channel (m0/m1 and m2/m3),
    ; then interleave the resulting 16-bit words.
    mov%3   m0, [         srcq +(1<<%5)*lenq]
    mov%3   m1, [mmsize + srcq +(1<<%5)*lenq]
    mov%3   m2, [         src2q+(1<<%5)*lenq]
    mov%3   m3, [mmsize + src2q+(1<<%5)*lenq]
    %6 m0,m1,m2,m3,m4,m5
    mova    m2, m0
    punpcklwd m0, m1
    punpckhwd m2, m1
    SWAP 1,2
%endif
    mov%3   [           dstq+(2<<%4)*lenq], m0
    mov%3   [  mmsize + dstq+(2<<%4)*lenq], m1
%if %4 > %5
    ; Widening conversions produce four registers of output per iteration.
    mov%3   [2*mmsize + dstq+(2<<%4)*lenq], m2
    mov%3   [3*mmsize + dstq+(2<<%4)*lenq], m3
    add     lenq, 4*mmsize/(2<<%4)
%else
    add     lenq, 2*mmsize/(2<<%4)
%endif
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, convert, init
;
; Emits unpack_2ch_<from>_to_<to>_<a|u>, which splits one packed two-channel
; stream into two planar channels and applies a sample-format conversion.
;   %1  destination format name (only used to build symbol names)
;   %2  source format name      (only used to build symbol names)
;   %3  a = aligned loads/stores (mova), u = unaligned (movu)
;   %4  log2 of the output sample size in bytes
;   %5  log2 of the input sample size in bytes
;   %6  conversion macro, invoked once per iteration with m0..m5
;   %7  setup macro, invoked once before the loop with m0..m5
;
; Generated function arguments: dst and src are arrays of channel pointers
; (dst[0], dst[1] and src[0] are dereferenced), len is the loop count.
; The "a" variant jumps into the body of the matching "u" variant when any
; pointer is not mmsize-aligned, so the "u" variant must be instantiated for
; the same formats and instruction set.
; The loop is do-while shaped: the body runs once before len is tested.
;------------------------------------------------------------------------------
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov     dst2q, [dstq+gprsize]       ; dst[1]
    mov     srcq , [srcq]               ; src[0]
    mov     dstq , [dstq]               ; dst[0]
%ifidn %3, a
    ; Fall back to the unaligned implementation on any misaligned pointer.
    test    dstq, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Advance every pointer to the end of its buffer and index backwards with
    ; a negative counter that runs up towards zero.
    lea     srcq , [srcq  + (2<<%5)*lenq]
    lea     dstq , [dstq  + (1<<%4)*lenq]
    lea     dst2q, [dst2q + (1<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
%if %5 == 1
%if cpuflag(ssse3)
    ; m6 is only read by the pshufb path below, so only load it there.
    mova    m6, [word_unpack_shuf]
%endif
%endif
.next:
    mov%3   m0, [           srcq +(2<<%5)*lenq]
    mov%3   m2, [  mmsize + srcq +(2<<%5)*lenq]
%if %5 == 1
%if cpuflag(ssse3)
    ; 16-bit input, SSSE3: one byte shuffle per register separates the two
    ; channels into the low/high halves, then the halves are recombined.
    pshufb  m0, m6
    mova    m1, m0
    pshufb  m2, m6
    punpcklqdq m0,m2
    punpckhqdq m1,m2
%else
    ; 16-bit input without pshufb: three rounds of word interleaving
    ; de-interleave the two channels into m0 and m1.
    mova    m1, m0
    punpcklwd m0,m2
    punpckhwd m1,m2

    mova    m2, m0
    punpcklwd m0,m1
    punpckhwd m2,m1

    mova    m1, m0
    punpcklwd m0,m2
    punpckhwd m1,m2
%endif
%else
    ; 32-bit input: shufps picks the even dwords into m0, the odd into m1.
    mova    m1, m0
    shufps  m0, m2, 10001000b
    shufps  m1, m2, 11011101b
%endif
%if %4 < %5
    ; Narrowing conversion: fetch a second pair of input registers so that
    ; each channel occupies two registers (m0/m1 and m2/m3) before converting.
    mov%3   m2, [2*mmsize + srcq +(2<<%5)*lenq]
    mova    m3, m2
    mov%3   m4, [3*mmsize + srcq +(2<<%5)*lenq]
    shufps  m2, m4, 10001000b
    shufps  m3, m4, 11011101b
    SWAP 1,2
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3   [           dstq+(1<<%4)*lenq], m0
%if %4 > %5
    ; Widening conversion: each channel now spans two registers.
    mov%3   [          dst2q+(1<<%4)*lenq], m2
    mov%3   [ mmsize +  dstq+(1<<%4)*lenq], m1
    mov%3   [ mmsize + dst2q+(1<<%4)*lenq], m3
    add     lenq, 2*mmsize/(1<<%4)
%else
    mov%3   [          dst2q+(1<<%4)*lenq], m1
    add     lenq, mmsize/(1<<%4)
%endif
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; CONV to, from, a/u, log2_outsize, log2_insize, convert, init
;
; Emits <from>_to_<to>_<a|u>, a single-channel sample-format converter.
;   %1  destination format name (only used to build symbol names)
;   %2  source format name      (only used to build symbol names)
;   %3  a = aligned loads/stores (mova), u = unaligned (movu)
;   %4  log2 of the output sample size in bytes
;   %5  log2 of the input sample size in bytes
;   %6  conversion macro, invoked once per iteration with m0..m5
;   %7  setup macro, invoked once before the loop with m0..m5
;
; Generated function arguments: dst and src are arrays of channel pointers
; (only element 0 of each is dereferenced), len is the loop count.
; The "a" variant jumps into the body of the matching "u" variant when a
; pointer is not mmsize-aligned, so the "u" variant must be instantiated for
; the same formats and instruction set.
; The loop is do-while shaped: the body runs once before len is tested.
;------------------------------------------------------------------------------
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov     srcq, [srcq]                ; src[0]
    mov     dstq, [dstq]                ; dst[0]
%ifidn %3, a
    ; Fall back to the unaligned implementation on any misaligned pointer.
    test    dstq, mmsize-1
        jne %2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne %2_to_%1_u_int %+ SUFFIX
%else
%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Advance both pointers to the end of their buffers and index backwards
    ; with a negative counter that runs up towards zero.
    lea     srcq, [srcq + (1<<%5)*lenq]
    lea     dstq, [dstq + (1<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
    mov%3   m0, [           srcq +(1<<%5)*lenq]
    mov%3   m1, [  mmsize + srcq +(1<<%5)*lenq]
%if %4 < %5
    ; Narrowing conversion: four input registers collapse into two outputs.
    mov%3   m2, [2*mmsize + srcq +(1<<%5)*lenq]
    mov%3   m3, [3*mmsize + srcq +(1<<%5)*lenq]
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3   [           dstq+(1<<%4)*lenq], m0
    mov%3   [  mmsize + dstq+(1<<%4)*lenq], m1
%if %4 > %5
    ; Widening conversion: two input registers expand into four outputs.
    mov%3   [2*mmsize + dstq+(1<<%4)*lenq], m2
    mov%3   [3*mmsize + dstq+(1<<%4)*lenq], m3
    add     lenq, 4*mmsize/(1<<%4)
%else
    add     lenq, 2*mmsize/(1<<%4)
%endif
        jl .next
%if mmsize == 8
    ; MMX registers were used: leave MMX state before returning.
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; PACK_6CH to, from, a/u, log2_outsize, log2_insize, num_vregs, convert, init
;
; Emits pack_6ch_<from>_to_<to>_<a|u>, which interleaves six planar channels
; of 32-bit samples into one packed stream.
;   %1  destination format name (only used to build symbol names)
;   %2  source format name      (only used to build symbol names)
;   %3  a = aligned loads/stores, u = unaligned
;   %4, %5  not referenced by this macro body
;   %6  number of vector registers passed to cglobal
;   %7  conversion macro, invoked on register pairs inside the loop
;   %8  setup macro, invoked once before the loop (receives m7 as 5th operand)
;
; Generated function arguments: dst and src are arrays of channel pointers,
; the third argument is the length. The length is kept in "lend": a register
; copy on x86-64, the stack argument itself on x86-32.
; The "a" variant jumps into the body of the matching "u" variant when any
; pointer is not mmsize-aligned.
; The loop is do-while shaped: the body runs once before len is tested.
;------------------------------------------------------------------------------
%macro PACK_6CH 8
cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov     lend, r2d
%else
    %define lend dword r2m
%endif
    mov     src1q, [srcq+1*gprsize]
    mov     src2q, [srcq+2*gprsize]
    mov     src3q, [srcq+3*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src5q, [srcq+5*gprsize]
    mov     srcq , [srcq]
    mov     dstq , [dstq]
%ifidn %3, a
    ; Fall back to the unaligned implementation on any misaligned pointer.
    test    dstq, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src4q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn channel pointers 1..5 into offsets from channel 0 so that a single
    ; increment of srcq advances all six channels.
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
    sub     src4q, srcq
    sub     src5q, srcq
    %8 x,x,x,x,m7,x
.loop:
    mov%3   m0, [srcq      ]
    mov%3   m1, [srcq+src1q]
    mov%3   m2, [srcq+src2q]
    mov%3   m3, [srcq+src3q]
    mov%3   m4, [srcq+src4q]
    mov%3   m5, [srcq+src5q]
%if cpuflag(sse)
    ; SBUTTERFLYPS is defined outside this file; it is applied pairwise to
    ; neighbouring channels with m6 as scratch.
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

%if cpuflag(avx)
    blendps m6, m4, m0, 1100b
%else
    movaps  m6, m4
    shufps  m4, m0, q3210
    SWAP 4,6
%endif
    movlhps m0, m2
    movhlps m4, m2
%if cpuflag(avx)
    blendps m2, m5, m1, 1100b
%else
    movaps  m2, m5
    shufps  m5, m1, q3210
    SWAP 2,5
%endif
    movlhps m1, m3
    movhlps m5, m3

    ; Convert in the order the registers are stored below.
    %7 m0,m6,x,x,m7,m3
    %7 m4,m1,x,x,m7,m3
    %7 m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    ; 8-byte registers: a dword butterfly per channel pair is sufficient.
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq    [dstq   ], m0
    movq    [dstq+ 8], m2
    movq    [dstq+16], m4
    movq    [dstq+24], m1
    movq    [dstq+32], m3
    movq    [dstq+40], m5
%endif
    add     srcq, mmsize
    add     dstq, mmsize*6
    sub     lend, mmsize/4              ; mmsize/4 32-bit samples per channel
        jg .loop
%if mmsize == 8
    ; MMX registers were used: leave MMX state before returning.
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; UNPACK_6CH to, from, a/u, log2_outsize, log2_insize, num_vregs, convert, init
;
; Emits unpack_6ch_<from>_to_<to>_<a|u>, which splits one packed six-channel
; stream of 32-bit samples into six planar channels.
;   %1  destination format name (only used to build symbol names)
;   %2  source format name      (only used to build symbol names)
;   %3  a = aligned loads/stores, u = unaligned
;   %4, %5  not referenced by this macro body
;   %6  number of vector registers passed to cglobal
;   %7  conversion macro, invoked on register pairs inside the loop
;   %8  setup macro, invoked once before the loop (receives m7 as 5th operand)
;
; Generated function arguments: dst and src are arrays of channel pointers,
; the third argument is the length. The length is kept in "lend": a register
; copy on x86-64, the stack argument itself on x86-32.
; The source offsets are hard-coded in 16-byte steps, i.e. the body is
; written for 16-byte vector registers.
; The "a" variant jumps into the body of the matching "u" variant when any
; pointer is not mmsize-aligned.
; The loop is do-while shaped: the body runs once before len is tested.
;------------------------------------------------------------------------------
%macro UNPACK_6CH 8
cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
%if ARCH_X86_64
    mov     lend, r2d
%else
    %define lend dword r2m
%endif
    mov     dst1q, [dstq+1*gprsize]
    mov     dst2q, [dstq+2*gprsize]
    mov     dst3q, [dstq+3*gprsize]
    mov     dst4q, [dstq+4*gprsize]
    mov     dst5q, [dstq+5*gprsize]
    mov     dstq , [dstq]
    mov     srcq , [srcq]
%ifidn %3, a
    ; Fall back to the unaligned implementation on any misaligned pointer.
    test    dstq, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst1q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst3q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst4q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst5q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn channel pointers 1..5 into offsets from channel 0 so that a single
    ; increment of dstq advances all six channels.
    sub     dst1q, dstq
    sub     dst2q, dstq
    sub     dst3q, dstq
    sub     dst4q, dstq
    sub     dst5q, dstq
    %8 x,x,x,x,m7,x
.loop:
    mov%3   m0, [srcq   ]
    mov%3   m1, [srcq+16]
    mov%3   m2, [srcq+32]
    mov%3   m3, [srcq+48]
    mov%3   m4, [srcq+64]
    mov%3   m5, [srcq+80]

    ; Two rounds of SBUTTERFLYPS (defined outside this file, m6 as scratch)
    ; followed by register renames leave channel k in mk.
    SBUTTERFLYPS 0, 3, 6
    SBUTTERFLYPS 1, 4, 6
    SBUTTERFLYPS 2, 5, 6
    SBUTTERFLYPS 0, 4, 6
    SBUTTERFLYPS 3, 2, 6
    SBUTTERFLYPS 1, 5, 6
    SWAP 1, 4
    SWAP 2, 3

    %7 m0,m1,x,x,m7,m6
    %7 m2,m3,x,x,m7,m6
    %7 m4,m5,x,x,m7,m6

    mov %+ %3 %+ ps [dstq      ], m0
    mov %+ %3 %+ ps [dstq+dst1q], m1
    mov %+ %3 %+ ps [dstq+dst2q], m2
    mov %+ %3 %+ ps [dstq+dst3q], m3
    mov %+ %3 %+ ps [dstq+dst4q], m4
    mov %+ %3 %+ ps [dstq+dst5q], m5

    add     srcq, mmsize*6
    add     dstq, mmsize
    sub     lend, mmsize/4              ; mmsize/4 32-bit samples per channel
        jg .loop
    REP_RET
%endmacro

; Number of general-purpose registers requested by PACK_8CH:
; 10 on x86-64; on x86-32, 6 plus one more when HAVE_ALIGNED_STACK is set.
%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)

;------------------------------------------------------------------------------
; PACK_8CH to, from, a/u, log2_outsize, log2_insize, num_vregs, convert, init
;
; Emits pack_8ch_<from>_to_<to>_<a|u>, which interleaves eight planar channels
; of 32-bit samples into one packed stream.
;   %1  destination format name (symbol names; also selects the x86-32 scale)
;   %2  source format name      (only used to build symbol names)
;   %3  a = aligned loads/stores (mova), u = unaligned (movu)
;   %4, %5  not referenced by this macro body
;   %6  number of vector registers passed to cglobal
;   %7  conversion macro, invoked on register pairs inside the loop
;   %8  setup macro, invoked once before the loop on x86-64 only
;
; x86-64: every channel pointer lives in its own register and m9 is filled by
; the setup macro.
; x86-32: 48 bytes of stack are reserved. [rsp] and [rsp+16] are handed to
; TRANSPOSE8x4D as scratch, [rsp+32] / [rsp+36] / [rsp+40] hold spilled
; channel offsets, and m9 is a memory operand naming the scale constant
; directly. src1q and src7q (and src4q when HAVE_ALIGNED_STACK is 0) are all
; aliases of r0q, the register also used for dst, so dst and those offsets are
; reloaded from memory whenever they are needed.
; NOTE(review): the aligned-variant checks on x86-32 test src1q after src7 was
; loaded through the same alias -- confirm the intended register contents.
;
; The "a" variant jumps into the body of the matching "u" variant when any
; pointer is not mmsize-aligned.
; The loop is do-while shaped: the body runs once before len is tested.
;------------------------------------------------------------------------------
%macro PACK_8CH 8
cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
    mov     dstq, [dstq]
%if ARCH_X86_32
    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
    %define lend dword r2m
    %define src1q r0q
    %define src1m dword [rsp+32]
%if HAVE_ALIGNED_STACK == 0
    DEFINE_ARGS dst, src, src2, src3, src5, src6
    %define src4q r0q
    %define src4m dword [rsp+36]
%endif
    %define src7q r0q
    %define src7m dword [rsp+40]
    mov     dstm, dstq                  ; r0 is about to be reused: save dst
%endif
    mov     src7q, [srcq+7*gprsize]
    mov     src6q, [srcq+6*gprsize]
%if ARCH_X86_32
    mov     src7m, src7q
%endif
    mov     src5q, [srcq+5*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src3q, [srcq+3*gprsize]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov     src4m, src4q
%endif
    mov     src2q, [srcq+2*gprsize]
    mov     src1q, [srcq+1*gprsize]
    mov     srcq , [srcq]
%ifidn %3, a
    ; Fall back to the unaligned implementation on any misaligned pointer.
%if ARCH_X86_32
    test    dstmp, mmsize-1
%else
    test    dstq, mmsize-1
%endif
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    test    src4m, mmsize-1
%else
    test    src4q, mmsize-1
%endif
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src6q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32
    test    src7m, mmsize-1
%else
    test    src7q, mmsize-1
%endif
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_8ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn channel pointers 1..7 into offsets from channel 0 so that a single
    ; increment of srcq advances all eight channels.
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
%if ARCH_X86_64 || HAVE_ALIGNED_STACK
    sub     src4q, srcq
%else
    sub     src4m, srcq
%endif
    sub     src5q, srcq
    sub     src6q, srcq
%if ARCH_X86_64
    sub     src7q, srcq
%else
    mov     src1m, src1q
    sub     src7m, srcq
%endif

%if ARCH_X86_64
    %8 x,x,x,x,m9,x
%elifidn %1, int32
    ; No ninth vector register on x86-32: use the constant from memory.
    %define m9 [flt2p31]
%else
    %define m9 [flt2pm31]
%endif

.loop:
    mov%3   m0, [srcq      ]
    mov%3   m1, [srcq+src1q]
    mov%3   m2, [srcq+src2q]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov     src4q, src4m
%endif
    mov%3   m3, [srcq+src3q]
    mov%3   m4, [srcq+src4q]
    mov%3   m5, [srcq+src5q]
%if ARCH_X86_32
    mov     src7q, src7m
%endif
    mov%3   m6, [srcq+src6q]
    mov%3   m7, [srcq+src7q]

%if ARCH_X86_64
    ; TRANSPOSE8x4D is defined outside this file; m8 serves as scratch.
    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8

    %7 m0,m1,x,x,m9,m8
    %7 m2,m3,x,x,m9,m8
    %7 m4,m5,x,x,m9,m8
    %7 m6,m7,x,x,m9,m8

    mov%3   [dstq], m0
%else
    mov     dstq, dstm

    ; Memory scratch replaces the spare register here. m2 is read back from
    ; [rsp] after the transpose, so it serves as scratch for the first
    ; conversion and m0 (already stored) for the remaining ones.
    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1

    %7 m0,m1,x,x,m9,m2
    mova    m2, [rsp]
    mov%3   [dstq], m0
    %7 m2,m3,x,x,m9,m0
    %7 m4,m5,x,x,m9,m0
    %7 m6,m7,x,x,m9,m0

%endif

    mov%3   [dstq+16],  m1
    mov%3   [dstq+32],  m2
    mov%3   [dstq+48],  m3
    mov%3   [dstq+64],  m4
    mov%3   [dstq+80],  m5
    mov%3   [dstq+96],  m6
    mov%3   [dstq+112], m7

    add     srcq, mmsize
    add     dstq, mmsize*8
%if ARCH_X86_32
    ; r0 is shared: save dst and restore the channel-1 offset for the next pass.
    mov     dstm, dstq
    mov     src1q, src1m
%endif
    sub     lend, mmsize/4              ; mmsize/4 32-bit samples per channel
        jg .loop
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; INT16_TO_INT32_N (six operands accepted, none referenced)
;
; Widens the 16-bit samples in m0 and m1 to 32 bits: m0 expands into m0/m1 and
; m1 into m2/m3. Each word is interleaved above a zero word, so it lands in
; the upper half of its dword (value << 16). Uses m4 as a temporary.
;------------------------------------------------------------------------------
%macro INT16_TO_INT32_N 6
    pxor      m2, m2
    pxor      m3, m3
    punpcklwd m2, m1
    punpckhwd m3, m1
    SWAP 4,0                            ; free the name m0; old m0 is now m4
    pxor      m0, m0
    pxor      m1, m1
    punpcklwd m0, m4
    punpckhwd m1, m4
%endmacro

;------------------------------------------------------------------------------
; INT32_TO_INT16_N (six operands accepted, none referenced)
;
; Narrows the 32-bit samples in m0..m3 to 16 bits by keeping the upper half of
; each dword (arithmetic shift right by 16) and packing with signed
; saturation. m0/m1 collapse into m0, m2/m3 into m1 (via the final rename).
;------------------------------------------------------------------------------
%macro INT32_TO_INT16_N 6
    psrad     m0, 16
    psrad     m1, 16
    psrad     m2, 16
    psrad     m3, 16
    packssdw  m0, m1
    packssdw  m2, m3
    SWAP 1,2                            ; second result must be named m1
%endmacro

; INT32_TO_FLOAT_INIT: loads the 2^-31 scale factor into operand %5, where
; INT32_TO_FLOAT_N expects to find it. The other operands are not referenced.
%macro INT32_TO_FLOAT_INIT 6
    mova      %5, [flt2pm31]
%endmacro
; INT32_TO_FLOAT_N: converts the int32 samples in %1 and %2 to float in place
; and multiplies them by the scale held in %5. %3, %4 and %6 are not
; referenced.
%macro INT32_TO_FLOAT_N 6
    cvtdq2ps  %1, %1
    cvtdq2ps  %2, %2
    mulps     %1, %1, %5
    mulps     %2, %2, %5
%endmacro

; FLOAT_TO_INT32_INIT: loads the 2^31 scale factor into operand %5, where
; FLOAT_TO_INT32_N expects to find it. The other operands are not referenced.
%macro FLOAT_TO_INT32_INIT 6
    mova      %5, [flt2p31]
%endmacro
;------------------------------------------------------------------------------
; FLOAT_TO_INT32_N: converts the float samples in %1 and %2 to int32 in place.
;   %5  scale factor (also used as the overflow threshold), %6 scratch
;   %3, %4 are not referenced.
; After scaling, cvtps2dq produces 0x80000000 for values that do not fit.
; cmpps with predicate 5 (not-less-than) yields all-ones, i.e. integer -1, in
; lanes whose scaled value is not below %5; adding that mask to the converted
; value turns 0x80000000 into 0x7FFFFFFF for those lanes.
;------------------------------------------------------------------------------
%macro FLOAT_TO_INT32_N 6
    mulps     %1, %5
    mulps     %2, %5
    cvtps2dq  %6, %1
    cmpps     %1, %1, %5, 5
    paddd     %1, %6
    cvtps2dq  %6, %2
    cmpps     %2, %2, %5, 5
    paddd     %2, %6
%endmacro

; INT16_TO_FLOAT_INIT: loads the 2^-31 scale factor into m5 for
; INT16_TO_FLOAT_N. The macro operands are not referenced; m5 is fixed.
%macro INT16_TO_FLOAT_INIT 6
    mova      m5, [flt2pm31]
%endmacro
; INT16_TO_FLOAT_N: converts the int16 samples in m0 and m1 to float in
; m0..m3. The samples are first widened by INT16_TO_INT32_N (which leaves each
; one in the upper half of a dword), then converted and multiplied by the
; scale in m5. Registers are fixed; the operands are only forwarded.
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps  m0, m0
    cvtdq2ps  m1, m1
    cvtdq2ps  m2, m2
    cvtdq2ps  m3, m3
    mulps     m0, m0, m5
    mulps     m1, m1, m5
    mulps     m2, m2, m5
    mulps     m3, m3, m5
%endmacro

; FLOAT_TO_INT16_INIT: loads the 2^15 scale factor into m5 for
; FLOAT_TO_INT16_N. The macro operands are not referenced; m5 is fixed.
%macro FLOAT_TO_INT16_INIT 6
    mova      m5, [flt2p15]
%endmacro
; FLOAT_TO_INT16_N: converts the float samples in m0..m3 to int16 in m0 and
; m1. Each register is multiplied by the scale in m5 and converted to int32;
; pairs are then packed to words with signed saturation. Registers are fixed;
; the macro operands are not referenced.
%macro FLOAT_TO_INT16_N 6
    mulps     m0, m5
    mulps     m1, m5
    mulps     m2, m5
    mulps     m3, m5
    cvtps2dq  m0, m0
    cvtps2dq  m1, m1
    packssdw  m0, m1
    cvtps2dq  m1, m2                    ; m1 is free again after the pack
    cvtps2dq  m3, m3
    packssdw  m1, m3
%endmacro

; NOP_N: empty conversion/setup macro for instantiations that need no sample
; format change. Accepts zero to six operands and emits nothing.
%macro NOP_N 0-6
%endmacro

;------------------------------------------------------------------------------
; Function instantiations.
; Every "a" variant is emitted right after its "u" variant because it jumps
; into the "u" body when a pointer turns out to be misaligned.
;------------------------------------------------------------------------------

; ---- MMX -------------------------------------------------------------------
INIT_MMX mmx
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_6CH float, float, u, 2, 2, 0, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 0, NOP_N, NOP_N

; ---- SSE -------------------------------------------------------------------
INIT_XMM sse
PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

; ---- SSE2 ------------------------------------------------------------------
INIT_XMM sse2
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

; ---- SSSE3 (16-bit source de-interleave variants) --------------------------
INIT_XMM ssse3
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT

; ---- AVX (only when HAVE_AVX_EXTERNAL is set) ------------------------------
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

; 32-byte registers for the int32 -> float conversion.
INIT_YMM avx
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
%endif

; ---- AVX2 (only when HAVE_AVX2_EXTERNAL is set) ----------------------------
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
%endif