;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

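; All-ones vectors: the int16 kernels below use w1/dw1 to build the rounding
; bias (1 << (shift-1)) and, in MIX1_INT16, as the unit word interleaved with
; each sample for pmaddwd.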
SECTION_RODATA 32
dw1: times 8 dd 1
w1 : times 16 dw 1

SECTION .text

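; MIX2_FLT %1 (a = aligned, u = unaligned): float 2->1 mix,
;   out[i] = in1[i]*coeff[index1] + in2[i]*coeff[index2]
; The aligned variant branches to the unaligned entry point if any pointer is
; not mmsize-aligned. Each iteration handles 2*mmsize bytes; any tail samples
; are presumably handled by the caller.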
%macro MIX2_FLT 1
cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    test in1q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
%else
mix_2_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m4, [coeffpq + 4*index1q]
    VBROADCASTSS m5, [coeffpq + 4*index2q]
    shl lend , 2
    add in1q , lenq
    add in2q , lenq
    add outq , lenq
    neg lenq
.next:
%ifidn %1, a
    mulps m0, m4, [in1q + lenq         ]
    mulps m1, m5, [in2q + lenq         ]
    mulps m2, m4, [in1q + lenq + mmsize]
    mulps m3, m5, [in2q + lenq + mmsize]
%else
    movu  m0, [in1q + lenq         ]
    movu  m1, [in2q + lenq         ]
    movu  m2, [in1q + lenq + mmsize]
    movu  m3, [in2q + lenq + mmsize]
    mulps m0, m0, m4
    mulps m1, m1, m5
    mulps m2, m2, m4
    mulps m3, m3, m5
%endif
    addps m0, m0, m1
    addps m2, m2, m3
    mov%1 [outq + lenq         ], m0
    mov%1 [outq + lenq + mmsize], m2
    add lenq, mmsize*2
        jl .next
    REP_RET
%endmacro

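; MIX1_FLT %1 (a/u): float 1->1 scale, out[i] = in[i]*coeff[index], with the
; same alignment dispatch and 2*mmsize-per-iteration loop as MIX2_FLT.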
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    test inq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
%else
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m2, [coeffpq + 4*indexq]
    shl lenq , 2
    add inq  , lenq
    add outq , lenq
    neg lenq
.next:
%ifidn %1, a
    mulps m0, m2, [inq + lenq         ]
    mulps m1, m2, [inq + lenq + mmsize]
%else
    movu  m0, [inq + lenq         ]
    movu  m1, [inq + lenq + mmsize]
    mulps m0, m0, m2
    mulps m1, m1, m2
%endif
    mov%1 [outq + lenq         ], m0
    mov%1 [outq + lenq + mmsize], m1
    add lenq, mmsize*2
        jl .next
    REP_RET
%endmacro

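; MIX1_INT16 %1 (a/u): fixed-point 1->1 scale of int16 samples. The 32-bit
; coefficient entry appears to pack the coefficient in bits 0-15 and the
; right-shift count in bits 16-31 (inferred from the psllq/psrlq below).
; Each sample word is interleaved with a 1 from w1 so one pmaddwd yields
; sample*coeff + bias, giving roughly
;   out[i] = (in[i]*coeff + (1 << (shift-1))) >> shift
; with saturation to int16 via packssdw.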
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
    test inq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
%else
mix_1_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*indexq]
    SPLATW m5, m4
    psllq  m4, 32
    psrlq  m4, 48
    mova   m0, [w1]
    psllw  m0, m4
    psrlw  m0, 1
    punpcklwd m5, m0
    add lenq , lenq
    add inq  , lenq
    add outq , lenq
    neg lenq
.next:
    mov%1     m0, [inq + lenq         ]
    mov%1     m2, [inq + lenq + mmsize]
    mova      m1, m0
    mova      m3, m2
    punpcklwd m0, [w1]
    punpckhwd m1, [w1]
    punpcklwd m2, [w1]
    punpckhwd m3, [w1]
    pmaddwd   m0, m5
    pmaddwd   m1, m5
    pmaddwd   m2, m5
    pmaddwd   m3, m5
    psrad     m0, m4
    psrad     m1, m4
    psrad     m2, m4
    psrad     m3, m4
    packssdw  m0, m1
    packssdw  m2, m3
    mov%1 [outq + lenq         ], m0
    mov%1 [outq + lenq + mmsize], m2
    add lenq, mmsize*2
        jl .next
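; The MMX instantiation (mmsize == 8) must execute emms before returning;
; the SSE2 instantiation returns through REP_RET as usual.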
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

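; MIX2_INT16 %1 (a/u): fixed-point 2->1 mix. Words from in1 and in2 are
; interleaved so pmaddwd produces in1[i]*c1 + in2[i]*c2 per dword; a dword
; bias built from dw1 is added before the arithmetic shift, giving roughly
;   out[i] = (in1[i]*c1 + in2[i]*c2 + (1 << (shift-1))) >> shift
; with saturation via packssdw. As in MIX1_INT16, the shift count is taken
; from bits 16-31 of the first coefficient entry.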
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    test in1q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
%else
mix_2_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*index1q]
    movd   m6, [coeffpq + 4*index2q]
    SPLATW m5, m4
    SPLATW m6, m6
    psllq  m4, 32
    psrlq  m4, 48
    mova   m7, [dw1]
    pslld  m7, m4
    psrld  m7, 1
    punpcklwd m5, m6
    add lend , lend
    add in1q , lenq
    add in2q , lenq
    add outq , lenq
    neg lenq
.next:
    mov%1     m0, [in1q + lenq         ]
    mov%1     m2, [in2q + lenq         ]
    mova      m1, m0
    punpcklwd m0, m2
    punpckhwd m1, m2

    mov%1     m2, [in1q + lenq + mmsize]
    mov%1     m6, [in2q + lenq + mmsize]
    mova      m3, m2
    punpcklwd m2, m6
    punpckhwd m3, m6

    pmaddwd   m0, m5
    pmaddwd   m1, m5
    pmaddwd   m2, m5
    pmaddwd   m3, m5
    paddd     m0, m7
    paddd     m1, m7
    paddd     m2, m7
    paddd     m3, m7
    psrad     m0, m4
    psrad     m1, m4
    psrad     m2, m4
    psrad     m3, m4
    packssdw  m0, m1
    packssdw  m2, m3
    mov%1 [outq + lenq         ], m0
    mov%1 [outq + lenq + mmsize], m2
    add lenq, mmsize*2
        jl .next
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

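; Instantiate each kernel for every supported ISA: int16 kernels for MMX and
; SSE2, float kernels for SSE and (when built with AVX support) AVX, each in
; an unaligned (u) and an aligned (a) flavour.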
INIT_MMX mmx
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

INIT_XMM sse
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a

INIT_XMM sse2
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

%if HAVE_AVX_EXTERNAL
INIT_YMM avx
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a
%endif