/*
 * Copyright (c) 2014 Peter Meerwald <[email protected]>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

#include "asm-offsets.h"
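
/*
 * Template for ff_resample_one_\fmt\()_neon. The argument layout, as
 * read back by the code below (the authoritative prototype lives in
 * libswresample): r0 = ResampleContext *c, r1 = dst, r2 = dst_index,
 * r3 = src, [sp, #8] = index (first stack argument, read after the
 * push of r4/r5); the linear variant additionally reads frac at
 * [sp, #12]. One output sample is computed as the dot product of
 * c->filter_length coefficients with src starting at sample_index;
 * the main loop handles 8 taps per iteration (two software-pipelined
 * LOAD4/MLA4 pairs), with 4-, 2- and 1-tap steps for the remainder.
 */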
.macro resample_one fmt, es=2
function ff_resample_one_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es

        ldr             r2, [r0, #PHASE_SHIFT+4]        /* phase_mask */
        ldr             ip, [sp, #8]                    /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2                      /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4                      /* compute sample_index */
        mul             r2, r2, r5

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es            /* &src[sample_index] */

        cmp             r5, #8
        add             r0, ip, r2, lsl #\es            /* filter = &filter_bank[...] */

        blt             5f
8:
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        LOAD4
        beq             6f
        cmp             r5, #8
        MLA4
        blt             4f
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm
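
/*
 * Each sample format below defines the helper macros the template
 * expands to: LOAD1/2/4 fetch filter and src elements (advancing r0
 * and r3), MUL4 and MLA1/2/4 multiply(-accumulate) them, INIT4 zeroes
 * the accumulators and STORE reduces them and writes the result. The
 * .purgem directives at the end of the template drop these helpers so
 * the next format can redefine the same names.
 */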

/* float32 */
.macro LOAD1
        veor.32         d0, d0
        vld1.32         {d0[0]}, [r0]!                  /* load filter */
        vld1.32         {d4[0]}, [r3]!                  /* load src */
.endm
.macro LOAD2
        vld1.32         {d0}, [r0]!                     /* load filter */
        vld1.32         {d4}, [r3]!                     /* load src */
.endm
.macro LOAD4
        vld1.32         {d0,d1}, [r0]!                  /* load filter */
        vld1.32         {d4,d5}, [r3]!                  /* load src */
.endm
.macro MLA1
        vmla.f32        d16, d0, d4[0]
.endm
.macro MLA2
        vmla.f32        d16, d0, d4
.endm
.macro MLA4
        vmla.f32        d16, d0, d4
        vmla.f32        d17, d1, d5
.endm
.macro MUL4
        vmul.f32        d16, d0, d4
        vmul.f32        d17, d1, d5
.endm
.macro INIT4
        veor.f32        q8, q8
.endm
.macro STORE
        vpadd.f32       d16, d16, d17
        vpadd.f32       d16, d16, d16
        vst1.32         d16[0], [r1]
.endm

resample_one flt, 2
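
/*
 * The es template argument is the element-size shift (log2 of the
 * bytes per sample) used in the address arithmetic above: 2 for the
 * 32-bit flt and s32 formats, 1 for s16.
 */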

/* s32 */
.macro LOAD1
        veor.32         d0, d0
        vld1.32         {d0[0]}, [r0]!                  /* load filter */
        vld1.32         {d4[0]}, [r3]!                  /* load src */
.endm
.macro LOAD2
        vld1.32         {d0}, [r0]!                     /* load filter */
        vld1.32         {d4}, [r3]!                     /* load src */
.endm
.macro LOAD4
        vld1.32         {d0,d1}, [r0]!                  /* load filter */
        vld1.32         {d4,d5}, [r3]!                  /* load src */
.endm
.macro MLA1
        vmlal.s32       q8, d0, d4[0]
.endm
.macro MLA2
        vmlal.s32       q8, d0, d4
.endm
.macro MLA4
        vmlal.s32       q8, d0, d4
        vmlal.s32       q9, d1, d5
.endm
.macro MUL4
        vmull.s32       q8, d0, d4
        vmull.s32       q9, d1, d5
.endm
.macro INIT4
        veor.s64        q8, q8
        veor.s64        q9, q9
.endm
.macro STORE
        vadd.s64        q8, q8, q9
        vadd.s64        d16, d16, d17
        vqrshrn.s64     d16, q8, #30
        vst1.32         d16[0], [r1]
.endm

resample_one s32, 2
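
/*
 * The s32 variant accumulates in 64 bits (vmull/vmlal.s32 widen into
 * q8/q9) and narrows back with a rounding, saturating right shift by
 * 30, which implies Q30 fixed-point coefficients; see the filter-bank
 * setup in libswresample for the authoritative scaling.
 */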

/* s16 */
.macro LOAD1
        veor.16         d0, d0
        vld1.16         {d0[0]}, [r0]!                  /* load filter */
        vld1.16         {d4[0]}, [r3]!                  /* load src */
.endm
.macro LOAD2
        veor.16         d0, d0
        vld1.32         {d0[0]}, [r0]!                  /* load filter */
        veor.16         d4, d4
        vld1.32         {d4[0]}, [r3]!                  /* load src */
.endm
.macro LOAD4
        vld1.16         {d0}, [r0]!                     /* load filter */
        vld1.16         {d4}, [r3]!                     /* load src */
.endm
.macro MLA1
        vmlal.s16       q8, d0, d4[0]
.endm
.macro MLA2
        vmlal.s16       q8, d0, d4
.endm
.macro MLA4
        vmlal.s16       q8, d0, d4
.endm
.macro MUL4
        vmull.s16       q8, d0, d4
.endm
.macro INIT4
        veor.s32        q8, q8
.endm
.macro STORE
        vpadd.s32       d16, d16, d17
        vpadd.s32       d16, d16, d16
        vqrshrn.s32     d16, q8, #15
        vst1.16         d16[0], [r1]
.endm

resample_one s16, 1
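
/*
 * The s16 variant widens into a single q8 accumulator (vmlal.s16) and
 * narrows with a rounding, saturating right shift by 15, i.e. Q15
 * coefficients by the same reasoning as for s32. Note that LOAD2
 * fetches two s16 elements with one 32-bit lane load.
 */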
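
/*
 * The linear variant computes two dot products at once: r0 walks the
 * filter for the current phase (accumulated as "val" in q9/d18) and
 * r2 = filter + c->filter_length walks the next phase ("v2" in
 * q8/d16). STORE then interpolates between the two results:
 *   dst[dst_index] = val + (v2 - val) * frac / c->src_incr
 * as spelled out by the per-line comments in the float STORE below.
 */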
.macro resample_linear fmt, es=2
function ff_resample_linear_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es

        ldr             r2, [r0, #PHASE_SHIFT+4]        /* phase_mask */
        ldr             ip, [sp, #8]                    /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2                      /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4                      /* compute sample_index */
        mul             r2, r2, r5

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es            /* &src[sample_index] */

        cmp             r5, #8
        ldr             r4, [r0, #SRC_INCR]             /* c->src_incr */
        add             r0, ip, r2, lsl #\es            /* filter = &filter_bank[...] */
        add             r2, r0, r5, lsl #\es            /* filter[... + c->filter_length] */

        blt             5f
8:
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        LOAD4
        beq             6f
        cmp             r5, #8
        MLA4
        blt             4f
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm

/* float32 linear */
.macro LOAD1
        veor.32         d0, d0
        veor.32         d2, d2
        vld1.32         {d0[0]}, [r0]!                  /* load filter */
        vld1.32         {d2[0]}, [r2]!                  /* load filter */
        vld1.32         {d4[0]}, [r3]!                  /* load src */
.endm
.macro LOAD2
        vld1.32         {d0}, [r0]!                     /* load filter */
        vld1.32         {d2}, [r2]!                     /* load filter */
        vld1.32         {d4}, [r3]!                     /* load src */
.endm
.macro LOAD4
        vld1.32         {d0,d1}, [r0]!                  /* load filter */
        vld1.32         {d2,d3}, [r2]!                  /* load filter */
        vld1.32         {d4,d5}, [r3]!                  /* load src */
.endm
.macro MLA1
        vmla.f32        d18, d0, d4[0]
        vmla.f32        d16, d2, d4[0]
.endm
.macro MLA2
        vmla.f32        d18, d0, d4
        vmla.f32        d16, d2, d4
.endm
.macro MLA4
        vmla.f32        q9, q0, q2
        vmla.f32        q8, q1, q2
.endm
.macro MUL4
        vmul.f32        q9, q0, q2
        vmul.f32        q8, q1, q2
.endm
.macro INIT4
        veor.f32        q9, q9
        veor.f32        q8, q8
.endm
.macro STORE
        vldr            s0, [sp, #12]                   /* frac */
        vmov            s1, r4                          /* c->src_incr */
        vcvt.f32.s32    d0, d0                          /* int->float, both lanes */

        vsub.f32        q8, q8, q9                      /* v2 - val */
        vpadd.f32       d18, d18, d19
        vpadd.f32       d16, d16, d17
        vpadd.f32       d2, d18, d18
        vpadd.f32       d1, d16, d16

        vmul.f32        s2, s2, s0                      /* (v2 - val) * frac */
        vdiv.f32        s2, s2, s1                      /* / c->src_incr */
        vadd.f32        s4, s4, s2

        vstr            s4, [r1]
.endm
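
/*
 * Only the float variant of the linear resampler is instantiated in
 * this file; formats without a NEON implementation here would fall
 * back to libswresample's generic C path.
 */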

resample_linear flt, 2