/* NOTE(review): web-viewer chrome (CoCalc header/navigation) removed from
 * this scrape; the content below is AArch64 NEON resampling assembly. */
1
/*
 * Copyright (c) 2014 Janne Grunau <janne-ffmpeg@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21
#include "libavutil/aarch64/asm.S"
22
#include "asm-offsets.h"
23
24
/*
 * resample_one — expand one NEON polyphase resampling routine,
 * ff_resample_one_\fmt\()_neon, for the sample format \fmt.
 *
 * Register use (as established by the code below):
 *   x0: ResampleContext — FILTER_BANK, FILTER_LENGTH and
 *       PHASE_SHIFT/phase_mask are loaded from it (offsets from
 *       asm-offsets.h)
 *   x1: destination buffer, w2: destination element index
 *   x3: source buffer
 *   x4: fixed-point source position, split below into sample_index
 *       (high bits) and filter phase (low bits)
 *
 * \fmt  sample type suffix (dbl, flt, s16, s32)
 * \es   log2 of the element size in bytes (default 2)
 *
 * The per-format helper macros LOAD1/2/4/8, M_MUL[2], M_MLA[2] and
 * STORE_ONE must be defined before each expansion; they are purged at
 * the end so the next format can redefine them.
 */
.macro resample_one fmt, es=2
/* dbl supplies real two-accumulator M_MUL2/M_MLA2 macros before its
 * expansion; every other format gets no-op versions here. */
.ifnc \fmt, dbl
.macro M_MUL2 x:vararg
.endm
.macro M_MLA2 x:vararg
.endm
.endif
function ff_resample_one_\fmt\()_neon, export=1
        sxtw            x2, w2                  // dst index as 64-bit value
        ldr             x9, [x0, #FILTER_BANK]
        ldr             w6, [x0, #FILTER_LENGTH]
        ldp             w7, w8, [x0, #PHASE_SHIFT] // and phase_mask
        lsr             x10, x4, x7             // sample_index
        and             x4, x4, x8              // filter phase
        lsl             x11, x6, #\es           // filter_length * elem_size
        add             x3, x3, x10, lsl #\es   // src[sample_index]
        madd            x9, x11, x4, x9         // filter = bank + phase * row size
        cmp             w6, #16
        b.lt            5f                      // short filter: skip unrolled loop
8:      // remaining filter_length at least 16
        subs            w6, w6, #16
        LOAD8           v4, v5, v6, v7, x3
        LOAD8           v16, v17, v18, v19, x9
        M_MUL           v0, v4, v16, v1         // first products initialize accumulator(s)
        M_MUL2          v1, v6, v18
7:      // main loop: 16 taps per iteration, loads interleaved with MACs
        LOAD8           v20, v21, v22, v23, x3
        M_MLA           v0, v5, v17, v1
        M_MLA2          v1, v7, v19
        LOAD8           v24, v25, v26, v27, x9
        M_MLA           v0, v20, v24, v1
        M_MLA2          v1, v22, v26
        b.eq            6f                      // no taps left after this block
        cmp             w6, #16
        M_MLA           v0, v21, v25, v1
        M_MLA2          v1, v23, v27
        b.lt            4f                      // 1-15 taps remain: fall to tail loops
        subs            w6, w6, #16
        LOAD8           v4, v5, v6, v7, x3
        LOAD8           v16, v17, v18, v19, x9
        M_MLA           v0, v4, v16, v1
        M_MLA2          v1, v6, v18
        b               7b
6:      // drain the last pending MACs, then reduce and store
        M_MLA           v0, v21, v25, v1
        M_MLA2          v1, v23, v27
        STORE_ONE       0, x1, x2, v1
        ret
5:      // filter_length < 16: start from zeroed accumulators
        movi            v0.16b, #0
        movi            v1.16b, #0
4:      // remaining filter_length 1-15
        cmp             w6, #4
        b.lt            2f
        subs            w6, w6, #4
        LOAD4           v4, v5, x3
        LOAD4           v6, v7, x9
        M_MLA           v0, v4, v6, v1
        M_MLA2          v1, v5, v7
        b.eq            0f
        b               4b
2:      // remaining filter_length 1-3
        cmp             w6, #2
        b.lt            1f
        LOAD2           2, x3
        LOAD2           3, x9
        subs            w6, w6, #2
        M_MLA           v0, v2, v3
        b.eq            0f
1:      // remaining filter_length 1
        LOAD1           6, x3
        LOAD1           7, x9
        M_MLA           v0, v6, v7
0:
        STORE_ONE       0, x1, x2, v1           // reduce accumulator(s), store dst[x2]
        ret
endfunc

/* purge the per-format helpers so the next expansion can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem LOAD8
.purgem M_MLA
.purgem M_MLA2
.purgem M_MUL
.purgem M_MUL2
.purgem STORE_ONE
.endm
112
113
114
/*
 * Helpers for the double-precision (f64) variant: 8-byte elements,
 * two per q register.  Two accumulators (v0/v1) are used; M_MUL2/M_MLA2
 * simply route the second-accumulator operation through M_MUL/M_MLA.
 */
.macro LOAD1 d1, addr                           // one double, post-increment
        ldr             d\d1, [\addr], #8
.endm
.macro LOAD2 d1, addr                           // two doubles
        ld1             {v\d1\().2d}, [\addr], #16
.endm
.macro LOAD4 d1, d2, addr                       // four doubles into two registers
        ld1             {\d1\().2d,\d2\().2d}, [\addr], #32
.endm
.macro LOAD8 d1, d2, d3, d4, addr               // eight doubles into four registers
        ld1             {\d1\().2d,\d2\().2d,\d3\().2d,\d4\().2d}, [\addr], #64
.endm
.macro M_MLA d, r0, r1, d2:vararg               // \d2 ignored: second acc handled via M_MLA2
        fmla            \d\().2d, \r0\().2d, \r1\().2d
.endm
.macro M_MLA2 second:vararg                     // forward second-accumulator MAC to M_MLA
        M_MLA           \second
.endm
.macro M_MUL d, r0, r1, d2:vararg               // \d2 ignored: second acc handled via M_MUL2
        fmul            \d\().2d, \r0\().2d, \r1\().2d
.endm
.macro M_MUL2 second:vararg                     // forward second-accumulator product to M_MUL
        M_MUL           \second
.endm
.macro STORE_ONE rn, addr, idx, d2              // merge accs, horizontal add, store one double
        fadd            v\rn\().2d, v\rn\().2d, \d2\().2d
        faddp           d\rn\(), v\rn\().2d
        str             d\rn\(), [\addr, \idx, lsl #3]
.endm

resample_one dbl, 3
145
146
147
/*
 * Helpers for the single-precision float variant: 4-byte elements,
 * four per q register.  Only accumulator v0 is used; the second-
 * accumulator macros are the no-ops defined inside resample_one.
 */
.macro LOAD1 d1, addr                           // one float, post-increment
        ldr             s\d1, [\addr], #4
.endm
.macro LOAD2 d1, addr                           // two floats
        ld1             {v\d1\().2s}, [\addr], #8
.endm
.macro LOAD4 d1, d2, addr                       // \d2 unused: four floats fit one register
        ld1             {\d1\().4s}, [\addr], #16
.endm
.macro LOAD8 d1, d2, d3, d4, addr               // \d3/\d4 unused: eight floats fit two registers
        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
.endm
.macro M_MLA d, r0, r1, d2:vararg               // \d2 ignored (single accumulator)
        fmla            \d\().4s, \r0\().4s, \r1\().4s
.endm
.macro M_MUL d, r0, r1, d2:vararg               // \d2 ignored (single accumulator)
        fmul            \d\().4s, \r0\().4s, \r1\().4s
.endm
.macro STORE_ONE rn, addr, idx, d2              // \d2 ignored; reduce 4 lanes, store one float
        faddp           v\rn\().4s, v\rn\().4s, v\rn\().4s
        faddp           s\rn\(), v\rn\().2s
        str             s\rn\(), [\addr, \idx, lsl #2]
.endm

resample_one flt
172
173
174
/*
 * Helpers for the s16 variant: 2-byte elements widened with
 * smull/smlal into a 32-bit accumulator (v0 only); the result is
 * narrowed back with a saturating rounding right shift by 15 bits.
 */
.macro LOAD1 d1, addr                           // one s16 sample
        ldr             h\d1, [\addr], #2
.endm
.macro LOAD2 d1, addr                           // two s16 samples (loaded as one 32-bit word)
        ldr             s\d1, [\addr], #4
.endm
.macro LOAD4 d1, d2, addr                       // \d2 unused: four s16 fit one d register
        ld1             {\d1\().4h}, [\addr], #8
.endm
.macro LOAD8 d1, d2, d3, d4, addr               // \d3/\d4 unused: eight s16 fit two d registers
        ld1             {\d1\().4h,\d2\().4h}, [\addr], #16
.endm
.macro M_MLA d, r0, r1, d2:vararg               // widening MAC, \d2 ignored
        smlal           \d\().4s, \r0\().4h, \r1\().4h
.endm
.macro M_MUL d, r0, r1, d2:vararg               // widening multiply, \d2 ignored
        smull           \d\().4s, \r0\().4h, \r1\().4h
.endm
.macro STORE_ONE rn, addr, idx, d2              // \d2 ignored; reduce lanes, round/narrow, store s16
        addp            v\rn\().4s, v\rn\().4s, v\rn\().4s
        addp            v\rn\().4s, v\rn\().4s, v\rn\().4s
        sqrshrn         v\rn\().4h, v\rn\().4s, #15
        str             h\rn\(), [\addr, \idx, lsl #1]
.endm

resample_one s16, 1
200
201
202
/*
 * Helpers for the s32 variant: 4-byte elements widened with
 * smull/smlal into 64-bit accumulators v0/v1.  Unlike dbl, M_MLA/M_MUL
 * feed the optional second accumulator themselves (via \d2), using the
 * high input lanes with smlal2/smull2; M_MLA2/M_MUL2 are the no-ops
 * defined inside resample_one.
 */
.macro LOAD1 d1, addr                           // one s32 sample
        ldr             s\d1, [\addr], #4
.endm
.macro LOAD2 d1, addr                           // two s32 samples
        ld1             {v\d1\().2s}, [\addr], #8
.endm
.macro LOAD4 d1, d2, addr                       // \d2 unused: four s32 fit one register
        ld1             {\d1\().4s}, [\addr], #16
.endm
.macro LOAD8 d1, d2, d3, d4, addr               // \d3/\d4 unused: eight s32 fit two registers
        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
.endm
.macro M_MLA d1, r0, r1, d2:vararg
        smlal           \d1\().2d, \r0\().2s, \r1\().2s
.ifnb \d2                                       // second accumulator takes the high lanes
        smlal2          \d2\().2d, \r0\().4s, \r1\().4s
.endif
.endm
.macro M_MUL d1, r0, r1, d2:vararg
        smull           \d1\().2d, \r0\().2s, \r1\().2s
.ifnb \d2                                       // second accumulator takes the high lanes
        smull2          \d2\().2d, \r0\().4s, \r1\().4s
.endif
.endm
.macro STORE_ONE rn, addr, idx, d2              // merge accs, reduce, round/narrow by 30 bits, store s32
        add             v\rn\().2d, v\rn\().2d, \d2\().2d
        addp            d\rn\(), v\rn\().2d
        sqrshrn         v\rn\().2s, v\rn\().2d, #30
        str             s\rn\(), [\addr, \idx, lsl #2]
.endm

resample_one s32
234
235