Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52868 views
1
/*
2
* Copyright (C) 2013 Xiaolei Yu <[email protected]>
3
*
4
* This file is part of FFmpeg.
5
*
6
* FFmpeg is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Lesser General Public
8
* License as published by the Free Software Foundation; either
9
* version 2.1 of the License, or (at your option) any later version.
10
*
11
* FFmpeg is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with FFmpeg; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
*/
20
21
#include "libavutil/arm/asm.S"
22
23
/*
 * alias name, tgt, set=1
 *
 * Create or remove a symbolic register name.
 *   set != 0 (default): name becomes another spelling of register tgt (.req)
 *   set == 0          : name is released again (.unreq); tgt is ignored
 */
.macro alias name, tgt, set=1
.if \set == 0
    .unreq  \name
.else
    \name   .req    \tgt
.endif
.endm
30
31
/*
 * Give every quad register a name for each of its halves:
 *   qN_l is d(2N) and qN_h is d(2N+1), for N = 0 .. 15.
 *
 * alias_dw_all qw, dw_l, dw_h defines the pair for quad register number qw
 * and then calls itself with qw + 1 and both double-register numbers + 2
 * until qw reaches 15.  The recursive call evaluates its arguments with the
 * altmacro expression operator, so altmacro mode is switched on only around
 * this definition and its single invocation.
 */
.altmacro

.macro alias_dw_all qw, dw_l, dw_h
    alias   q\qw\()_h, d\dw_h
    alias   q\qw\()_l, d\dw_l
.if \qw < 15
    alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
.endif
.endm

alias_dw_all 0, 0, 1

.noaltmacro
44
45
/*
 * alias_qw name, qw, set=1
 *
 * Alias a quad register together with its two halves: handles name_l,
 * name_h and name as names for qw_l, qw_h and qw.  With set == 0 the three
 * names are removed instead.
 */
.macro alias_qw name, qw, set=1
    alias   \name\()_l, \qw\()_l, \set
    alias   \name\()_h, \qw\()_h, \set
    alias   \name\(), \qw, \set
.endm
50
51
/*
 * prologue
 *
 * Push r4-r12 and lr (10 words), then q4-q7 (written here as d8-d15, the
 * same registers) onto the stack.  load_arg depends on this exact amount
 * of pushed data, and epilogue must mirror it.
 */
.macro prologue
    push    {r4-r12, lr}
    vpush   {d8-d15}
.endm
55
56
/*
 * epilogue
 *
 * Undo prologue in reverse order: restore q4-q7 (written here as d8-d15,
 * the same registers), then pop r4-r12 and load the saved lr value into pc,
 * which returns from the function.
 */
.macro epilogue
    vpop    {d8-d15}
    pop     {r4-r12, pc}
.endm
60
61
/*
 * load_arg reg, ix
 *
 * Load function argument number ix (counted from 0) from the stack into
 * reg.  Only valid after prologue with sp otherwise unchanged: the offset
 * skips the 4 quad registers (4 * 16 bytes) and the 10 core registers
 * (10 * 4 bytes) that prologue pushed, so ix == 4 addresses the first word
 * above the saved registers.
 */
.macro load_arg reg, ix
    ldr     \reg, [sp, #((\ix - 4) * 4 + (4 * 16 + 10 * 4))]
.endm
64
65
66
/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
67
* int width, int height,
68
* int y_stride, int c_stride, int src_stride,
69
* int32_t coeff_table[9]);
70
*/
71
/*
 * alias_loop_420sp set=1
 *
 * Define (set != 0) or remove (set == 0) the core-register names used by
 * loop_420sp.  Some names intentionally share a register:
 *   src0 / src              (r0)
 *   y0 / y                  (r1)
 *   header / width          (r3)
 *   c_padding / c_stride    (r6)
 *   y1 / coeff_table        (r12)
 */
.macro alias_loop_420sp set=1
    /* primary names, one per register */
    alias   src, r0, \set
    alias   y, r1, \set
    alias   chroma, r2, \set
    alias   width, r3, \set

    alias   height, r4, \set
    alias   y_stride, r5, \set
    alias   c_stride, r6, \set
    alias   src_stride, r7, \set

    alias   y0_end, r8, \set
    alias   src_padding, r9, \set
    alias   y_padding, r10, \set
    alias   src1, r11, \set
    alias   y1, r12, \set
    alias   coeff_table, r12, \set

    /* secondary names layered on top of the primary ones */
    alias   src0, src, \set
    alias   y0, y, \set
    alias   header, width, \set
    alias   c_padding, c_stride, \set
.endm
96
97
98
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function  s_fmt _to_ d_fmt _neon_ precision  whose
 * arguments are (src, y, chroma, width, height, y_stride, c_stride,
 * src_stride, coeff_table); the first four arrive in r0-r3 and the rest
 * are read from the stack with load_arg.
 *
 *   init   - macro invoked once as  init coeff_table
 *   kernel - macro invoked as
 *              kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma [, count]
 *            It handles one block on two rows (src0/y0 and src1/y1) and
 *            advances the pointers it is given.
 *
 * Two rows are processed per outer iteration and height is decremented by
 * 2 each time, so the loop body always runs at least once.
 * If width is not a multiple of 16, the remainder (width & 15) is handled
 * first by a kernel call that receives it as count.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    /* arguments 4..8 are on the stack */
    load_arg    height, 4
    load_arg    y_stride, 5
    load_arg    c_stride, 6
    load_arg    src_stride, 7
    load_arg    coeff_table, 8

    /* coeff_table shares r12 with y1, so it must be consumed before y1 is set */
    \init       coeff_table

    /* distance from the end of one row to the start of the next */
    sub         y_padding, y_stride, width
    sub         c_padding, c_stride, width
    sub         src_padding, src_stride, width, LSL #2  /* width * 4 source bytes per row */

    add         y0_end, y0, width
    and         header, width, #15      /* header shares r3 with width: width is gone from here on */

    /* second row of the pair */
    add         y1, y0, y_stride
    add         src1, src0, src_stride

0:
    /* leading partial block, only when width is not a multiple of 16 */
    cmp         header, #0
    beq         1f

    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses: compare them as unsigned values */
    cmp         y0, y0_end
    blo         1b
2:
    /* y1 and src1 now point at the end of the second row: step to the next row pair */
    add         y0, y1, y_padding
    add         y0_end, y1, y_stride
    add         chroma, chroma, c_padding
    add         src0, src1, src_padding

    add         y1, y0, y_stride
    add         src1, src0, src_stride

    subs        height, height, #2

    bgt         0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm
153
154
/*
 * downsample
 *
 * For each of r, g and b: add horizontally adjacent pairs of unsigned
 * 8-bit values from x8x16 and write the widened 16-bit sums to x16x8.
 * The x16x8 register names are not defined in this part of the file.
 */
.macro downsample
    vpaddl.u8   r16x8, r8x16
    vpaddl.u8   g16x8, g8x16
    vpaddl.u8   b16x8, b8x16
.endm
159
160
161
/* accumulate and right shift by 2 (with rounding) */
162
/*
 * downsample_ars2
 *
 * Second half of the 2x2 reduction.  For each of r, g and b:
 *   1. add horizontally adjacent pairs of unsigned 8-bit values from x8x16
 *      onto the 16-bit sums already held in x16x8;
 *   2. shift every 16-bit sum right by 2 with rounding.
 */
.macro downsample_ars2
    /* step 1: accumulate the pairwise sums of the current row */
    vpadal.u8   r16x8, r8x16
    vpadal.u8   g16x8, g8x16
    vpadal.u8   b16x8, b8x16

    /* step 2: rounding divide by 4 */
    vrshr.u16   r16x8, r16x8, #2
    vrshr.u16   g16x8, g16x8, #2
    vrshr.u16   b16x8, b16x8, #2
.endm
171
172
/*
 * store_y8_16x1 dst, count
 *
 * Store the quad register y8x16 at dst.
 *   count given  : dst is then advanced by count (a register), although the
 *                  whole register is still written
 *   count omitted: dst is advanced past the stored data by the writeback
 */
.macro store_y8_16x1 dst, count
.ifnc "\count",""
    vstmia  \dst, {y8x16}
    add     \dst, \dst, \count
.else
    vstmia  \dst!, {y8x16}
.endif
.endm
180
181
/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store u8x8 and v8x8 interleaved, u first, at dst.
 *   count given  : dst is post-incremented by the register count
 *   count omitted: dst is post-incremented by the amount stored
 */
.macro store_chroma_nv12_8x1 dst, count
.ifnc "\count",""
    vst2.i8 {u8x8, v8x8}, [\dst], \count
.else
    vst2.i8 {u8x8, v8x8}, [\dst]!
.endif
.endm
188
189
/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Store v8x8 and u8x8 interleaved, v first, at dst.
 *   count given  : dst is post-incremented by the register count
 *   count omitted: dst is post-incremented by the amount stored
 */
.macro store_chroma_nv21_8x1 dst, count
.ifnc "\count",""
    vst2.i8 {v8x8, u8x8}, [\dst], \count
.else
    vst2.i8 {v8x8, u8x8}, [\dst]!
.endif
.endm
196
197
/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * Load 16 four-byte pixels from src and de-interleave them so that byte 0
 * of every pixel lands in a8x16, byte 1 in b8x16, byte 2 in c8x16 and
 * byte 3 in d8x16 (first 8 pixels in the _l halves, next 8 in the _h
 * halves).
 *   count omitted: src ends up 64 bytes further on
 *   count given  : src ends up count * 4 bytes further on, although all 16
 *                  pixels are still read
 */
.macro load_8888_16x1 a, b, c, d, src, count
    /* pixels 0-7: identical in both variants, src moves on by 32 bytes */
    vld4.8  {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
.ifc "\count",""
    /* pixels 8-15, with writeback */
    vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.else
    /* pixels 8-15, without writeback */
    vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
    /* take back the 32 bytes from the first load, then step count pixels */
    sub     \src, \src, #32
    add     \src, \src, \count, LSL #2
.endif
.endm
208
209
/*
 * load_rgbx_16x1 src, count
 *
 * load_8888_16x1 with the pixel bytes named r, g, b, x in memory order;
 * src and the optional count are passed through unchanged.
 */
.macro load_rgbx_16x1 src, count
    load_8888_16x1  r, g, b, x, \src, \count
.endm
212
213
/*
 * load_bgrx_16x1 src, count
 *
 * load_8888_16x1 with the pixel bytes named b, g, r, x in memory order;
 * src and the optional count are passed through unchanged.
 */
.macro load_bgrx_16x1 src, count
    load_8888_16x1  b, g, r, x, \src, \count
.endm
216
217
/*
 * alias_src_rgbx set=1
 *
 * Define (or, with set == 0, remove) the source register names for the
 * r, g, b, x byte order by forwarding to alias_src_8888.
 */
.macro alias_src_rgbx set=1
    alias_src_8888  r, g, b, x, \set
.endm
220
221
/*
 * alias_src_bgrx set=1
 *
 * Define (or, with set == 0, remove) the source register names for the
 * b, g, r, x byte order by forwarding to alias_src_8888.
 */
.macro alias_src_bgrx set=1
    alias_src_8888  b, g, r, x, \set
.endm
224
225
/*
 * alias_dst_nv12 set=1
 *
 * Name the chroma output halves for the u-first layout: u8x8 is c8x8x2_l
 * and v8x8 is c8x8x2_h.  With set == 0 both names are removed.
 * c8x8x2 is not defined in this part of the file.
 */
.macro alias_dst_nv12 set=1
    alias   v8x8, c8x8x2_h, \set
    alias   u8x8, c8x8x2_l, \set
.endm
229
230
/*
 * alias_dst_nv21 set=1
 *
 * Name the chroma output halves for the v-first layout: v8x8 is c8x8x2_l
 * and u8x8 is c8x8x2_h.  With set == 0 both names are removed.
 * c8x8x2 is not defined in this part of the file.
 */
.macro alias_dst_nv21 set=1
    alias   u8x8, c8x8x2_h, \set
    alias   v8x8, c8x8x2_l, \set
.endm
234
235
236
/*
 * Common aliases.
 *
 * d0, d1 and d2 each get a whole-register name (CO_R, CO_G, CO_B) plus
 * names for their signed 16-bit lanes 0, 1 and 2 (suffix Y, U, V).
 * NOTE(review): presumably these hold the conversion coefficients set up
 * by the init macro of loop_420sp - confirm against the including file.
 */

/* d0 */
alias   CO_R, d0
CO_RY   .dn     d0.s16[0]
CO_RU   .dn     d0.s16[1]
CO_RV   .dn     d0.s16[2]

/* d1 */
alias   CO_G, d1
CO_GY   .dn     d1.s16[0]
CO_GU   .dn     d1.s16[1]
CO_GV   .dn     d1.s16[2]

/* d2 */
alias   CO_B, d2
CO_BY   .dn     d2.s16[0]
CO_BU   .dn     d2.s16[1]
CO_BV   .dn     d2.s16[2]

/* BIAS_U and BIAS_V are two names for the same register, d3 */
alias   BIAS_U, d3
alias   BIAS_V, BIAS_U

alias   BIAS_Y, q2
257
258
259
/*
 * q3-q6: 16 source pixels, one 8-bit channel per quad register.
 *
 * alias_src_8888 a, b, c, d, set
 *
 * Name q3, q4, q5 and q6 (and their _l / _h halves) a8x16, b8x16, c8x16
 * and d8x16, where a-d are the channel letters in memory order.  With
 * set == 0 the names are removed.  set has no default here and must be
 * passed explicitly.
 */
.macro alias_src_8888 a, b, c, d, set
    alias_qw    \a\()8x16, q3, \set
    alias_qw    \b\()8x16, q4, \set
    alias_qw    \c\()8x16, q5, \set
    alias_qw    \d\()8x16, q6, \set
.endm
267
268
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 *
 * Convert one 16-pixel-wide block on two rows.
 *   rgb_fmt    - source layout suffix, selects alias_src_* and load_*_16x1
 *   yuv_fmt    - chroma layout suffix, selects alias_dst_* and
 *                store_chroma_*_8x1
 *   rgb0, rgb1 - source pointers for the first and second row
 *   y0, y1     - luma destination pointers for the first and second row
 *   chroma     - chroma destination pointer
 *   count      - optional register; when given, the load and store macros
 *                advance the pointers by count pixels instead of a full
 *                block
 *
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this part of
 * the file; they are expected to be supplied by whoever includes it.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    /* register names for this source / destination layout */
    alias_dst_\yuv_fmt
    alias_src_\rgb_fmt

    /* first row: luma, and start of the 2x2 channel sums */
    load_\rgb_fmt\()_16x1   \rgb0, \count
    downsample
    compute_y_16x1
    store_y8_16x1           \y0, \count

    /* second row: luma, and completion of the 2x2 channel sums */
    load_\rgb_fmt\()_16x1   \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1           \y1, \count

    /* chroma from the downsampled channels */
    compute_chroma_8x1      u, U
    compute_chroma_8x1      v, V

    store_chroma_\yuv_fmt\()_8x1    \chroma, \count

    /* drop the layout-specific names again */
    alias_src_\rgb_fmt      0
    alias_dst_\yuv_fmt      0
.endm
292
293