/*****************************************************************************
 * asm.S: AArch64 utility macros
 *****************************************************************************
 * Copyright (C) 2008-2016 x264 project
 *
 * Authors: Mans Rullgard <[email protected]>
 *          David Conrad <[email protected]>
 *          Janne Grunau <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at [email protected].
 *****************************************************************************/
27
28
#include "config.h"
29
30
/*
 * Build-dependent line prefixes.
 *
 * EXTERN_ASM is the token glued in front of exported symbol names: `_` when
 * PREFIX is defined, nothing otherwise.
 *
 * ELF, MACH and FUNC are placed at the start of an assembler line.  Each one
 * expands to nothing when its condition holds, so the directive that follows
 * is assembled.  Otherwise it expands to `#`, which is expected to make the
 * assembler treat the rest of that line as a comment.
 */
#if defined(PREFIX)
#   define EXTERN_ASM _
#else
#   define EXTERN_ASM
#endif

/* ELF: active only when the compiler targets an ELF object format. */
#if defined(__ELF__)
#   define ELF
#else
#   define ELF #
#endif

/* MACH: active only when the compiler targets Mach-O. */
#if defined(__MACH__)
#   define MACH
#else
#   define MACH #
#endif

/* FUNC: active only when config.h sets HAVE_AS_FUNC to a non-zero value. */
#if HAVE_AS_FUNC
#   define FUNC
#else
#   define FUNC #
#endif
53
54
/*
 * function name, export=0, align=2
 *
 * Opens a function in .text, aligned with `.align \align`, and defines its
 * entry label.
 *
 *   name    label of the function
 *   export  non-zero: the label is EXTERN_ASM\name and is declared .global;
 *           zero:     the label is \name and stays file-local
 *   align   operand handed to .align
 *
 * A one-shot `endfunc` macro is defined alongside; invoke it after the last
 * instruction of the function.  It emits the ELF .size record (and .endfunc
 * when FUNC is active) and then purges itself so the next `function` can
 * define it again.
 *
 * The .size record must refer to the very label that was defined above, so
 * for exported functions it uses EXTERN_ASM\name rather than the bare name.
 * Using the bare name would reference an undefined symbol whenever
 * EXTERN_ASM is non-empty on an ELF target.
 */
.macro function name, export=0, align=2
    .macro endfunc
.if \export
ELF     .size   EXTERN_ASM\name, . - EXTERN_ASM\name
.else
ELF     .size   \name, . - \name
.endif
FUNC    .endfunc
        .purgem endfunc
    .endm
        .text
        .align  \align
    .if \export
        .global EXTERN_ASM\name
ELF     .type   EXTERN_ASM\name, %function
FUNC    .func   EXTERN_ASM\name
EXTERN_ASM\name:
    .else
ELF     .type   \name, %function
FUNC    .func   \name
\name:
    .endif
.endm
73
74
/*
 * const name, align=2
 *
 * Starts a read-only data object: switches to .rodata on ELF or .const_data
 * on Mach-O, applies `.align \align`, and defines the label \name.
 *
 * A one-shot `endconst` macro is defined alongside; invoke it after the last
 * data directive.  It emits the ELF .size record for \name and then purges
 * itself so the next `const` can define it again.
 */
.macro const name, align=2
    .macro endconst
ELF     .size   \name, . - \name
        .purgem endconst
    .endm
ELF     .section .rodata
MACH    .const_data
        .align  \align
\name:
.endm
84
85
/*
 * movrel rd, val
 *
 * Loads the address of symbol \val into register \rd.
 *
 *   PIC on Apple targets : adrp + add using the @PAGE / @PAGEOFF operators
 *   PIC elsewhere        : adrp + add using the :lo12: operator
 *   non-PIC              : ldr \rd, =\val
 */
.macro movrel rd, val
#if defined(PIC)
# if defined(__APPLE__)
        adrp    \rd, \val@PAGE
        add     \rd, \rd, \val@PAGEOFF
# else
        adrp    \rd, \val
        add     \rd, \rd, :lo12:\val
# endif
#else
        ldr     \rd, =\val
#endif
.endm
96
97
/*
 * Token pasting helpers.  JOIN goes through GLUE so that its arguments are
 * macro-expanded before being pasted; X(s) therefore yields `s` prefixed
 * with whatever EXTERN_ASM expands to.
 */
#define GLUE(a, b)      a ## b
#define JOIN(a, b)      GLUE(a, b)
#define X(s)            JOIN(EXTERN_ASM, s)

/* Stride constants; their units and users are not visible in this file. */
#define FDEC_STRIDE     32
#define FENC_STRIDE     16
103
104
105
/*
 * SUMSUB_AB sum, sub, a, b
 *
 *   sum = a + b
 *   sub = a - b
 *
 * The add is emitted first, so \sum must not name the same register as \a
 * or \b, otherwise the sub reads an already overwritten operand.
 */
.macro SUMSUB_AB sum, sub, a, b
        add     \sum, \a, \b
        sub     \sub, \a, \b
.endm
109
110
/*
 * unzip t1, t2, s1, s2
 *
 * Emits uzp1 into \t1 and uzp2 into \t2, both reading the source pair
 * \s1, \s2.  uzp1 is issued first, so \t1 must not name \s1 or \s2.
 */
.macro unzip t1, t2, s1, s2
        uzp1    \t1, \s1, \s2
        uzp2    \t2, \s1, \s2
.endm
114
115
/*
 * transpose t1, t2, s1, s2
 *
 * Emits trn1 into \t1 and trn2 into \t2, both reading the source pair
 * \s1, \s2.  trn1 is issued first, so \t1 must not name \s1 or \s2; \t2 may
 * overlap a source because it is written last.
 */
.macro transpose t1, t2, s1, s2
        trn1    \t1, \s1, \s2
        trn2    \t2, \s1, \s2
.endm
119
120
/*
 * transpose4x4.h v0, v1, v2, v3, t0, t1, t2, t3
 *
 * Transposes, in place, the 4x4 block of 16-bit elements held in the low
 * 64 bits of \v0-\v3 (one row per register).  \t0-\t3 are overwritten as
 * scratch.  All operands are bare vector register names; the arrangement
 * suffix is appended here.
 */
.macro transpose4x4.h v0, v1, v2, v3, t0, t1, t2, t3
        /* Pass 1: interleave 32-bit lanes of rows 0/2 and rows 1/3. */
        trn1    \t0\().2s, \v0\().2s, \v2\().2s
        trn2    \t2\().2s, \v0\().2s, \v2\().2s
        trn1    \t1\().2s, \v1\().2s, \v3\().2s
        trn2    \t3\().2s, \v1\().2s, \v3\().2s
        /* Pass 2: interleave 16-bit lanes of the intermediates. */
        trn1    \v0\().4h, \t0\().4h, \t1\().4h
        trn2    \v1\().4h, \t0\().4h, \t1\().4h
        trn1    \v2\().4h, \t2\().4h, \t3\().4h
        trn2    \v3\().4h, \t2\().4h, \t3\().4h
.endm
126
127
/*
 * transpose4x8.h v0, v1, v2, v3, t0, t1, t2, t3
 *
 * Full-width (128-bit) counterpart of transpose4x4.h: the same two passes
 * run on .4s and then .8h arrangements, so each 64-bit half of \v0-\v3 has
 * its own 4x4 block of 16-bit elements transposed in place.  \t0-\t3 are
 * overwritten as scratch.  Operands are bare vector register names.
 */
.macro transpose4x8.h v0, v1, v2, v3, t0, t1, t2, t3
        /* Pass 1: interleave 32-bit lanes of rows 0/2 and rows 1/3. */
        trn1    \t0\().4s, \v0\().4s, \v2\().4s
        trn2    \t2\().4s, \v0\().4s, \v2\().4s
        trn1    \t1\().4s, \v1\().4s, \v3\().4s
        trn2    \t3\().4s, \v1\().4s, \v3\().4s
        /* Pass 2: interleave 16-bit lanes of the intermediates. */
        trn1    \v0\().8h, \t0\().8h, \t1\().8h
        trn2    \v1\().8h, \t0\().8h, \t1\().8h
        trn1    \v2\().8h, \t2\().8h, \t3\().8h
        trn2    \v3\().8h, \t2\().8h, \t3\().8h
.endm
133
134
135
/*
 * transpose8x8.h r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
 *
 * Transposes, in place, the 8x8 block of 16-bit elements held in \r0-\r7
 * (one row per register).  \r8 and \r9 are overwritten as scratch.  Operands
 * are bare vector register names; the arrangement suffix is appended here.
 *
 * The instruction order is significant: several destinations double as
 * sources of a later instruction in the same pass.
 */
.macro transpose8x8.h r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
        /* Pass 1: 16-bit lanes of adjacent row pairs. */
        trn1    \r8\().8h, \r0\().8h, \r1\().8h
        trn2    \r9\().8h, \r0\().8h, \r1\().8h
        trn1    \r1\().8h, \r2\().8h, \r3\().8h
        trn2    \r3\().8h, \r2\().8h, \r3\().8h
        trn1    \r0\().8h, \r4\().8h, \r5\().8h
        trn2    \r5\().8h, \r4\().8h, \r5\().8h
        trn1    \r2\().8h, \r6\().8h, \r7\().8h
        trn2    \r7\().8h, \r6\().8h, \r7\().8h

        /* Pass 2: 32-bit lanes of the pass-1 results. */
        trn1    \r4\().4s, \r0\().4s, \r2\().4s
        trn2    \r2\().4s, \r0\().4s, \r2\().4s
        trn1    \r6\().4s, \r5\().4s, \r7\().4s
        trn2    \r7\().4s, \r5\().4s, \r7\().4s
        trn1    \r5\().4s, \r9\().4s, \r3\().4s
        trn2    \r9\().4s, \r9\().4s, \r3\().4s
        trn1    \r3\().4s, \r8\().4s, \r1\().4s
        trn2    \r8\().4s, \r8\().4s, \r1\().4s

        /* Pass 3: 64-bit lanes; results land back in r0-r7. */
        trn1    \r0\().2d, \r3\().2d, \r4\().2d
        trn2    \r4\().2d, \r3\().2d, \r4\().2d

        trn1    \r1\().2d, \r5\().2d, \r6\().2d
        trn2    \r5\().2d, \r5\().2d, \r6\().2d

        /* trn2 first here: the trn1 below overwrites r2, which trn2 reads. */
        trn2    \r6\().2d, \r8\().2d, \r2\().2d
        trn1    \r2\().2d, \r8\().2d, \r2\().2d

        trn1    \r3\().2d, \r9\().2d, \r7\().2d
        trn2    \r7\().2d, \r9\().2d, \r7\().2d
.endm
166
167
/*
 * transpose_8x16.b r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
 *
 * Byte transpose across the eight 128-bit registers \r0-\r7, done in place
 * in three trn1/trn2 passes (.16b, .8h, .4s).  The widest pass is .4s, so
 * each 64-bit half of the registers has its own 8x8 byte block transposed.
 * \t0 and \t1 are overwritten as scratch.  Operands are bare vector register
 * names; the arrangement suffix is appended here.
 *
 * Each `transpose d1, d2, s1, s2` line expands to
 *     trn1 d1, s1, s2
 *     trn2 d2, s1, s2
 * The statement order is significant: several destinations double as
 * sources of a later statement in the same pass.
 */
.macro transpose_8x16.b r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
        /* Pass 1: byte lanes of adjacent row pairs. */
        transpose \t0\().16b, \t1\().16b, \r0\().16b, \r1\().16b
        transpose \r1\().16b, \r3\().16b, \r2\().16b, \r3\().16b
        transpose \r0\().16b, \r5\().16b, \r4\().16b, \r5\().16b
        transpose \r2\().16b, \r7\().16b, \r6\().16b, \r7\().16b

        /* Pass 2: 16-bit lanes of the pass-1 results. */
        transpose \r4\().8h, \r2\().8h, \r0\().8h, \r2\().8h
        transpose \r6\().8h, \r7\().8h, \r5\().8h, \r7\().8h
        transpose \r5\().8h, \t1\().8h, \t1\().8h, \r3\().8h
        transpose \r3\().8h, \t0\().8h, \t0\().8h, \r1\().8h

        /* Pass 3: 32-bit lanes; results land back in r0-r7. */
        transpose \r0\().4s, \r4\().4s, \r3\().4s, \r4\().4s
        transpose \r1\().4s, \r5\().4s, \r5\().4s, \r6\().4s

        /* Kept explicit: trn2 must run before trn1 overwrites r2. */
        trn2    \r6\().4s, \t0\().4s, \r2\().4s
        trn1    \r2\().4s, \t0\().4s, \r2\().4s

        transpose \r3\().4s, \r7\().4s, \t1\().4s, \r7\().4s
.endm
198
199
/*
 * transpose_4x16.b r0, r1, r2, r3, t4, t5, t6, t7
 *
 * Byte transpose across the four 128-bit registers \r0-\r3, done in place in
 * two trn1/trn2 passes (.16b then .8h).  The widest pass is .8h, so every
 * 32-bit group of the registers has its own 4x4 byte block transposed.
 * \t4-\t7 are overwritten as scratch.  Operands are bare vector register
 * names; the arrangement suffix is appended here.
 *
 * Each `transpose d1, d2, s1, s2` line expands to
 *     trn1 d1, s1, s2
 *     trn2 d2, s1, s2
 */
.macro transpose_4x16.b r0, r1, r2, r3, t4, t5, t6, t7
        /* Pass 1: byte lanes of rows 0/1 and rows 2/3 into the scratch set. */
        transpose \t4\().16b, \t5\().16b, \r0\().16b, \r1\().16b
        transpose \t6\().16b, \t7\().16b, \r2\().16b, \r3\().16b

        /* Pass 2: 16-bit lanes of the scratch set back into r0-r3. */
        transpose \r0\().8h, \r2\().8h, \t4\().8h, \t6\().8h
        transpose \r1\().8h, \r3\().8h, \t5\().8h, \t7\().8h
.endm
210
211
/*
 * transpose_4x8.b r0, r1, r2, r3, t4, t5, t6, t7
 *
 * 64-bit counterpart of transpose_4x16.b: the same two trn1/trn2 passes run
 * on .8b and then .4h arrangements, so every 32-bit group in the low 64 bits
 * of \r0-\r3 has its own 4x4 byte block transposed in place.  \t4-\t7 are
 * overwritten as scratch.  Operands are bare vector register names.
 *
 * Each `transpose d1, d2, s1, s2` line expands to
 *     trn1 d1, s1, s2
 *     trn2 d2, s1, s2
 */
.macro transpose_4x8.b r0, r1, r2, r3, t4, t5, t6, t7
        /* Pass 1: byte lanes of rows 0/1 and rows 2/3 into the scratch set. */
        transpose \t4\().8b, \t5\().8b, \r0\().8b, \r1\().8b
        transpose \t6\().8b, \t7\().8b, \r2\().8b, \r3\().8b

        /* Pass 2: 16-bit lanes of the scratch set back into r0-r3. */
        transpose \r0\().4h, \r2\().4h, \t4\().4h, \t6\().4h
        transpose \r1\().4h, \r3\().4h, \t5\().4h, \t7\().4h
.endm
222
223