/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8
	.align		3

	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		enc_dround_4x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	enc_round	q3, \key2
	.endm

	.macro		dec_dround_4x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	dec_round	q3, \key2
	.endm

	.macro		enc_fround_4x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	aese.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	.macro		dec_fround_4x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	aesd.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.32		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.32		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
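
/*
 * Illustrative note (not part of the upstream file): aese performs
 * AddRoundKey+SubBytes+ShiftRows and aesmc performs MixColumns, so each
 * dround above covers two full AES rounds and the fround supplies the
 * final round, which omits MixColumns. Using the ACLE crypto intrinsics,
 * the whole do_block flow reduces to this C sketch:
 *
 *	#include <arm_neon.h>
 *
 *	uint8x16_t aes_enc_model(uint8x16_t st, const uint8x16_t rk[],
 *				 int rounds)	// 10, 12 or 14
 *	{
 *		for (int i = 0; i < rounds - 1; i++)
 *			st = vaesmcq_u8(vaeseq_u8(st, rk[i]));
 *		return veorq_u8(vaeseq_u8(st, rk[rounds - 1]), rk[rounds]);
 *	}
 *
 * The cmp/blo/beq dispatch merely selects 10, 12 or 14 rounds for
 * AES-128/192/256 without maintaining a loop counter.
 */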

/*
 * Internal, non-AAPCS compliant functions that implement the core AES
 * transforms. These should preserve all registers except q0 - q3,
 * q10 - q13 and ip.
 * Arguments:
 *   q0        : first in/output block
 *   q1        : second in/output block (_4x version only)
 *   q2        : third in/output block (_4x version only)
 *   q3        : fourth in/output block (_4x version only)
 *   q8        : first round key
 *   q9        : second round key
 *   q14       : final round key
 *   r2        : address of round key array
 *   r3        : number of rounds
 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

	.align		6
aes_decrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.32		{q14}, [ip]		@ load last round key
	.endm
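
/*
 * Illustrative note (not part of the upstream file): the round keys are
 * laid out back to back as 16-byte vectors, so the final round key lives
 * at byte offset 16 * rounds. prepare_key caches in q8/q9/q14 the three
 * keys that do_block does not reload on every call; in C pointer
 * arithmetic the address computation is simply:
 *
 *	static const uint32_t *last_round_key(const uint32_t *rk, int rounds)
 *	{
 *		return rk + 4 * rounds;	// add ip, rk, rounds, lsl #4
 *	}
 */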

/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop4x:
	subs		r4, r4, #4
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_encrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbencloop4x
.Lecbenc1x:
	adds		r4, r4, #4
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop4x:
	subs		r4, r4, #4
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_decrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbdecloop4x
.Lecbdec1x:
	adds		r4, r4, #4
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q15}, [r5]		@ keep iv in q15
	prepare_key	r2, r3
.Lcbcdecloop4x:
	subs		r4, r4, #4
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	vmov		q4, q0
	vmov		q5, q1
	vmov		q6, q2
	vmov		q7, q3
	bl		aes_decrypt_4x
	veor		q0, q0, q15
	veor		q1, q1, q4
	veor		q2, q2, q5
	veor		q3, q3, q6
	vmov		q15, q7
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lcbcdecloop4x
.Lcbcdec1x:
	adds		r4, r4, #4
	beq		.Lcbcdecout
	vmov		q6, q14			@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q15, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q15}, [r5]		@ return iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
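
/*
 * Illustrative note (not part of the upstream file): the 1x CBC decrypt
 * loop above folds the CBC chaining xor into the final round key. The
 * last step of the decryption performed by do_block is "state ^= q14",
 * and CBC needs pt = D(ct) ^ prev_ct, so since
 *
 *	(state ^ rk_last) ^ prev_ct == state ^ (rk_last ^ prev_ct)
 *
 * the "veor q14, q15, q6" hands aes_decrypt a combined final round key
 * and the chaining xor costs no extra instruction per block.
 */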


/*
 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 */

ENTRY(ce_aes_cbc_cts_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]		@ overlapping loads
	vld1.8		{q3}, [ip]

	vld1.8		{q1}, [r5]		@ get iv
	prepare_key	r2, r3

	veor		q0, q0, q1		@ xor with iv
	bl		aes_encrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbl.8		d2, {d6-d7}, d12
	vtbl.8		d3, {d6-d7}, d13

	veor		q0, q0, q1
	bl		aes_encrypt

	add		r4, r0, r4
	vst1.8		{q2}, [r4]		@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)
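
/*
 * Illustrative sketch (not the kernel's glue code): for a message of
 * 16 < len <= 32 bytes (longer inputs presumably have all but the last
 * two blocks handled as plain CBC by the caller), the routine above
 * implements standard CBC ciphertext stealing: encrypt the full block,
 * encrypt the zero-padded tail xored with that result, then swap the
 * two, truncating the stolen block. Roughly:
 *
 *	// tail = len - 16; cbc_enc_block() is a hypothetical helper
 *	uint8_t c0[16], c1[16], pad[16] = { 0 };
 *	cbc_enc_block(c0, in, iv);		// E(P0 ^ iv)
 *	memcpy(pad, in + 16, tail);		// zero-padded final bytes
 *	cbc_enc_block(c1, pad, c0);		// E(pad(P1) ^ C0)
 *	memcpy(out, c1, 16);			// full block comes first
 *	memcpy(out + 16, c0, tail);		// stolen tail of C0
 */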

ENTRY(ce_aes_cbc_cts_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]		@ overlapping loads
	vld1.8		{q1}, [ip]

	vld1.8		{q3}, [r5]		@ get iv
	prepare_key	r2, r3

	bl		aes_decrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbx.8		d0, {d2-d3}, d12
	vtbx.8		d1, {d2-d3}, d13

	veor		q1, q1, q2
	bl		aes_decrypt
	veor		q0, q0, q3		@ xor with iv

	add		r4, r0, r4
	vst1.8		{q1}, [r4]		@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q7}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s31			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop4x:
	subs		r4, r4, #4
	bmi		.Lctr1x

	/*
	 * NOTE: the sequence below has been carefully tweaked to avoid
	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
	 * may produce an incorrect result if they take their input from a
	 * register of which a single 32-bit lane has been updated the last
	 * time it was modified. To work around this, the lanes of registers
	 * q0-q3 below are not manipulated individually, and the different
	 * counter values are prepared by successive manipulations of q7.
	 */
	add		ip, r6, #1
	vmov		q0, q7
	rev		ip, ip
	add		lr, r6, #2
	vmov		s31, ip			@ set lane 3 of q1 via q7
	add		ip, r6, #3
	rev		lr, lr
	vmov		q1, q7
	vmov		s31, lr			@ set lane 3 of q2 via q7
	rev		ip, ip
	vmov		q2, q7
	vmov		s31, ip			@ set lane 3 of q3 via q7
	add		r6, r6, #4
	vmov		q3, q7

	vld1.8		{q4-q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q15}, [r1]!
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q15
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	vmov		s31, ip
	b		.Lctrloop4x
.Lctr1x:
	adds		r4, r4, #4
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q7
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s31, ip
	bcs		.Lctrcarry

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop

.Lctrout:
	vst1.8		{q7}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s30, s29, s28
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)
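
/*
 * Illustrative note (not part of the upstream file): the .Lctrcarry
 * sequence ripples a carry out of the low 32-bit counter word (s31,
 * mirrored byte-swapped in r6) into the higher words s30/s29/s28. A C
 * model of the same big-endian 128-bit increment:
 *
 *	static void ctr_increment(uint32_t ctr[4])	// ctr[3] moves fastest
 *	{
 *		for (int i = 3; i >= 0; i--) {
 *			uint32_t v = __builtin_bswap32(ctr[i]) + 1;
 *
 *			ctr[i] = __builtin_bswap32(v);
 *			if (v)			// no wrap: bcc .Lctrcarrydone
 *				break;
 *		}
 *	}
 */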

/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 */

	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm
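
/*
 * Illustrative note (not part of the upstream file): next_tweak
 * multiplies the 128-bit tweak by x in GF(2^128) modulo the XTS
 * polynomial x^128 + x^7 + x^2 + x + 1. Viewing the tweak as two 64-bit
 * little-endian halves (lane 0 = t[0]), the macro is equivalent to:
 *
 *	static void next_tweak(uint64_t t[2])	// t[0] = low, t[1] = high
 *	{
 *		uint64_t carry = t[1] >> 63;	// bit shifted out of the top
 *
 *		t[1] = (t[1] << 1) | (t[0] >> 63);
 *		t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
 *	}
 *
 * The q15 constant prepared in ce_aes_xts_init holds { 1, 0x87 } in its
 * two 64-bit lanes, so a single vand plus the vext lane swap selects
 * both feedback terms at once.
 */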

ce_aes_xts_init:
	vmov.i32	d30, #0x87		@ compose tweak mask vector
	vmovl.u32	q15, d30
	vshr.u64	d30, d31, #7

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc4x

.Lxtsencloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsenc4x:
	subs		r4, r4, #64
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsencret
	b		.Lxtsencloop4x
.Lxtsenc1x:
	adds		r4, r4, #64
	beq		.Lxtsencout
	subs		r4, r4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
.Lxtsencctsout:
	veor		q0, q0, q4
	bl		aes_encrypt
	veor		q0, q0, q4
	teq		r4, #0
	beq		.Lxtsencout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	bmi		.Lxtsenccts
	vst1.8		{q0}, [r0]!
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q0}, [r0]
.Lxtsencret:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.LxtsencctsNx:
	vmov		q0, q3
	sub		r0, r0, #16
.Lxtsenccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	/* subtract 16 bytes if we are doing CTS */
	tst		r4, #0xf
	subne		r4, r4, #0x10

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec4x

.Lxtsdecloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsdec4x:
	subs		r4, r4, #64
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_decrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop4x
.Lxtsdec1x:
	adds		r4, r4, #64
	beq		.Lxtsdecout
	subs		r4, r4, #16
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	veor		q0, q0, q4
	bl		aes_decrypt
	veor		q0, q0, q4
	vst1.8		{q0}, [r0]!
	teq		r4, #0
	beq		.Lxtsdecout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.Lxtsdeccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	next_tweak	q5, q4, q15, q6

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	veor		q0, q0, q5
	bl		aes_decrypt
	veor		q0, q0, q5

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *                             AES sbox substitution on each byte in
 *                             'input'
 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)
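
/*
 * Illustrative note (not part of the upstream file): aese with an
 * all-zero operand computes ShiftRows(SubBytes(state ^ key)). Because
 * vdup.32 duplicates the input word into all four columns, ShiftRows
 * only moves identical bytes between identical columns, so lane 0 of
 * the result is simply the four input bytes run through the AES sbox.
 * An equivalent C sketch using the crypto intrinsics:
 *
 *	#include <arm_neon.h>
 *
 *	static uint32_t ce_aes_sub_model(uint32_t in)
 *	{
 *		uint8x16_t st = vreinterpretq_u8_u32(vdupq_n_u32(in));
 *
 *		st = vaeseq_u8(st, vdupq_n_u8(0));
 *		return vgetq_lane_u32(vreinterpretq_u32_u8(st), 0);
 *	}
 */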

/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *                                        operation on round key *src
 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)

	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
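
/*
 * Illustrative note (not part of the upstream file): vtbl/vtbx yield
 * zero for out-of-range (0xff) indices, so a 16-byte selector loaded
 * from .Lcts_permute_table + N (0 < N <= 16) moves the first N bytes of
 * a block to the end of the vector and zero-fills the rest, while the
 * complementary selector at offset 32 - N pulls a block's last N bytes
 * down to the start. The CTS and XTS tail paths above combine these
 * selectors with overlapping loads and stores to handle the final
 * partial block without any byte-by-byte copying.
 */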