/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8
	.align		3

	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		enc_dround_4x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	enc_round	q3, \key2
	.endm

	.macro		dec_dround_4x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	dec_round	q3, \key2
	.endm

	.macro		enc_fround_4x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	aese.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	.macro		dec_fround_4x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	aesd.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.32		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.32		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
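
/*
 * Illustrative note (not part of the upstream file): aese performs
 * AddRoundKey+SubBytes+ShiftRows and aesmc performs MixColumns, so each
 * dround above covers two full AES rounds and the fround supplies the
 * final round, which omits MixColumns. Using the ACLE crypto intrinsics,
 * the whole do_block flow reduces to this C sketch:
 *
 *	#include <arm_neon.h>
 *
 *	uint8x16_t aes_enc_model(uint8x16_t st, const uint8x16_t rk[],
 *				 int rounds)	// 10, 12 or 14
 *	{
 *		for (int i = 0; i < rounds - 1; i++)
 *			st = vaesmcq_u8(vaeseq_u8(st, rk[i]));
 *		return veorq_u8(vaeseq_u8(st, rk[rounds - 1]), rk[rounds]);
 *	}
 *
 * The cmp/blo/beq dispatch merely selects 10, 12 or 14 rounds for
 * AES-128/192/256 without maintaining a loop counter.
 */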

/*
 * Internal, non-AAPCS compliant functions that implement the core AES
 * transforms. These should preserve all registers except q0 - q3,
 * q10 - q13 and ip.
 * Arguments:
 *   q0        : first in/output block
 *   q1        : second in/output block (_4x version only)
 *   q2        : third in/output block (_4x version only)
 *   q3        : fourth in/output block (_4x version only)
 *   q8        : first round key
 *   q9        : second round key
 *   q14       : final round key
 *   r2        : address of round key array
 *   r3        : number of rounds
 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

	.align		6
aes_decrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.32		{q14}, [ip]		@ load last round key
	.endm
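
/*
 * Illustrative note (not part of the upstream file): the round keys are
 * laid out back to back as 16-byte vectors, so the final round key lives
 * at byte offset 16 * rounds. prepare_key caches in q8/q9/q14 the three
 * keys that do_block does not reload on every call; in C pointer
 * arithmetic the address computation is simply:
 *
 *	static const uint32_t *last_round_key(const uint32_t *rk, int rounds)
 *	{
 *		return rk + 4 * rounds;	// add ip, rk, rounds, lsl #4
 *	}
 */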

/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop4x:
	subs		r4, r4, #4
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_encrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbencloop4x
.Lecbenc1x:
	adds		r4, r4, #4
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop4x:
	subs		r4, r4, #4
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_decrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbdecloop4x
.Lecbdec1x:
	adds		r4, r4, #4
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q15}, [r5]		@ keep iv in q15
	prepare_key	r2, r3
.Lcbcdecloop4x:
	subs		r4, r4, #4
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	vmov		q4, q0
	vmov		q5, q1
	vmov		q6, q2
	vmov		q7, q3
	bl		aes_decrypt_4x
	veor		q0, q0, q15
	veor		q1, q1, q4
	veor		q2, q2, q5
	veor		q3, q3, q6
	vmov		q15, q7
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lcbcdecloop4x
.Lcbcdec1x:
	adds		r4, r4, #4
	beq		.Lcbcdecout
	vmov		q6, q14			@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q15, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q15}, [r5]		@ return iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
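
/*
 * Illustrative note (not part of the upstream file): the 1x CBC decrypt
 * loop above folds the CBC chaining xor into the final round key. The
 * last step of the decryption performed by do_block is "state ^= q14",
 * and CBC needs pt = D(ct) ^ prev_ct, so since
 *
 *	(state ^ rk_last) ^ prev_ct == state ^ (rk_last ^ prev_ct)
 *
 * the "veor q14, q15, q6" hands aes_decrypt a combined final round key
 * and the chaining xor costs no extra instruction per block.
 */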


/*
 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 */

ENTRY(ce_aes_cbc_cts_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]		@ overlapping loads
	vld1.8		{q3}, [ip]

	vld1.8		{q1}, [r5]		@ get iv
	prepare_key	r2, r3

	veor		q0, q0, q1		@ xor with iv
	bl		aes_encrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbl.8		d2, {d6-d7}, d12
	vtbl.8		d3, {d6-d7}, d13

	veor		q0, q0, q1
	bl		aes_encrypt

	add		r4, r0, r4
	vst1.8		{q2}, [r4]		@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)
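
/*
 * Illustrative sketch (not the kernel's glue code): for a message of
 * 16 < len <= 32 bytes (longer inputs presumably have all but the last
 * two blocks handled as plain CBC by the caller), the routine above
 * implements standard CBC ciphertext stealing: encrypt the full block,
 * encrypt the zero-padded tail xored with that result, then swap the
 * two, truncating the stolen block. Roughly:
 *
 *	// tail = len - 16; cbc_enc_block() is a hypothetical helper
 *	uint8_t c0[16], c1[16], pad[16] = { 0 };
 *	cbc_enc_block(c0, in, iv);		// E(P0 ^ iv)
 *	memcpy(pad, in + 16, tail);		// zero-padded final bytes
 *	cbc_enc_block(c1, pad, c0);		// E(pad(P1) ^ C0)
 *	memcpy(out, c1, 16);			// full block comes first
 *	memcpy(out + 16, c0, tail);		// stolen tail of C0
 */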

ENTRY(ce_aes_cbc_cts_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]		@ overlapping loads
	vld1.8		{q1}, [ip]

	vld1.8		{q3}, [r5]		@ get iv
	prepare_key	r2, r3

	bl		aes_decrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbx.8		d0, {d2-d3}, d12
	vtbx.8		d1, {d2-d3}, d13

	veor		q1, q1, q2
	bl		aes_decrypt
	veor		q0, q0, q3		@ xor with iv

	add		r4, r0, r4
	vst1.8		{q1}, [r4]		@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q7}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s31			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop4x:
	subs		r4, r4, #4
	bmi		.Lctr1x

	/*
	 * NOTE: the sequence below has been carefully tweaked to avoid
	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
	 * may produce an incorrect result if they take their input from a
	 * register of which a single 32-bit lane has been updated the last
	 * time it was modified. To work around this, the lanes of registers
	 * q0-q3 below are not manipulated individually, and the different
	 * counter values are prepared by successive manipulations of q7.
	 */
	add		ip, r6, #1
	vmov		q0, q7
	rev		ip, ip
	add		lr, r6, #2
	vmov		s31, ip			@ set lane 3 of q1 via q7
	add		ip, r6, #3
	rev		lr, lr
	vmov		q1, q7
	vmov		s31, lr			@ set lane 3 of q2 via q7
	rev		ip, ip
	vmov		q2, q7
	vmov		s31, ip			@ set lane 3 of q3 via q7
	add		r6, r6, #4
	vmov		q3, q7

	vld1.8		{q4-q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q15}, [r1]!
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q15
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	vmov		s31, ip
	b		.Lctrloop4x
.Lctr1x:
	adds		r4, r4, #4
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q7
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s31, ip
	bcs		.Lctrcarry

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop

.Lctrout:
	vst1.8		{q7}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s30, s29, s28
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)
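
/*
 * Illustrative note (not part of the upstream file): the .Lctrcarry
 * sequence ripples a carry out of the low 32-bit counter word (s31,
 * mirrored byte-swapped in r6) into the higher words s30/s29/s28. A C
 * model of the same big-endian 128-bit increment:
 *
 *	static void ctr_increment(uint32_t ctr[4])	// ctr[3] moves fastest
 *	{
 *		for (int i = 3; i >= 0; i--) {
 *			uint32_t v = __builtin_bswap32(ctr[i]) + 1;
 *
 *			ctr[i] = __builtin_bswap32(v);
 *			if (v)			// no wrap: bcc .Lctrcarrydone
 *				break;
 *		}
 *	}
 */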

/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 */

	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm
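
/*
 * Illustrative note (not part of the upstream file): next_tweak
 * multiplies the 128-bit tweak by x in GF(2^128) modulo the XTS
 * polynomial x^128 + x^7 + x^2 + x + 1. Viewing the tweak as two 64-bit
 * little-endian halves (lane 0 = t[0]), the macro is equivalent to:
 *
 *	static void next_tweak(uint64_t t[2])	// t[0] = low, t[1] = high
 *	{
 *		uint64_t carry = t[1] >> 63;	// bit shifted out of the top
 *
 *		t[1] = (t[1] << 1) | (t[0] >> 63);
 *		t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
 *	}
 *
 * The q15 constant prepared in ce_aes_xts_init holds { 1, 0x87 } in its
 * two 64-bit lanes, so a single vand plus the vext lane swap selects
 * both feedback terms at once.
 */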

ce_aes_xts_init:
	vmov.i32	d30, #0x87		@ compose tweak mask vector
	vmovl.u32	q15, d30
	vshr.u64	d30, d31, #7

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc4x

.Lxtsencloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsenc4x:
	subs		r4, r4, #64
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsencret
	b		.Lxtsencloop4x
.Lxtsenc1x:
	adds		r4, r4, #64
	beq		.Lxtsencout
	subs		r4, r4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
.Lxtsencctsout:
	veor		q0, q0, q4
	bl		aes_encrypt
	veor		q0, q0, q4
	teq		r4, #0
	beq		.Lxtsencout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	bmi		.Lxtsenccts
	vst1.8		{q0}, [r0]!
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q0}, [r0]
.Lxtsencret:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.LxtsencctsNx:
	vmov		q0, q3
	sub		r0, r0, #16
.Lxtsenccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	/* subtract 16 bytes if we are doing CTS */
	tst		r4, #0xf
	subne		r4, r4, #0x10

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec4x

.Lxtsdecloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsdec4x:
	subs		r4, r4, #64
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_decrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop4x
.Lxtsdec1x:
	adds		r4, r4, #64
	beq		.Lxtsdecout
	subs		r4, r4, #16
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	veor		q0, q0, q4
	bl		aes_decrypt
	veor		q0, q0, q4
	vst1.8		{q0}, [r0]!
	teq		r4, #0
	beq		.Lxtsdecout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.Lxtsdeccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	next_tweak	q5, q4, q15, q6

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	veor		q0, q0, q5
	bl		aes_decrypt
	veor		q0, q0, q5

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *                             AES sbox substitution on each byte in
 *                             'input'
 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)
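
/*
 * Illustrative note (not part of the upstream file): aese with an
 * all-zero operand computes ShiftRows(SubBytes(state ^ key)). Because
 * vdup.32 duplicates the input word into all four columns, ShiftRows
 * only moves identical bytes between identical columns, so lane 0 of
 * the result is simply the four input bytes run through the AES sbox.
 * An equivalent C sketch using the crypto intrinsics:
 *
 *	#include <arm_neon.h>
 *
 *	static uint32_t ce_aes_sub_model(uint32_t in)
 *	{
 *		uint8x16_t st = vreinterpretq_u8_u32(vdupq_n_u32(in));
 *
 *		st = vaeseq_u8(st, vdupq_n_u8(0));
 *		return vgetq_lane_u32(vreinterpretq_u32_u8(st), 0);
 *	}
 */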

/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *                                        operation on round key *src
 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)

	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
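
/*
 * Illustrative note (not part of the upstream file): vtbl/vtbx yield
 * zero for out-of-range (0xff) indices, so a 16-byte selector loaded
 * from .Lcts_permute_table + N (0 < N <= 16) moves the first N bytes of
 * a block to the end of the vector and zero-fills the rest, while the
 * complementary selector at offset 32 - N pulls a block's last N bytes
 * down to the start. The CTS and XTS tail paths above combine these
 * selectors with overlapping loads and stores to handle the final
 * partial block without any byte-by-byte copying.
 */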