Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arm/crypto/nh-neon-core.S
29266 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
/*
3
* NH - ε-almost-universal hash function, NEON accelerated version
4
*
5
* Copyright 2018 Google LLC
6
*
7
* Author: Eric Biggers <ebiggers@google.com>
8
*/
9
10
#include <linux/linkage.h>
11
12
/*
 * Assembler setup and symbolic register names used by _nh_stride and
 * nh_neon.  Code is placed in .text and the assembler is told to accept NEON
 * instructions.
 */
	.text
	.fpu		neon

	/*
	 * Core registers, named after the nh_neon() arguments they carry.
	 * KEY and MESSAGE are advanced by the write-back loads in _nh_stride;
	 * MESSAGE_LEN is consumed as a byte counter by nh_neon.
	 */
	KEY		.req	r0
	MESSAGE		.req	r1
	MESSAGE_LEN	.req	r2
	HASH		.req	r3

	/*
	 * One q-register accumulator per pass.  Each PASSn_SUMS holds two
	 * 64-bit partial sums; PASSn_SUM_A and PASSn_SUM_B name its low and
	 * high d-register halves so that the two partial sums can be added
	 * together once the whole message has been processed.
	 */
	PASS0_SUMS	.req	q0
	PASS0_SUM_A	.req	d0
	PASS0_SUM_B	.req	d1
	PASS1_SUMS	.req	q1
	PASS1_SUM_A	.req	d2
	PASS1_SUM_B	.req	d3
	PASS2_SUMS	.req	q2
	PASS2_SUM_A	.req	d4
	PASS2_SUM_B	.req	d5
	PASS3_SUMS	.req	q3
	PASS3_SUM_A	.req	d6
	PASS3_SUM_B	.req	d7

	/*
	 * Window of four 16-byte key strides.  nh_neon passes these to
	 * _nh_stride in rotating order, so the register that receives the
	 * newly loaded key stride changes on every invocation.
	 */
	K0		.req	q4
	K1		.req	q5
	K2		.req	q6
	K3		.req	q7

	/*
	 * Temporaries.  Tn_L and Tn_H name the low and high d-register halves
	 * of Tn; they are the two operands of the widening multiply in
	 * _nh_stride and the destinations of the final sums in nh_neon.
	 */
	T0		.req	q8
	T0_L		.req	d16
	T0_H		.req	d17
	T1		.req	q9
	T1_L		.req	d18
	T1_H		.req	d19
	T2		.req	q10
	T2_L		.req	d20
	T2_H		.req	d21
	T3		.req	q11
	T3_L		.req	d22
	T3_H		.req	d23
48
49
/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte stride of the message for all four passes.
 *
 * k0, k1, k2:	q registers already holding the key strides used for
 *		passes 0, 1 and 2 respectively.
 * k3:		q register that is overwritten with the next 16 bytes read
 *		from KEY and then used as the key stride for pass 3.
 *
 * Side effects: MESSAGE and KEY are each advanced by 16 bytes (write-back
 * addressing), PASS0_SUMS..PASS3_SUMS are updated, and T0..T3 plus \k3 are
 * clobbered.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	vld1.8		{T3}, [MESSAGE]!

	// Load next key stride
	vld1.32		{\k3}, [KEY]!

	// Add message words to key words (per 32-bit lane).  T3 is written
	// last because it is also the source of the message words.
	vadd.u32	T0, T3, \k0
	vadd.u32	T1, T3, \k1
	vadd.u32	T2, T3, \k2
	vadd.u32	T3, T3, \k3

	// Multiply 32x32 => 64 and accumulate: the two words in the low half
	// of each sum vector are multiplied lane-wise by the two words in its
	// high half, and both 64-bit products are added to that pass's
	// accumulator.
	vmlal.u32	PASS0_SUMS, T0_L, T0_H
	vmlal.u32	PASS1_SUMS, T1_L, T1_H
	vmlal.u32	PASS2_SUMS, T2_L, T2_H
	vmlal.u32	PASS3_SUMS, T3_L, T3_H
.endm
69
70
/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * In:	KEY (r0)	 = key words; read sequentially, 16 bytes per
 *			   message stride plus 48 bytes preloaded up front
 *	MESSAGE (r1)	 = message bytes
 *	MESSAGE_LEN (r2) = message length in bytes
 *	HASH (r3)	 = destination for the four 64-bit pass sums
 * Out:	32 bytes stored at HASH: the sums for passes 0..3, in that order.
 * Clobbers: r0-r2, q0-q11, condition flags.
 *
 * NOTE(review): q4-q7 (K0-K3) are overwritten without being saved or
 * restored here; presumably the caller's NEON context handling makes that
 * safe -- confirm against the call site.
 */
ENTRY(nh_neon)

	// Preload the first three key strides and zero all four accumulators.
	// The fourth key stride is loaded by the first _nh_stride.
	vld1.32		{K0,K1}, [KEY]!
	vmov.u64	PASS0_SUMS, #0
	vmov.u64	PASS1_SUMS, #0
	vld1.32		{K2}, [KEY]!
	vmov.u64	PASS2_SUMS, #0
	vmov.u64	PASS3_SUMS, #0

	// MESSAGE_LEN is kept biased by -64 so that the sign of the result
	// says whether another full group of four strides is available.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	// Four strides per iteration.  The key registers are rotated by one
	// position each time, so after four strides K0..K2 are back in the
	// roles they had at loop entry.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// MESSAGE_LEN is negative here; its low six bits are the number of
	// bytes still to hash, which is 0, 16, 32 or 48 because the length
	// is a multiple of 16.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'.
	// T0 and T1 are free again at this point and are reused to pack the
	// four 64-bit results into two q registers for a single store.
	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
	vst1.8		{T0-T1}, [HASH]
	bx		lr
ENDPROC(nh_neon)
117
118