Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/sparc/lib/checksum_64.S
29281 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
/* checksum_64.S: Sparc V9 optimized checksum code.
3
*
4
* Copyright(C) 1995 Linus Torvalds
5
* Copyright(C) 1995 Miguel de Icaza
6
* Copyright(C) 1996, 2000 David S. Miller
7
* Copyright(C) 1997 Jakub Jelinek
8
*
9
* derived from:
10
* Linux/Alpha checksum c-code
11
* Linux/ix86 inline checksum assembly
12
* RFC 1071, Computing the Internet Checksum (esp. Jacobson's m68k code)
13
* David Mosberger-Tang for optimized reference c-code
14
* BSD4.4 portable checksum routine
15
*/
16
17
#include <linux/export.h>
18
.text
19
20
/* csum_partial_fix_alignment: advance the buffer pointer to a 4-byte
 * boundary before the word-at-a-time summing starts.
 *
 * Reached only by the "bne,pn" in csum_partial, taken when
 * (%o0 & 0x3) != 0.  That branch's delay slot executes
 * "andcc %o0, 0x1, %g7", so on arrival:
 *   - the integer condition codes reflect bit 0 of the buffer address
 *   - %g7 = %o0 & 1 (tested near the end of csum_partial)
 *   - %o0 = buffer pointer, %o1 = bytes remaining (non-zero)
 *   - %o4 = running sum, already cleared by csum_partial
 *
 * Leaves via csum_partial_post_align once %o0 is 4-byte aligned, or via
 * csum_partial_end_cruft when fewer than 2 bytes remain at a
 * halfword-aligned address.  Uses %o5 as scratch.
 */
csum_partial_fix_alignment:
21
/* We checked for zero length already, so there must be
22
* at least one byte.
23
*/
24
/* Address bit 0 clear (flags come from the caller's delay slot):
 * there is no single leading byte to consume.
 */
be,pt %icc, 1f
25
nop
26
/* Odd address: the first byte becomes the initial sum, then step
 * past it so %o0 is halfword aligned.
 */
ldub [%o0 + 0x00], %o4
27
add %o0, 1, %o0
28
sub %o1, 1, %o1
29
/* %o0 is even here; if bit 1 is also clear it is 4-byte aligned. */
1: andcc %o0, 0x2, %g0
30
be,pn %icc, csum_partial_post_align
31
/* Delay slot of the be above (executes whether or not it is taken):
 * compares the remaining length with 2 for the blu that follows.
 */
cmp %o1, 2
32
/* Fewer than 2 bytes left: no halfword can be read, so hand the
 * remainder to the tail code.
 */
blu,pn %icc, csum_partial_end_cruft
33
nop
34
/* Consume one halfword, which brings %o0 to a 4-byte boundary. */
lduh [%o0 + 0x00], %o5
35
add %o0, 2, %o0
36
sub %o1, 2, %o1
37
ba,pt %xcc, csum_partial_post_align
38
/* Delay slot of the ba: add the halfword into the running sum. */
add %o5, %o4, %o4
39
40
/* csum_partial: add up the bytes of a buffer as 16-bit quantities and
 * combine the result with an incoming sum.
 *
 * In:   %o0 = buff, %o1 = len (bytes), %o2 = sum
 * Out:  %o0 = low 32 bits of (sum + folded 16-bit buffer sum), with the
 *       32-bit carry-out of that addition added back in.  When len is
 *       zero the incoming sum is returned unchanged (zero-extended from
 *       32 bits).
 *
 * Register use inside the routine:
 *   %o3                = byte counter for the two summing loops
 *   %o4                = accumulator
 *   %o5, %g1, %g2, %g3 = scratch for loaded data and fold temporaries
 *   %g7                = bit 0 of the original buffer address
 *
 * An instruction directly after a branch sits in that branch's delay
 * slot; none of the branches here is annulled, so each such instruction
 * executes whether or not the branch is taken.
 */
.align 32
41
.globl csum_partial
42
.type csum_partial,#function
43
EXPORT_SYMBOL(csum_partial)
44
csum_partial: /* %o0=buff, %o1=len, %o2=sum */
45
prefetch [%o0 + 0x000], #n_reads
46
/* Accumulator starts at zero. */
clr %o4
47
prefetch [%o0 + 0x040], #n_reads
48
/* Zero length: nothing to add, go straight to the return. */
brz,pn %o1, csum_partial_finish
49
/* Delay slot: set the condition codes from the low two address bits
 * for the bne below.
 */
andcc %o0, 0x3, %g0
50
51
/* We "remember" whether the lowest bit in the address
52
* was set in %g7. Because if it is, we have to swap
53
* upper and lower 8 bit fields of the sum we calculate.
54
*/
55
/* Not 4-byte aligned: csum_partial_fix_alignment consumes the leading
 * byte and/or halfword and then rejoins at csum_partial_post_align
 * (or at csum_partial_end_cruft if the data runs out).
 */
bne,pn %icc, csum_partial_fix_alignment
56
/* Delay slot: %g7 = %o0 & 1.  The condition codes it sets are the
 * first thing csum_partial_fix_alignment tests.
 */
andcc %o0, 0x1, %g7
57
58
/* From here on %o0 is 4-byte aligned. */
csum_partial_post_align:
59
prefetch [%o0 + 0x080], #n_reads
60
/* %o3 = len rounded down to a multiple of 64: the number of bytes the
 * unrolled loop will consume.
 */
andncc %o1, 0x3f, %o3
61
62
prefetch [%o0 + 0x0c0], #n_reads
63
/* %o1 = bytes left over after the 64-byte blocks (0..63). */
sub %o1, %o3, %o1
64
/* No complete 64-byte block: skip the unrolled loop. */
brz,pn %o3, 2f
65
prefetch [%o0 + 0x100], #n_reads
66
67
/* So that we don't need to use the non-pairing
68
* add-with-carry instructions we accumulate 32-bit
69
* values into a 64-bit register. At the end of the
70
* loop we fold it down to 32-bits and so on.
71
*/
72
prefetch [%o0 + 0x140], #n_reads
73
/* Unrolled loop: sixteen zero-extended 32-bit loads (offsets 0x00 to
 * 0x3c) per pass.  The loads rotate through %o5/%g1/%g2/%g3 and are
 * issued ahead of the adds that consume them.
 */
1: lduw [%o0 + 0x00], %o5
74
lduw [%o0 + 0x04], %g1
75
lduw [%o0 + 0x08], %g2
76
add %o4, %o5, %o4
77
lduw [%o0 + 0x0c], %g3
78
add %o4, %g1, %o4
79
lduw [%o0 + 0x10], %o5
80
add %o4, %g2, %o4
81
lduw [%o0 + 0x14], %g1
82
add %o4, %g3, %o4
83
lduw [%o0 + 0x18], %g2
84
add %o4, %o5, %o4
85
lduw [%o0 + 0x1c], %g3
86
add %o4, %g1, %o4
87
lduw [%o0 + 0x20], %o5
88
add %o4, %g2, %o4
89
lduw [%o0 + 0x24], %g1
90
add %o4, %g3, %o4
91
lduw [%o0 + 0x28], %g2
92
add %o4, %o5, %o4
93
lduw [%o0 + 0x2c], %g3
94
add %o4, %g1, %o4
95
lduw [%o0 + 0x30], %o5
96
add %o4, %g2, %o4
97
lduw [%o0 + 0x34], %g1
98
add %o4, %g3, %o4
99
lduw [%o0 + 0x38], %g2
100
add %o4, %o5, %o4
101
lduw [%o0 + 0x3c], %g3
102
add %o4, %g1, %o4
103
prefetch [%o0 + 0x180], #n_reads
104
add %o4, %g2, %o4
105
/* One 64-byte block done: decrement the block byte count and advance
 * the pointer.  The bne tests the flags set by this subcc.
 */
subcc %o3, 0x40, %o3
106
add %o0, 0x40, %o0
107
bne,pt %icc, 1b
108
/* Delay slot: add the sixteenth word of the block just loaded. */
add %o4, %g3, %o4
109
110
/* %o3 = bytes of the remainder that form whole 32-bit words
 * (a multiple of 4, at most 60).
 */
2: and %o1, 0x3c, %o3
111
/* No whole word left: skip the word loop. */
brz,pn %o3, 2f
112
/* Delay slot: %o1 = bytes left after the whole words (0..3).
 * Subtracts zero when the branch above is taken.
 */
sub %o1, %o3, %o1
113
/* Word loop: one zero-extended 32-bit load per pass. */
1: lduw [%o0 + 0x00], %o5
114
subcc %o3, 0x4, %o3
115
add %o0, 0x4, %o0
116
bne,pt %icc, 1b
117
/* Delay slot: add the word just loaded. */
add %o4, %o5, %o4
118
119
2:
120
/* fold 64-->32 */
121
/* Add the upper 32 bits of %o4 to its lower 32 bits ("srl ..., 0"
 * clears the upper half).  Done twice because the first addition can
 * itself carry out of bit 31.
 */
srlx %o4, 32, %o5
122
srl %o4, 0, %o4
123
add %o4, %o5, %o4
124
srlx %o4, 32, %o5
125
srl %o4, 0, %o4
126
add %o4, %o5, %o4
127
128
/* fold 32-->16 */
129
/* %g1 = 0xffff0000.  Each pass adds bits 31:16 of %o4 (%o5) to bits
 * 15:0 (%g2, obtained by masking with ~%g1); the second pass absorbs
 * the carry the first can produce.
 */
sethi %hi(0xffff0000), %g1
130
srl %o4, 16, %o5
131
andn %o4, %g1, %g2
132
add %o5, %g2, %o4
133
srl %o4, 16, %o5
134
andn %o4, %g1, %g2
135
add %o5, %g2, %o4
136
137
/* Tail handling for the last 0..3 bytes.  Also entered directly from
 * csum_partial_fix_alignment when fewer than 2 bytes remain there.
 */
csum_partial_end_cruft:
138
/* %o4 has the 16-bit sum we have calculated so-far. */
139
cmp %o1, 2
140
/* Fewer than 2 bytes: no trailing halfword to add. */
blu,pt %icc, 1f
141
nop
142
/* Add one trailing halfword. */
lduh [%o0 + 0x00], %o5
143
sub %o1, 2, %o1
144
add %o0, 2, %o0
145
add %o4, %o5, %o4
146
/* Nothing left: skip the single-byte case. */
1: brz,pt %o1, 1f
147
nop
148
/* One byte left: shift it into bits 15:8 before adding, i.e. treat it
 * as the upper byte of a halfword whose lower byte is zero.
 */
ldub [%o0 + 0x00], %o5
149
sub %o1, 1, %o1
150
add %o0, 1, %o0
151
sllx %o5, 8, %o5
152
add %o4, %o5, %o4
153
1:
154
/* fold 32-->16 */
155
/* Same two-pass fold as above, applied after the tail additions. */
sethi %hi(0xffff0000), %g1
156
srl %o4, 16, %o5
157
andn %o4, %g1, %g2
158
add %o5, %g2, %o4
159
srl %o4, 16, %o5
160
andn %o4, %g1, %g2
161
add %o5, %g2, %o4
162
163
/* %g7 == 0 means the buffer started at an even address, so the byte
 * swap below is skipped.
 */
1: brz,pt %g7, 1f
164
nop
165
166
/* We started with an odd byte, byte-swap the result. */
167
/* %o5 = upper byte moved down, %g1 = lower byte moved up. */
srl %o4, 8, %o5
168
and %o4, 0xff, %g1
169
sll %g1, 8, %g1
170
or %o5, %g1, %o4
171
172
/* Add the buffer sum to the caller's sum; addc then adds the 32-bit
 * carry from the addcc back into the result.
 */
1: addcc %o2, %o4, %o2
173
addc %g0, %o2, %o2
174
175
csum_partial_finish:
176
retl
177
/* Delay slot: return value = %o2 with the upper 32 bits cleared. */
srl %o2, 0, %o0
178
179