/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc V9 optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996, 2000 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <asm/export.h>
	.text

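/* Align the buffer to a 32-bit boundary before the main loop:
 * consume a leading byte and/or a leading halfword as needed,
 * advancing %o0 (buff) and shrinking %o1 (len) to match.
 */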
csum_partial_fix_alignment:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
	be,pt		%icc, 1f
	 nop
	ldub		[%o0 + 0x00], %o4
	add		%o0, 1, %o0
	sub		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, csum_partial_post_align
	 cmp		%o1, 2
	blu,pn		%icc, csum_partial_end_cruft
	 nop
	lduh		[%o0 + 0x00], %o5
	add		%o0, 2, %o0
	sub		%o1, 2, %o1
	ba,pt		%xcc, csum_partial_post_align
	 add		%o5, %o4, %o4

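/* csum_partial computes a 32-bit one's-complement partial checksum
 * over buff[0..len-1] and folds it into the caller-supplied sum.
 * As declared in the kernel's checksum headers:
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * The result is returned zero-extended in %o0.
 */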
	.align		32
	.globl		csum_partial
	.type		csum_partial,#function
	EXPORT_SYMBOL(csum_partial)
csum_partial:		/* %o0=buff, %o1=len, %o2=sum */
	prefetch	[%o0 + 0x000], #n_reads
	clr		%o4
	prefetch	[%o0 + 0x040], #n_reads
	brz,pn		%o1, csum_partial_finish
	 andcc		%o0, 0x3, %g0

	/* We "remember" in %g7 whether the lowest bit of the address
	 * was set, because if it was we must swap the upper and lower
	 * 8-bit halves of the sum we compute.
	 */
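	/* Note: the andcc of %o0 with 0x3 above set the %icc value
	 * tested by the bne below; the delay-slot andcc then re-sets
	 * %icc from the low address bit alone, and that is what the
	 * be at csum_partial_fix_alignment tests.
	 */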
	bne,pn		%icc, csum_partial_fix_alignment
	 andcc		%o0, 0x1, %g7

csum_partial_post_align:
	prefetch	[%o0 + 0x080], #n_reads
	andncc		%o1, 0x3f, %o3

	prefetch	[%o0 + 0x0c0], #n_reads
	sub		%o1, %o3, %o1
	brz,pn		%o3, 2f
	 prefetch	[%o0 + 0x100], #n_reads

	/* So that we don't need to use the non-pairing
	 * add-with-carry instructions, we accumulate 32-bit
	 * values into a 64-bit register.  At the end of the
	 * loop we fold the result down to 32 bits, and then
	 * down to 16 bits.
	 */
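	/* Main loop: 0x40 bytes (sixteen 32-bit words) per iteration,
	 * with loads interleaved with the adds of previously loaded
	 * words to hide load latency.
	 */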
	prefetch	[%o0 + 0x140], #n_reads
1:	lduw		[%o0 + 0x00], %o5
	lduw		[%o0 + 0x04], %g1
	lduw		[%o0 + 0x08], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x0c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x10], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x14], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x18], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x1c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x20], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x24], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x28], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x2c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x30], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x34], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x38], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x3c], %g3
	add		%o4, %g1, %o4
	prefetch	[%o0 + 0x180], #n_reads
	add		%o4, %g2, %o4
	subcc		%o3, 0x40, %o3
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 add		%o4, %g3, %o4

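	/* Sum any remaining 32-bit words (up to 0x3c bytes can be
	 * left over here), four bytes per iteration.
	 */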
2:	and		%o1, 0x3c, %o3
	brz,pn		%o3, 2f
	 sub		%o1, %o3, %o1
1:	lduw		[%o0 + 0x00], %o5
	subcc		%o3, 0x4, %o3
	add		%o0, 0x4, %o0
	bne,pt		%icc, 1b
	 add		%o4, %o5, %o4

2:
	/* fold 64-->32 */
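	/* Two rounds are needed: adding the two 32-bit halves can
	 * itself carry into bit 32, and the second round folds that
	 * carry back in.
	 */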
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4

	/* fold 32-->16 */
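	/* Same idea at 16 bits: after two rounds the result is
	 * guaranteed to fit in 16 bits.
	 */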
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

csum_partial_end_cruft:
	/* %o4 holds the 16-bit sum we have calculated so far. */
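	/* Consume a trailing halfword and/or a final odd byte.  The
	 * odd byte is shifted into the high 8 bits of its 16-bit
	 * slot, matching big-endian byte order.
	 */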
	cmp		%o1, 2
	blu,pt		%icc, 1f
	 nop
	lduh		[%o0 + 0x00], %o5
	sub		%o1, 2, %o1
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
1:	brz,pt		%o1, 1f
	 nop
	ldub		[%o0 + 0x00], %o5
	sub		%o1, 1, %o1
	add		%o0, 1, %o0
	sllx		%o5, 8, %o5
	add		%o4, %o5, %o4
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		%g7, 1f
	 nop

	/* We started with an odd byte, byte-swap the result. */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4

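	/* Fold our sum into the caller-supplied sum with an
	 * end-around carry: addcc sets the carry bit and addc adds
	 * it back in.
	 */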
1:	addcc		%o2, %o4, %o2
	addc		%g0, %o2, %o2

csum_partial_finish:
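	/* The srl by zero in the delay slot clears the upper 32 bits
	 * of %o0, returning the sum zero-extended.
	 */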
	retl
	 srl		%o2, 0, %o0