1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * arch/ia64/lib/xor.S
4 *
5 * Optimized RAID-5 checksumming functions for IA-64.
6 */
7
8#include <asm/asmmacro.h>
9#include <asm/export.h>
10
/*
 * xor_ia64_2(bytes, p1, p2)
 *
 * XOR two buffers: p1[i] ^= p2[i] for bytes/8 64-bit words.
 * in0 = byte count (assumed a non-zero multiple of 8 -- TODO confirm
 * against the callers in include/asm/xor.h), in1 = p1 (read and
 * written), in2 = p2 (read only).
 *
 * The loop is software-pipelined using rotating registers and rotating
 * predicates: pipeline stage 0 (p[0]) issues the two loads, stage 6
 * (p[6]) XORs the pair loaded six iterations earlier, stage 7 (p[6+1])
 * stores the result of the previous iteration's XOR.  ar.ec = 6+2
 * lets br.ctop drain the in-flight stages after the last load.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	// 16-reg frame, 16 rotating (s1[7]+s2[7]+d[2])
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count: drain 6 load->xor stages + store
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word (non-temporal)
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: xor pair loaded 6 iterations ago
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
	nop.f 0
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)
48
/*
 * xor_ia64_3(bytes, p1, p2, p3)
 *
 * XOR three buffers: p1[i] ^= p2[i] ^ p3[i] for bytes/8 64-bit words.
 * in0 = byte count (assumed a non-zero multiple of 8 -- TODO confirm
 * against callers), in1 = p1 (read/write), in2/in3 = read-only sources.
 *
 * Same software-pipelined structure as xor_ia64_2; the loop body has
 * two instruction groups: the first computes s1^s2 into d[0], the
 * second stores last iteration's finished d[1] and folds s3 into d[0].
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	// 24-reg frame, 24 rotating (3*7+2 rounded up)
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov r18 = in3				// r18 = load pointer into p3
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: d = s1 ^ s2
	;;
(p[0])	ld8.nta s3[0] = [r18], 8		// stage 0: load next p3 word
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
(p[6])	xor d[0] = d[0], s3[6]			// stage 6: d ^= s3
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)
89
/*
 * xor_ia64_4(bytes, p1, p2, p3, p4)
 *
 * XOR four buffers: p1[i] ^= p2[i] ^ p3[i] ^ p4[i] for bytes/8 words.
 * in0 = byte count (assumed a non-zero multiple of 8 -- TODO confirm
 * against callers), in1 = p1 (read/write), in2..in4 = read-only.
 *
 * Pipelined as in xor_ia64_2/3; the four sources are reduced as a
 * balanced tree: (s1^s2) and (s3^s4, via static scratch r20) in the
 * first group, combined in the second group.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	// 32-reg frame, 32 rotating (4*7+2 rounded up)
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov r18 = in3				// r18 = load pointer into p3
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	mov r19 = in4				// r19 = load pointer into p4
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: d = s1 ^ s2
(p[0])	ld8.nta s3[0] = [r18], 8		// stage 0: load next p3 word
(p[0])	ld8.nta s4[0] = [r19], 8		// stage 0: load next p4 word
(p[6])	xor r20 = s3[6], s4[6]			// stage 6: scratch = s3 ^ s4
	;;
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
(p[6])	xor d[0] = d[0], r20			// stage 6: combine the two partial xors
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)
133
/*
 * xor_ia64_5(bytes, p1, p2, p3, p4, p5)
 *
 * XOR five buffers: p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i] for
 * bytes/8 64-bit words.  in0 = byte count (assumed a non-zero multiple
 * of 8 -- TODO confirm against callers), in1 = p1 (read/write),
 * in2..in5 = read-only sources.
 *
 * Pipelined as in the routines above; reduction order per iteration:
 * (s1^s2), (s3^s4) into static scratch r21, then folded together and
 * finally xored with s5.  Note r20 holds the p5 pointer here (unlike
 * xor_ia64_4, where r20 is scratch).
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	// 40-reg frame, 40 rotating (5*7+2 rounded up)
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov r18 = in3				// r18 = load pointer into p3
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	mov r19 = in4				// r19 = load pointer into p4
	mov r20 = in5				// r20 = load pointer into p5
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: d = s1 ^ s2
(p[0])	ld8.nta s3[0] = [r18], 8		// stage 0: load next p3 word
(p[0])	ld8.nta s4[0] = [r19], 8		// stage 0: load next p4 word
(p[6])	xor r21 = s3[6], s4[6]			// stage 6: scratch = s3 ^ s4
	;;
(p[0])	ld8.nta s5[0] = [r20], 8		// stage 0: load next p5 word
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
(p[6])	xor d[0] = d[0], r21			// stage 6: combine partial xors
	;;
(p[6])	xor d[0] = d[0], s5[6]			// stage 6: fold in s5
	nop.f 0
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 *
 * NOTE(review): everything from this header to the end of the file is a
 * second, duplicated copy of the same source (carrying the pre-SPDX GPL
 * boilerplate instead of the SPDX tag above).  Each xor_ia64_N symbol is
 * therefore defined twice, which cannot assemble; one copy should be
 * removed.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
15
16#include <asm/asmmacro.h>
17#include <asm/export.h>
18
/*
 * xor_ia64_2(bytes, p1, p2)
 *
 * NOTE(review): duplicate of the xor_ia64_2 definition earlier in this
 * file -- defining the symbol twice will not assemble; remove one copy.
 *
 * XOR two buffers: p1[i] ^= p2[i] for bytes/8 64-bit words.
 * in0 = byte count (assumed a non-zero multiple of 8 -- TODO confirm
 * against callers), in1 = p1 (read/write), in2 = p2 (read only).
 *
 * Software-pipelined with rotating registers/predicates: stage 0 loads,
 * stage 6 XORs, stage 7 stores; ar.ec = 6+2 drains the pipeline.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	// 16-reg frame, 16 rotating (s1[7]+s2[7]+d[2])
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word (non-temporal)
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: xor pair loaded 6 iterations ago
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
	nop.f 0
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)
56
/*
 * xor_ia64_3(bytes, p1, p2, p3)
 *
 * NOTE(review): duplicate of the xor_ia64_3 definition earlier in this
 * file -- defining the symbol twice will not assemble; remove one copy.
 *
 * XOR three buffers: p1[i] ^= p2[i] ^ p3[i] for bytes/8 64-bit words.
 * in0 = byte count (assumed a non-zero multiple of 8 -- TODO confirm
 * against callers), in1 = p1 (read/write), in2/in3 = read-only sources.
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	// 24-reg frame, 24 rotating (3*7+2 rounded up)
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov r18 = in3				// r18 = load pointer into p3
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: d = s1 ^ s2
	;;
(p[0])	ld8.nta s3[0] = [r18], 8		// stage 0: load next p3 word
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
(p[6])	xor d[0] = d[0], s3[6]			// stage 6: d ^= s3
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)
97
/*
 * xor_ia64_4(bytes, p1, p2, p3, p4)
 *
 * NOTE(review): duplicate of the xor_ia64_4 definition earlier in this
 * file -- defining the symbol twice will not assemble; remove one copy.
 *
 * XOR four buffers: p1[i] ^= p2[i] ^ p3[i] ^ p4[i] for bytes/8 words.
 * in0 = byte count (assumed a non-zero multiple of 8 -- TODO confirm
 * against callers), in1 = p1 (read/write), in2..in4 = read-only.
 * Sources reduced as a balanced tree via static scratch r20.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	// 32-reg frame, 32 rotating (4*7+2 rounded up)
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov r18 = in3				// r18 = load pointer into p3
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	mov r19 = in4				// r19 = load pointer into p4
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: d = s1 ^ s2
(p[0])	ld8.nta s3[0] = [r18], 8		// stage 0: load next p3 word
(p[0])	ld8.nta s4[0] = [r19], 8		// stage 0: load next p4 word
(p[6])	xor r20 = s3[6], s4[6]			// stage 6: scratch = s3 ^ s4
	;;
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
(p[6])	xor d[0] = d[0], r20			// stage 6: combine the two partial xors
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)
141
/*
 * xor_ia64_5(bytes, p1, p2, p3, p4, p5)
 *
 * NOTE(review): duplicate of the xor_ia64_5 definition earlier in this
 * file -- defining the symbol twice will not assemble; remove one copy.
 *
 * XOR five buffers: p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i] for
 * bytes/8 64-bit words.  in0 = byte count (assumed a non-zero multiple
 * of 8 -- TODO confirm against callers), in1 = p1 (read/write),
 * in2..in5 = read-only.  Here r20 is the p5 pointer and r21 is the
 * static scratch (unlike xor_ia64_4, where r20 is scratch).
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	// 40-reg frame, 40 rotating (5*7+2 rounded up)
	.save ar.lc, r30
	mov r30 = ar.lc				// save loop-count register
	.save pr, r29
	mov r29 = pr				// save predicates
	;;
	.body
	mov r8 = in1				// r8 = store pointer (p1)
	mov ar.ec = 6 + 2			// epilogue count to drain the pipeline
	shr in0 = in0, 3			// byte count -> 8-byte word count
	;;
	adds in0 = -1, in0			// ar.lc counts N-1 .. 0
	mov r16 = in1				// r16 = load pointer into p1
	mov r17 = in2				// r17 = load pointer into p2
	;;
	mov r18 = in3				// r18 = load pointer into p3
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// prime p16, the first rotating predicate
	mov r19 = in4				// r19 = load pointer into p4
	mov r20 = in5				// r20 = load pointer into p5
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load next p1 word
(p[0])	ld8.nta s2[0] = [r17], 8		// stage 0: load next p2 word
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: d = s1 ^ s2
(p[0])	ld8.nta s3[0] = [r18], 8		// stage 0: load next p3 word
(p[0])	ld8.nta s4[0] = [r19], 8		// stage 0: load next p4 word
(p[6])	xor r21 = s3[6], s4[6]			// stage 6: scratch = s3 ^ s4
	;;
(p[0])	ld8.nta s5[0] = [r20], 8		// stage 0: load next p5 word
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: store previous iteration's result
(p[6])	xor d[0] = d[0], r21			// stage 6: combine partial xors
	;;
(p[6])	xor d[0] = d[0], s5[6]			// stage 6: fold in s5
	nop.f 0
	br.ctop.dptk.few 0b			// rotate regs/preds, decrement ar.lc
	;;
	mov ar.lc = r30				// restore loop counter
	mov pr = r29, -1			// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)