/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

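/*
 * Commentary: __copy_tofrom_user(to, from, n) takes the destination in
 * r3, the source in r4 and the byte count in r5; it returns in r3 the
 * number of bytes that could not be copied (0 on complete success).
 */
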
#ifdef __BIG_ENDIAN__
#define sLd	sld		/* Shift towards low-numbered address. */
#define sHd	srd		/* Shift towards high-numbered address. */
#else
#define sLd	srd		/* Shift towards low-numbered address. */
#define sHd	sld		/* Shift towards high-numbered address. */
#endif

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

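/*
 * Worked example of the trick above (commentary, not upstream code):
 * a stex at a point where r3_offset = 16 generates
 * EX_TABLE(100b, .Lst_exc - 16), so a fault resumes 16 bytes before
 * .Lst_exc, at the "adjust by 16" slot of the addi ladder that
 * precedes the handler.  The instructions executed on the way down to
 * .Lst_exc add exactly 16 to r3, which is why each adjustment slot
 * must occupy as many bytes of instructions as the amount it adds.
 */
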
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
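	/*
	 * Main aligned loop (commentary): 32 bytes per iteration,
	 * software-pipelined so the loads (into r7/r6 and r9/r8) stay
	 * two doublewords ahead of the stores.
	 */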
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
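	/*
	 * Tail (commentary): at most 15 bytes remain.  PPC_MTOCRF(0x01,r5)
	 * above put the low four bits of the length into cr7, so each bf
	 * below skips the 8-, 4-, 2- or 1-byte chunk whose bit is clear.
	 */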
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

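/*
 * Misaligned source (commentary): r4 is rounded down to a doubleword
 * boundary and each destination doubleword is rebuilt from two
 * adjacent source doublewords.  With r0 = source offset in bytes,
 * r10 = 8*r0 and r11 = 64 - r10 below, each output doubleword is
 * (prev sLd r10) | (next sHd r11); e.g. for a big-endian copy with
 * offset 3 this is (prev << 24) | (next >> 40).
 */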
.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

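/*
 * Misaligned destination (commentary): copy 1, 2 and/or 4 bytes, per
 * the low bits of r6 (the distance to the next 8-byte boundary), until
 * r3 is aligned, keeping a running byte count in r7 so that a fault is
 * fixed up by .Lld_exc_r7/.Lst_exc_r7, which fold r7 into r3.
 */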
.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
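/*
 * Commentary: the loads below are spread across six streams 128 bytes
 * apart (offsets 0, 128, ..., 640), so several cache lines are being
 * fetched concurrently, and the stores trail the corresponding loads
 * by one iteration.  The saved non-volatiles r20-r31 live in the
 * stack redzone (negative offsets from r1; no frame is created).
 */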
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)