/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

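/*
 * Calling convention (evident from the std/ld of r3..r5 at entry and
 * the fixup code below): r3 = destination, r4 = source, r5 = byte
 * count; the return value in r3 is the number of bytes NOT copied,
 * 0 on success.  The C prototype is the usual one from arch/powerpc's
 * uaccess.h:
 *
 *	unsigned long __copy_tofrom_user(void __user *to,
 *			const void __user *from, unsigned long size);
 */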
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
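/*
 * On CPUs with CPU_FTR_VMX_COPY the nop above is patched into the
 * branch to the VMX-assisted __copy_tofrom_user_power7; otherwise we
 * fall through to the generic integer version below.
 */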
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
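/*
 * At this point: the crand above set cr0 "eq" only when len == 4096
 * (cr6) AND (src|dst) & 4095 == 0, which is what selects the
 * whole-page path; cr1 holds (len < 16) for the short-copy branch.
 * r3/r4/r5 were saved below the stack pointer so the exception
 * handlers can recompute how much was left, and cr7 holds the low
 * four bits of the length for the tail copies.
 */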
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
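/*
 * Main aligned loop: r0 = len >> 5 is the number of 32-byte chunks,
 * each moved as four doublewords; cr1 remembers whether that count is
 * zero.  The loads run one iteration ahead of the stores to hide
 * load latency.
 */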
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
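/*
 * Copy the final 0-15 bytes.  cr7 was loaded from the low four bits
 * of the length: bit 0 => 8 bytes, bit 1 => 4, bit 2 => 2, bit 3 => 1.
 */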
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

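/*
 * Source is not 8-byte aligned (the destination is).  Round r4 down
 * to a doubleword boundary and build each destination doubleword from
 * two adjacent source doublewords: sLd shifts the first towards the
 * low-addressed end by r10 = 8*(misalignment) bits, sHd shifts the
 * next one the other way by r11 = 64 - r10 bits, and OR-ing the two
 * yields the unaligned value, whichever endianness we run in.
 */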
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

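/*
 * Destination is not 8-byte aligned: copy 1, then 2, then 4 bytes as
 * needed (r6 = bytes to the boundary, its low bits tested via cr7),
 * shrink the length, reload cr7 from the new length and rejoin the
 * aligned path.
 */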
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
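/*
 * Convention below: the handler for a faulting instruction at numeric
 * label N is at label N+100 (20 -> 120, 270 -> 370, ...), as wired up
 * by the EX_TABLE entries at the end.  Each handler group first
 * adjusts r3 back to the first byte that was not stored.
 */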

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
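/*
 * r6 below ends up holding the number of bytes already copied
 * (current r3 minus the original destination reloaded from -24(r1));
 * the original source is advanced by that much and the saved length
 * shrunk accordingly before the byte-by-byte retry.
 */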
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
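/*
 * r5 (recovered from ctr) is both the count of destination bytes to
 * clear and the value to return.  Zero bytewise up to an 8-byte
 * boundary, then a doubleword at a time, then the remainder; these
 * stores have their own fixups (190/191/192) since the destination
 * itself may be user memory.
 */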
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

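/*
 * EX_TABLE(faulting_insn, fixup) emits an __ex_table entry so that a
 * fault at the first label transfers control to the second; this is
 * how every load/store above gets routed to the handlers just listed.
 */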
	EX_TABLE(20b,120b)
	EX_TABLE(220b,320b)
	EX_TABLE(21b,121b)
	EX_TABLE(221b,321b)
	EX_TABLE(70b,170b)
	EX_TABLE(270b,370b)
	EX_TABLE(22b,122b)
	EX_TABLE(222b,322b)
	EX_TABLE(71b,171b)
	EX_TABLE(271b,371b)
	EX_TABLE(72b,172b)
	EX_TABLE(272b,372b)
	EX_TABLE(244b,344b)
	EX_TABLE(245b,345b)
	EX_TABLE(23b,123b)
	EX_TABLE(73b,173b)
	EX_TABLE(44b,144b)
	EX_TABLE(74b,174b)
	EX_TABLE(45b,145b)
	EX_TABLE(75b,175b)
	EX_TABLE(24b,124b)
	EX_TABLE(25b,125b)
	EX_TABLE(26b,126b)
	EX_TABLE(27b,127b)
	EX_TABLE(28b,128b)
	EX_TABLE(29b,129b)
	EX_TABLE(30b,130b)
	EX_TABLE(31b,131b)
	EX_TABLE(32b,132b)
	EX_TABLE(76b,176b)
	EX_TABLE(33b,133b)
	EX_TABLE(77b,177b)
	EX_TABLE(78b,178b)
	EX_TABLE(79b,179b)
	EX_TABLE(80b,180b)
	EX_TABLE(34b,134b)
	EX_TABLE(94b,194b)
	EX_TABLE(95b,195b)
	EX_TABLE(96b,196b)
	EX_TABLE(35b,135b)
	EX_TABLE(81b,181b)
	EX_TABLE(36b,136b)
	EX_TABLE(82b,182b)
	EX_TABLE(37b,137b)
	EX_TABLE(83b,183b)
	EX_TABLE(38b,138b)
	EX_TABLE(39b,139b)
	EX_TABLE(84b,184b)
	EX_TABLE(85b,185b)
	EX_TABLE(40b,140b)
	EX_TABLE(86b,186b)
	EX_TABLE(41b,141b)
	EX_TABLE(87b,187b)
	EX_TABLE(42b,142b)
	EX_TABLE(88b,188b)
	EX_TABLE(43b,143b)
	EX_TABLE(89b,189b)
	EX_TABLE(90b,190b)
	EX_TABLE(91b,191b)
	EX_TABLE(92b,192b)

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
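/*
 * The loop below keeps six load/store streams in flight, spaced 128
 * bytes apart (offsets 0, 128, 256, ..., 640), so several cache lines
 * are being filled and drained concurrently.
 */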
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
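/*
 * No byte-accurate progress is tracked in the page-copy loop, so the
 * fixup restores the non-volatile registers, reloads the original
 * pointers, resets the length to 4096 and retries through the
 * standard path, whose handlers compute the exact residue.
 */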
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	EX_TABLE(20b,100b)
	EX_TABLE(21b,100b)
	EX_TABLE(22b,100b)
	EX_TABLE(23b,100b)
	EX_TABLE(24b,100b)
	EX_TABLE(25b,100b)
	EX_TABLE(26b,100b)
	EX_TABLE(27b,100b)
	EX_TABLE(28b,100b)
	EX_TABLE(29b,100b)
	EX_TABLE(30b,100b)
	EX_TABLE(31b,100b)
	EX_TABLE(32b,100b)
	EX_TABLE(33b,100b)
	EX_TABLE(34b,100b)
	EX_TABLE(35b,100b)
	EX_TABLE(36b,100b)
	EX_TABLE(37b,100b)
	EX_TABLE(38b,100b)
	EX_TABLE(39b,100b)
	EX_TABLE(40b,100b)
	EX_TABLE(41b,100b)
	EX_TABLE(42b,100b)
	EX_TABLE(43b,100b)
	EX_TABLE(44b,100b)
	EX_TABLE(45b,100b)
	EX_TABLE(46b,100b)
	EX_TABLE(47b,100b)
	EX_TABLE(48b,100b)
	EX_TABLE(49b,100b)
	EX_TABLE(50b,100b)
	EX_TABLE(51b,100b)
	EX_TABLE(52b,100b)
	EX_TABLE(53b,100b)
	EX_TABLE(54b,100b)
	EX_TABLE(55b,100b)
	EX_TABLE(56b,100b)
	EX_TABLE(57b,100b)
	EX_TABLE(58b,100b)
	EX_TABLE(59b,100b)
	EX_TABLE(60b,100b)
	EX_TABLE(61b,100b)
	EX_TABLE(62b,100b)
	EX_TABLE(63b,100b)
	EX_TABLE(64b,100b)
	EX_TABLE(65b,100b)
	EX_TABLE(66b,100b)
	EX_TABLE(67b,100b)
	EX_TABLE(68b,100b)
	EX_TABLE(69b,100b)
	EX_TABLE(70b,100b)
	EX_TABLE(71b,100b)
	EX_TABLE(72b,100b)
	EX_TABLE(73b,100b)
	EX_TABLE(74b,100b)
	EX_TABLE(75b,100b)
	EX_TABLE(76b,100b)
	EX_TABLE(77b,100b)
	EX_TABLE(78b,100b)
	EX_TABLE(79b,100b)
	EX_TABLE(80b,100b)
	EX_TABLE(81b,100b)
	EX_TABLE(82b,100b)
	EX_TABLE(83b,100b)
	EX_TABLE(84b,100b)
	EX_TABLE(85b,100b)
	EX_TABLE(86b,100b)
	EX_TABLE(87b,100b)
	EX_TABLE(88b,100b)
	EX_TABLE(89b,100b)
	EX_TABLE(90b,100b)
	EX_TABLE(91b,100b)

EXPORT_SYMBOL(__copy_tofrom_user)
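
/*
 * A minimal usage sketch (illustrative, not part of this file): the
 * uaccess wrappers call this routine and treat a nonzero return as a
 * short copy, e.g.
 *
 *	if (copy_from_user(kbuf, ubuf, len))	// residue => fault
 *		return -EFAULT;
 *
 * and, per the comment above the load handlers, a fault on the source
 * still leaves the whole destination initialized (tail zeroed).
 */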