Linux Audio

Check our new training course

Linux debugging, profiling, tracing and performance analysis training

Apr 14-17, 2025
Register
Loading...
v4.6
  1/*
  2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of the GNU General Public License
  6 * as published by the Free Software Foundation; either version
  7 * 2 of the License, or (at your option) any later version.
  8 */
  9#include <asm/processor.h>
 10#include <asm/ppc_asm.h>
 
 11
 12#ifdef __BIG_ENDIAN__
 13#define sLd sld		/* Shift towards low-numbered address. */
 14#define sHd srd		/* Shift towards high-numbered address. */
 15#else
 16#define sLd srd		/* Shift towards low-numbered address. */
 17#define sHd sld		/* Shift towards high-numbered address. */
 18#endif
 19
 20	.align	7
 21_GLOBAL_TOC(__copy_tofrom_user)
   /*
    * In:  r3 = destination, r4 = source, r5 = byte count.
    * Out: r3 = 0 when everything was copied; the exception handlers
    *      further down return the number of bytes not copied instead.
    * The three arguments are saved at -24/-16/-8(r1) so those handlers
    * can work out how far the copy got.
    */
 22BEGIN_FTR_SECTION
 23	nop
 24FTR_SECTION_ELSE
 25	b	__copy_tofrom_user_power7	/* alternative half of the CPU_FTR_VMX_COPY feature section */
 26ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 27_GLOBAL(__copy_tofrom_user_base)
 28	/* first check for a whole page copy on a page boundary */
 29	cmpldi	cr1,r5,16	/* cr1 = len vs 16 (unsigned), used by the "blt cr1" branches */
 30	cmpdi	cr6,r5,4096	/* cr6.eq = (len == 4096) */
 31	or	r0,r3,r4
 32	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
 33	andi.	r0,r0,4095	/* cr0.eq = dest and src are both 4K-aligned */
 34	std	r3,-24(r1)	/* save dest for the exception handlers */
 35	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq = page-aligned AND len == 4096 */
 36	std	r4,-16(r1)	/* save src */
 37	std	r5,-8(r1)	/* save len */
 38	dcbt	0,r4
 39	beq	.Lcopy_page_4K
 40	andi.	r6,r6,7		/* r6 = bytes to 8-byte-align dest; cr0.eq if already aligned */
 41	PPC_MTOCRF(0x01,r5)	/* low 4 bits of len -> cr7 (8/4/2/1 tail pieces) */
 42	blt	cr1,.Lshort_copy	/* len < 16 */
 43/* Below we want to nop out the bne if we're on a CPU that has the
 44 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 45 * cleared.
 46 * At the time of writing the only CPU that has this combination of bits
 47 * set is Power6.
 48 */
 49BEGIN_FTR_SECTION
 50	nop
 51FTR_SECTION_ELSE
 52	bne	.Ldst_unaligned
 53ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
 54		    CPU_FTR_UNALIGNED_LD_STD)
 55.Ldst_aligned:
 56	addi	r3,r3,-16	/* dest is biased by -16; the paths below compensate */
 57BEGIN_FTR_SECTION
 58	andi.	r0,r4,7		/* r0 = src offset within its doubleword */
 59	bne	.Lsrc_unaligned
 60END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 61	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
 62	srdi	r0,r5,5		/* r0 = number of 32-byte chunks */
 63	cmpdi	cr1,r0,0	/* cr1.eq = no 32-byte chunks */
 6420:	ld	r7,0(r4)
 65220:	ld	r6,8(r4)
 66	addi	r4,r4,16
 67	mtctr	r0
 68	andi.	r0,r5,0x10	/* is there an odd 16-byte piece? */
 69	beq	22f
 70	addi	r3,r3,16
 71	addi	r4,r4,-16
 72	mr	r9,r7
 73	mr	r8,r6
 74	beq	cr1,72f		/* only 16 bytes of bulk data: store them and finish */
 7521:	ld	r7,16(r4)	/* main loop: 32 bytes per iteration, loads ahead of stores */
 76221:	ld	r6,24(r4)
 77	addi	r4,r4,32
 7870:	std	r9,0(r3)
 79270:	std	r8,8(r3)
 8022:	ld	r9,0(r4)
 81222:	ld	r8,8(r4)
 8271:	std	r7,16(r3)
 83271:	std	r6,24(r3)
 84	addi	r3,r3,32
 85	bdnz	21b
 8672:	std	r9,0(r3)
 87272:	std	r8,8(r3)
 88	andi.	r5,r5,0xf	/* r5 = trailing bytes (< 16) */
 89	beq+	3f
 90	addi	r4,r4,16
 91.Ldo_tail:
 92	addi	r3,r3,16	/* r3 -> next dest byte (undoes the bias, or steps over the last 16 bytes stored) */
 93	bf	cr7*4+0,246f	/* 8-byte piece? */
 94244:	ld	r9,0(r4)
 95	addi	r4,r4,8
 96245:	std	r9,0(r3)
 97	addi	r3,r3,8
 98246:	bf	cr7*4+1,1f	/* 4-byte piece? */
 9923:	lwz	r9,0(r4)
100	addi	r4,r4,4
10173:	stw	r9,0(r3)
102	addi	r3,r3,4
1031:	bf	cr7*4+2,2f	/* 2-byte piece? */
10444:	lhz	r9,0(r4)
105	addi	r4,r4,2
10674:	sth	r9,0(r3)
107	addi	r3,r3,2
1082:	bf	cr7*4+3,3f	/* final byte? */
10945:	lbz	r9,0(r4)
11075:	stb	r9,0(r3)
1113:	li	r3,0		/* success: 0 bytes left uncopied */
112	blr
113
114.Lsrc_unaligned:
   /*
    * Source is not 8-byte aligned.  On entry r0 = src & 7 (left there by
    * the andi. at .Ldst_aligned) and r3 is still biased by -16.
    * Aligned doublewords are read from the rounded-down source and each
    * destination doubleword is assembled from two neighbouring source
    * doublewords with sLd/sHd followed by or.
    */
115	srdi	r6,r5,3		/* r6 = len / 8 */
116	addi	r5,r5,-16
117	subf	r4,r0,r4	/* round src down to an 8-byte boundary */
118	srdi	r7,r5,4		/* r7 = (len - 16) / 16, used as the loop count */
119	sldi	r10,r0,3	/* r10 = src misalignment in bits */
120	cmpldi	cr6,r6,3
121	andi.	r5,r5,7		/* cr0.eq = no sub-doubleword tail; consumed by "bne 6f" below */
122	mtctr	r7
123	subfic	r11,r10,64	/* r11 = 64 - r10, the complementary shift */
124	add	r5,r5,r0	/* r5 = tail bytes + src offset (compared with 8 at 6:) */
125	bt	cr7*4+0,28f	/* 8s bit of len set: take the other entry sequence */
126
12724:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
12825:	ld	r0,8(r4)
129	sLd	r6,r9,r10
13026:	ldu	r9,16(r4)
131	sHd	r7,r0,r11
132	sLd	r8,r0,r10
133	or	r7,r7,r6
134	blt	cr6,79f
13527:	ld	r0,8(r4)
136	b	2f
137
13828:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
13929:	ldu	r9,8(r4)
140	sLd	r8,r0,r10
141	addi	r3,r3,-8
142	blt	cr6,5f
14330:	ld	r0,8(r4)
144	sHd	r12,r9,r11
145	sLd	r6,r9,r10
14631:	ldu	r9,16(r4)
147	or	r12,r8,r12
148	sHd	r7,r0,r11
149	sLd	r8,r0,r10
150	addi	r3,r3,16
151	beq	cr6,78f
152
1531:	or	r7,r7,r6	/* main loop: two destination doublewords per iteration */
15432:	ld	r0,8(r4)
15576:	std	r12,8(r3)
1562:	sHd	r12,r9,r11
157	sLd	r6,r9,r10
15833:	ldu	r9,16(r4)
159	or	r12,r8,r12
16077:	stdu	r7,16(r3)
161	sHd	r7,r0,r11
162	sLd	r8,r0,r10
163	bdnz	1b
164
16578:	std	r12,8(r3)
166	or	r7,r7,r6
16779:	std	r7,16(r3)
1685:	sHd	r12,r9,r11
169	or	r12,r8,r12
17080:	std	r12,24(r3)
171	bne	6f		/* cr0 still from the andi. above: tail bytes remain */
172	li	r3,0
173	blr
1746:	cmpwi	cr1,r5,8	/* does the tail reach into one more source doubleword? */
175	addi	r3,r3,32
176	sLd	r9,r9,r10
177	ble	cr1,7f
17834:	ld	r0,8(r4)
179	sHd	r7,r0,r11
180	or	r9,r7,r9
1817:
   /*
    * Store the remaining 4/2/1-byte pieces out of r9 according to cr7.
    * Big-endian rotates the next bytes into the low end before each
    * store; little-endian rotates the consumed bytes out afterwards.
    */
182	bf	cr7*4+1,1f
183#ifdef __BIG_ENDIAN__
184	rotldi	r9,r9,32
185#endif
18694:	stw	r9,0(r3)
187#ifdef __LITTLE_ENDIAN__
188	rotrdi	r9,r9,32
189#endif
190	addi	r3,r3,4
1911:	bf	cr7*4+2,2f
192#ifdef __BIG_ENDIAN__
193	rotldi	r9,r9,16
194#endif
19595:	sth	r9,0(r3)
196#ifdef __LITTLE_ENDIAN__
197	rotrdi	r9,r9,16
198#endif
199	addi	r3,r3,2
2002:	bf	cr7*4+3,3f
201#ifdef __BIG_ENDIAN__
202	rotldi	r9,r9,8
203#endif
20496:	stb	r9,0(r3)
205#ifdef __LITTLE_ENDIAN__
206	rotrdi	r9,r9,8
207#endif
2083:	li	r3,0		/* success: 0 bytes left uncopied */
209	blr
210
211.Ldst_unaligned:
   /*
    * Copy the r6 (1..7) leading bytes needed to bring the destination
    * to an 8-byte boundary, then rejoin the aligned path.  r7 is the
    * running byte offset used by the indexed loads/stores, and by the
    * 136/137 and 182/183 fixups.
    */
212	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
213	subf	r5,r6,r5	/* len -= bytes handled here */
214	li	r7,0
215	cmpldi	cr1,r5,16	/* redo the "< 16" test for the reduced length */
216	bf	cr7*4+3,1f	/* 1-byte piece? */
21735:	lbz	r0,0(r4)
21881:	stb	r0,0(r3)
219	addi	r7,r7,1
2201:	bf	cr7*4+2,2f	/* 2-byte piece? */
22136:	lhzx	r0,r7,r4
22282:	sthx	r0,r7,r3
223	addi	r7,r7,2
2242:	bf	cr7*4+1,3f	/* 4-byte piece? */
22537:	lwzx	r0,r7,r4
22683:	stwx	r0,r7,r3
2273:	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the remaining length */
228	add	r4,r6,r4	/* advance src past the leading bytes */
229	add	r3,r6,r3	/* advance dest likewise; it is now 8-byte aligned */
230	b	.Ldst_aligned
231
232.Lshort_copy:
   /*
    * len < 16.  cr7 holds the low four bits of len, so copy an 8-, 4-,
    * 2- and 1-byte piece as each bit dictates.  The 8-byte piece is
    * moved as two words (presumably because neither pointer has been
    * aligned on this path -- confirm).
    */
233	bf	cr7*4+0,1f	/* 8-byte piece? */
23438:	lwz	r0,0(r4)
23539:	lwz	r9,4(r4)
236	addi	r4,r4,8
23784:	stw	r0,0(r3)
23885:	stw	r9,4(r3)
239	addi	r3,r3,8
2401:	bf	cr7*4+1,2f	/* 4-byte piece? */
24140:	lwz	r0,0(r4)
242	addi	r4,r4,4
24386:	stw	r0,0(r3)
244	addi	r3,r3,4
2452:	bf	cr7*4+2,3f	/* 2-byte piece? */
24641:	lhz	r0,0(r4)
247	addi	r4,r4,2
24887:	sth	r0,0(r3)
249	addi	r3,r3,2
2503:	bf	cr7*4+3,4f	/* final byte? */
25142:	lbz	r0,0(r4)
25288:	stb	r0,0(r3)
2534:	li	r3,0		/* success: 0 bytes left uncopied */
254	blr
255
256/*
257 * exception handlers follow
258 * we have to return the number of bytes not copied
259 * for an exception on a load, we set the rest of the destination to 0
   *
   * Label 1NN (3NN) is the fixup for the load at label NN (2NN); the
   * pairing is made in the exception table.  The labels fall through a
   * chain of addi's so that, on reaching 1: below, r3 has been advanced
   * to the first destination byte that was not written.
260 */
261
262136:
263137:
264	add	r3,r3,r7	/* r7 = byte offset reached in .Ldst_unaligned */
265	b	1f
266130:
267131:
268	addi	r3,r3,8
269120:
270320:
271122:
272322:
273124:
274125:
275126:
276127:
277128:
278129:
279133:
280	addi	r3,r3,8
281132:
282	addi	r3,r3,8
283121:
284321:
285344:
286134:
287135:
288138:
289139:
290140:
291141:
292142:
293123:
294144:
295145:
296
297/*
298 * here we have had a fault on a load and r3 points to the first
299 * unmodified byte of the destination
300 */
3011:	ld	r6,-24(r1)	/* original dest */
302	ld	r4,-16(r1)	/* original src */
303	ld	r5,-8(r1)	/* original len */
304	subf	r6,r6,r3	/* r6 = bytes already copied */
305	add	r4,r4,r6	/* r4 = source address of the first uncopied byte */
306	subf	r5,r6,r5	/* #bytes left to go */
307
308/*
309 * first see if we can copy any more bytes before hitting another exception
310 */
311	mtctr	r5
31243:	lbz	r0,0(r4)
313	addi	r4,r4,1
31489:	stb	r0,0(r3)
315	addi	r3,r3,1
316	bdnz	43b
317	li	r3,0		/* huh? all copied successfully this time? */
318	blr
319
320/*
321 * here we have trapped again, need to clear ctr bytes starting at r3
322 */
323143:	mfctr	r5		/* r5 = bytes still uncopied */
324	li	r0,0
325	mr	r4,r3		/* r4 = cursor for the zeroing below */
326	mr	r3,r5		/* return the number of bytes not copied */
3271:	andi.	r9,r4,7		/* zero single bytes until r4 is 8-byte aligned */
328	beq	3f
32990:	stb	r0,0(r4)
330	addic.	r5,r5,-1
331	addi	r4,r4,1
332	bne	1b
333	blr
3343:	cmpldi	cr1,r5,8
335	srdi	r9,r5,3		/* r9 = whole doublewords to zero */
336	andi.	r5,r5,7		/* r5 = leftover bytes; cr0.eq if none */
337	blt	cr1,93f
338	mtctr	r9
33991:	std	r0,0(r4)
340	addi	r4,r4,8
341	bdnz	91b
34293:	beqlr			/* nothing left over: done */
343	mtctr	r5	
34492:	stb	r0,0(r4)
345	addi	r4,r4,1
346	bdnz	92b
347	blr
348
349/*
350 * exception handlers for stores: we just need to work
351 * out how many bytes weren't copied
   *
   * Label 1NN (3NN) is the fixup for the store at label NN (2NN); the
   * pairing is made in the exception table.  The labels fall through
   * the addi chain so that r3 ends up at the first byte that was not
   * stored: the +24 group is for stores at 24(r3), the +16 group
   * (171/177/179) for stores at 16(r3), the +8 group for 8(r3), the
   * +4 group for 4(r3), and the rest for stores at 0(r3).
   *
   * 179 belongs to the +16 group because store 79 is "std r7,16(r3)".
   * Leaving it in the +0 group made the returned count 16 bytes too
   * large, i.e. possibly larger than the requested length.
352 */
353182:
354183:
355	add	r3,r3,r7	/* r7 = byte offset reached in .Ldst_unaligned */
356	b	1f
357371:
358180:
359	addi	r3,r3,8
360171:
361177:
362179:
363	addi	r3,r3,8
364370:
365372:
366176:
367178:
368	addi	r3,r3,4
369185:
370	addi	r3,r3,4
371170:
372172:
373345:
374173:
375174:
376175:
377181:
378184:
379186:
380187:
381188:
382189:	
383194:
384195:
385196:
3861:
387	ld	r6,-24(r1)	/* original dest */
388	ld	r5,-8(r1)	/* original len */
389	add	r6,r6,r5	/* r6 = one past the end of the destination */
390	subf	r3,r3,r6	/* #bytes not copied */
391190:			/* faults while zeroing after a load fault: r3 is already set */
392191:
393192:
394	blr			/* #bytes not copied in r3 */
395
396	.section __ex_table,"a"
397	.align	3
398	.llong	20b,120b
399	.llong	220b,320b
400	.llong	21b,121b
401	.llong	221b,321b
402	.llong	70b,170b
403	.llong	270b,370b
404	.llong	22b,122b
405	.llong	222b,322b
406	.llong	71b,171b
407	.llong	271b,371b
408	.llong	72b,172b
409	.llong	272b,372b
410	.llong	244b,344b
411	.llong	245b,345b
412	.llong	23b,123b
413	.llong	73b,173b
414	.llong	44b,144b
415	.llong	74b,174b
416	.llong	45b,145b
417	.llong	75b,175b
418	.llong	24b,124b
419	.llong	25b,125b
420	.llong	26b,126b
421	.llong	27b,127b
422	.llong	28b,128b
423	.llong	29b,129b
424	.llong	30b,130b
425	.llong	31b,131b
426	.llong	32b,132b
427	.llong	76b,176b
428	.llong	33b,133b
429	.llong	77b,177b
430	.llong	78b,178b
431	.llong	79b,179b
432	.llong	80b,180b
433	.llong	34b,134b
434	.llong	94b,194b
435	.llong	95b,195b
436	.llong	96b,196b
437	.llong	35b,135b
438	.llong	81b,181b
439	.llong	36b,136b
440	.llong	82b,182b
441	.llong	37b,137b
442	.llong	83b,183b
443	.llong	38b,138b
444	.llong	39b,139b
445	.llong	84b,184b
446	.llong	85b,185b
447	.llong	40b,140b
448	.llong	86b,186b
449	.llong	41b,141b
450	.llong	87b,187b
451	.llong	42b,142b
452	.llong	88b,188b
453	.llong	43b,143b
454	.llong	89b,189b
455	.llong	90b,190b
456	.llong	91b,191b
457	.llong	92b,192b
458	
459	.text
460
461/*
462 * Routine to copy a whole page of data, optimized for POWER4.
463 * On POWER4 it is more than 50% faster than the simple loop
464 * above (following the .Ldst_aligned label).
   *
   * Reached only when len == 4096 and both pointers are 4K-aligned.
   * r20-r31 are spilled to -120..-32(r1), just below the three saved
   * arguments, and reloaded before returning 0 in r3.  Every load and
   * store here has an exception-table entry pointing at label 100,
   * which restarts the copy through .Ldst_aligned.
465 */
466.Lcopy_page_4K:
467	std	r31,-32(1)
468	std	r30,-40(1)
469	std	r29,-48(1)
470	std	r28,-56(1)
471	std	r27,-64(1)
472	std	r26,-72(1)
473	std	r25,-80(1)
474	std	r24,-88(1)
475	std	r23,-96(1)
476	std	r22,-104(1)
477	std	r21,-112(1)
478	std	r20,-120(1)
479	li	r5,4096/32 - 1
480	addi	r3,r3,-8	/* dest biased by -8: stores below use 8(3), 16(3), ... */
481	li	r0,5		/* inner-loop trip count, loaded into ctr at 0: */
4820:	addi	r5,r5,-24
483	mtctr	r0
48420:	ld	r22,640(4)	/* prime six source streams spaced 128 bytes apart */
48521:	ld	r21,512(4)
48622:	ld	r20,384(4)
48723:	ld	r11,256(4)
48824:	ld	r9,128(4)
48925:	ld	r7,0(4)
49026:	ld	r25,648(4)
49127:	ld	r24,520(4)
49228:	ld	r23,392(4)
49329:	ld	r10,264(4)
49430:	ld	r8,136(4)
49531:	ldu	r6,8(4)
496	cmpwi	r5,24		/* cr0 is consumed by "bge 0b" after the inner loop */
4971:
49832:	std	r22,648(3)
49933:	std	r21,520(3)
50034:	std	r20,392(3)
50135:	std	r11,264(3)
50236:	std	r9,136(3)
50337:	std	r7,8(3)
50438:	ld	r28,648(4)
50539:	ld	r27,520(4)
50640:	ld	r26,392(4)
50741:	ld	r31,264(4)
50842:	ld	r30,136(4)
50943:	ld	r29,8(4)
51044:	std	r25,656(3)
51145:	std	r24,528(3)
51246:	std	r23,400(3)
51347:	std	r10,272(3)
51448:	std	r8,144(3)
51549:	std	r6,16(3)
51650:	ld	r22,656(4)
51751:	ld	r21,528(4)
51852:	ld	r20,400(4)
51953:	ld	r11,272(4)
52054:	ld	r9,144(4)
52155:	ld	r7,16(4)
52256:	std	r28,664(3)
52357:	std	r27,536(3)
52458:	std	r26,408(3)
52559:	std	r31,280(3)
52660:	std	r30,152(3)
52761:	stdu	r29,24(3)
52862:	ld	r25,664(4)
52963:	ld	r24,536(4)
53064:	ld	r23,408(4)
53165:	ld	r10,280(4)
53266:	ld	r8,152(4)
53367:	ldu	r6,24(4)
534	bdnz	1b
53568:	std	r22,648(3)
53669:	std	r21,520(3)
53770:	std	r20,392(3)
53871:	std	r11,264(3)
53972:	std	r9,136(3)
54073:	std	r7,8(3)
54174:	addi	r4,r4,640
54275:	addi	r3,r3,648
543	bge	0b		/* repeat the six-stream pass while r5 >= 24 */
544	mtctr	r5		/* remaining count drives the 32-bytes-per-iteration loop at 3: */
54576:	ld	r7,0(4)
54677:	ld	r8,8(4)
54778:	ldu	r9,16(4)
5483:
54979:	ld	r10,8(4)
55080:	std	r7,8(3)
55181:	ld	r7,16(4)
55282:	std	r8,16(3)
55383:	ld	r8,24(4)
55484:	std	r9,24(3)
55585:	ldu	r9,32(4)
55686:	stdu	r10,32(3)
557	bdnz	3b
5584:
55987:	ld	r10,8(4)
56088:	std	r7,8(3)
56189:	std	r8,16(3)
56290:	std	r9,24(3)
56391:	std	r10,32(3)
5649:	ld	r20,-120(1)	/* restore the spilled registers */
565	ld	r21,-112(1)
566	ld	r22,-104(1)
567	ld	r23,-96(1)
568	ld	r24,-88(1)
569	ld	r25,-80(1)
570	ld	r26,-72(1)
571	ld	r27,-64(1)
572	ld	r28,-56(1)
573	ld	r29,-48(1)
574	ld	r30,-40(1)
575	ld	r31,-32(1)
576	li	r3,0		/* success: 0 bytes left uncopied */
577	blr
578
579/*
580 * on an exception, reset to the beginning and jump back into the
581 * standard __copy_tofrom_user
   *
   * Reached through the exception table that follows, which maps every
   * load and store of the page-copy routine to label 100.
582 */
583100:	ld	r20,-120(1)	/* restore the registers spilled by .Lcopy_page_4K */
584	ld	r21,-112(1)
585	ld	r22,-104(1)
586	ld	r23,-96(1)
587	ld	r24,-88(1)
588	ld	r25,-80(1)
589	ld	r26,-72(1)
590	ld	r27,-64(1)
591	ld	r28,-56(1)
592	ld	r29,-48(1)
593	ld	r30,-40(1)
594	ld	r31,-32(1)
595	ld	r3,-24(r1)	/* original dest, saved at entry */
596	ld	r4,-16(r1)	/* original src, saved at entry */
597	li	r5,4096		/* the page-copy path is only taken for len == 4096 */
598	b	.Ldst_aligned
599
600	.section __ex_table,"a"
601	.align	3
602	.llong	20b,100b
603	.llong	21b,100b
604	.llong	22b,100b
605	.llong	23b,100b
606	.llong	24b,100b
607	.llong	25b,100b
608	.llong	26b,100b
609	.llong	27b,100b
610	.llong	28b,100b
611	.llong	29b,100b
612	.llong	30b,100b
613	.llong	31b,100b
614	.llong	32b,100b
615	.llong	33b,100b
616	.llong	34b,100b
617	.llong	35b,100b
618	.llong	36b,100b
619	.llong	37b,100b
620	.llong	38b,100b
621	.llong	39b,100b
622	.llong	40b,100b
623	.llong	41b,100b
624	.llong	42b,100b
625	.llong	43b,100b
626	.llong	44b,100b
627	.llong	45b,100b
628	.llong	46b,100b
629	.llong	47b,100b
630	.llong	48b,100b
631	.llong	49b,100b
632	.llong	50b,100b
633	.llong	51b,100b
634	.llong	52b,100b
635	.llong	53b,100b
636	.llong	54b,100b
637	.llong	55b,100b
638	.llong	56b,100b
639	.llong	57b,100b
640	.llong	58b,100b
641	.llong	59b,100b
642	.llong	60b,100b
643	.llong	61b,100b
644	.llong	62b,100b
645	.llong	63b,100b
646	.llong	64b,100b
647	.llong	65b,100b
648	.llong	66b,100b
649	.llong	67b,100b
650	.llong	68b,100b
651	.llong	69b,100b
652	.llong	70b,100b
653	.llong	71b,100b
654	.llong	72b,100b
655	.llong	73b,100b
656	.llong	74b,100b
657	.llong	75b,100b
658	.llong	76b,100b
659	.llong	77b,100b
660	.llong	78b,100b
661	.llong	79b,100b
662	.llong	80b,100b
663	.llong	81b,100b
664	.llong	82b,100b
665	.llong	83b,100b
666	.llong	84b,100b
667	.llong	85b,100b
668	.llong	86b,100b
669	.llong	87b,100b
670	.llong	88b,100b
671	.llong	89b,100b
672	.llong	90b,100b
673	.llong	91b,100b
v4.10.11
  1/*
  2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of the GNU General Public License
  6 * as published by the Free Software Foundation; either version
  7 * 2 of the License, or (at your option) any later version.
  8 */
  9#include <asm/processor.h>
 10#include <asm/ppc_asm.h>
 11#include <asm/export.h>
 12
 13#ifdef __BIG_ENDIAN__
 14#define sLd sld		/* Shift towards low-numbered address. */
 15#define sHd srd		/* Shift towards high-numbered address. */
 16#else
 17#define sLd srd		/* Shift towards low-numbered address. */
 18#define sHd sld		/* Shift towards high-numbered address. */
 19#endif
 20
 21	.align	7
 22_GLOBAL_TOC(__copy_tofrom_user)
   /*
    * In:  r3 = destination, r4 = source, r5 = byte count.
    * Out: r3 = 0 when everything was copied; the exception handlers
    *      further down return the number of bytes not copied instead.
    * The three arguments are saved at -24/-16/-8(r1) so those handlers
    * can work out how far the copy got.
    */
 23BEGIN_FTR_SECTION
 24	nop
 25FTR_SECTION_ELSE
 26	b	__copy_tofrom_user_power7	/* alternative half of the CPU_FTR_VMX_COPY feature section */
 27ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 28_GLOBAL(__copy_tofrom_user_base)
 29	/* first check for a whole page copy on a page boundary */
 30	cmpldi	cr1,r5,16	/* cr1 = len vs 16 (unsigned), used by the "blt cr1" branches */
 31	cmpdi	cr6,r5,4096	/* cr6.eq = (len == 4096) */
 32	or	r0,r3,r4
 33	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
 34	andi.	r0,r0,4095	/* cr0.eq = dest and src are both 4K-aligned */
 35	std	r3,-24(r1)	/* save dest for the exception handlers */
 36	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq = page-aligned AND len == 4096 */
 37	std	r4,-16(r1)	/* save src */
 38	std	r5,-8(r1)	/* save len */
 39	dcbt	0,r4
 40	beq	.Lcopy_page_4K
 41	andi.	r6,r6,7		/* r6 = bytes to 8-byte-align dest; cr0.eq if already aligned */
 42	PPC_MTOCRF(0x01,r5)	/* low 4 bits of len -> cr7 (8/4/2/1 tail pieces) */
 43	blt	cr1,.Lshort_copy	/* len < 16 */
 44/* Below we want to nop out the bne if we're on a CPU that has the
 45 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 46 * cleared.
 47 * At the time of writing the only CPU that has this combination of bits
 48 * set is Power6.
 49 */
 50BEGIN_FTR_SECTION
 51	nop
 52FTR_SECTION_ELSE
 53	bne	.Ldst_unaligned
 54ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
 55		    CPU_FTR_UNALIGNED_LD_STD)
 56.Ldst_aligned:
 57	addi	r3,r3,-16	/* dest is biased by -16; the paths below compensate */
 58BEGIN_FTR_SECTION
 59	andi.	r0,r4,7		/* r0 = src offset within its doubleword */
 60	bne	.Lsrc_unaligned
 61END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 62	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
 63	srdi	r0,r5,5		/* r0 = number of 32-byte chunks */
 64	cmpdi	cr1,r0,0	/* cr1.eq = no 32-byte chunks */
 6520:	ld	r7,0(r4)
 66220:	ld	r6,8(r4)
 67	addi	r4,r4,16
 68	mtctr	r0
 69	andi.	r0,r5,0x10	/* is there an odd 16-byte piece? */
 70	beq	22f
 71	addi	r3,r3,16
 72	addi	r4,r4,-16
 73	mr	r9,r7
 74	mr	r8,r6
 75	beq	cr1,72f		/* only 16 bytes of bulk data: store them and finish */
 7621:	ld	r7,16(r4)	/* main loop: 32 bytes per iteration, loads ahead of stores */
 77221:	ld	r6,24(r4)
 78	addi	r4,r4,32
 7970:	std	r9,0(r3)
 80270:	std	r8,8(r3)
 8122:	ld	r9,0(r4)
 82222:	ld	r8,8(r4)
 8371:	std	r7,16(r3)
 84271:	std	r6,24(r3)
 85	addi	r3,r3,32
 86	bdnz	21b
 8772:	std	r9,0(r3)
 88272:	std	r8,8(r3)
 89	andi.	r5,r5,0xf	/* r5 = trailing bytes (< 16) */
 90	beq+	3f
 91	addi	r4,r4,16
 92.Ldo_tail:
 93	addi	r3,r3,16	/* r3 -> next dest byte (undoes the bias, or steps over the last 16 bytes stored) */
 94	bf	cr7*4+0,246f	/* 8-byte piece? */
 95244:	ld	r9,0(r4)
 96	addi	r4,r4,8
 97245:	std	r9,0(r3)
 98	addi	r3,r3,8
 99246:	bf	cr7*4+1,1f	/* 4-byte piece? */
10023:	lwz	r9,0(r4)
101	addi	r4,r4,4
10273:	stw	r9,0(r3)
103	addi	r3,r3,4
1041:	bf	cr7*4+2,2f	/* 2-byte piece? */
10544:	lhz	r9,0(r4)
106	addi	r4,r4,2
10774:	sth	r9,0(r3)
108	addi	r3,r3,2
1092:	bf	cr7*4+3,3f	/* final byte? */
11045:	lbz	r9,0(r4)
11175:	stb	r9,0(r3)
1123:	li	r3,0		/* success: 0 bytes left uncopied */
113	blr
114
115.Lsrc_unaligned:
   /*
    * Source is not 8-byte aligned.  On entry r0 = src & 7 (left there by
    * the andi. at .Ldst_aligned) and r3 is still biased by -16.
    * Aligned doublewords are read from the rounded-down source and each
    * destination doubleword is assembled from two neighbouring source
    * doublewords with sLd/sHd followed by or.
    */
116	srdi	r6,r5,3		/* r6 = len / 8 */
117	addi	r5,r5,-16
118	subf	r4,r0,r4	/* round src down to an 8-byte boundary */
119	srdi	r7,r5,4		/* r7 = (len - 16) / 16, used as the loop count */
120	sldi	r10,r0,3	/* r10 = src misalignment in bits */
121	cmpldi	cr6,r6,3
122	andi.	r5,r5,7		/* cr0.eq = no sub-doubleword tail; consumed by "bne 6f" below */
123	mtctr	r7
124	subfic	r11,r10,64	/* r11 = 64 - r10, the complementary shift */
125	add	r5,r5,r0	/* r5 = tail bytes + src offset (compared with 8 at 6:) */
126	bt	cr7*4+0,28f	/* 8s bit of len set: take the other entry sequence */
127
12824:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
12925:	ld	r0,8(r4)
130	sLd	r6,r9,r10
13126:	ldu	r9,16(r4)
132	sHd	r7,r0,r11
133	sLd	r8,r0,r10
134	or	r7,r7,r6
135	blt	cr6,79f
13627:	ld	r0,8(r4)
137	b	2f
138
13928:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
14029:	ldu	r9,8(r4)
141	sLd	r8,r0,r10
142	addi	r3,r3,-8
143	blt	cr6,5f
14430:	ld	r0,8(r4)
145	sHd	r12,r9,r11
146	sLd	r6,r9,r10
14731:	ldu	r9,16(r4)
148	or	r12,r8,r12
149	sHd	r7,r0,r11
150	sLd	r8,r0,r10
151	addi	r3,r3,16
152	beq	cr6,78f
153
1541:	or	r7,r7,r6	/* main loop: two destination doublewords per iteration */
15532:	ld	r0,8(r4)
15676:	std	r12,8(r3)
1572:	sHd	r12,r9,r11
158	sLd	r6,r9,r10
15933:	ldu	r9,16(r4)
160	or	r12,r8,r12
16177:	stdu	r7,16(r3)
162	sHd	r7,r0,r11
163	sLd	r8,r0,r10
164	bdnz	1b
165
16678:	std	r12,8(r3)
167	or	r7,r7,r6
16879:	std	r7,16(r3)
1695:	sHd	r12,r9,r11
170	or	r12,r8,r12
17180:	std	r12,24(r3)
172	bne	6f		/* cr0 still from the andi. above: tail bytes remain */
173	li	r3,0
174	blr
1756:	cmpwi	cr1,r5,8	/* does the tail reach into one more source doubleword? */
176	addi	r3,r3,32
177	sLd	r9,r9,r10
178	ble	cr1,7f
17934:	ld	r0,8(r4)
180	sHd	r7,r0,r11
181	or	r9,r7,r9
1827:
   /*
    * Store the remaining 4/2/1-byte pieces out of r9 according to cr7.
    * Big-endian rotates the next bytes into the low end before each
    * store; little-endian rotates the consumed bytes out afterwards.
    */
183	bf	cr7*4+1,1f
184#ifdef __BIG_ENDIAN__
185	rotldi	r9,r9,32
186#endif
18794:	stw	r9,0(r3)
188#ifdef __LITTLE_ENDIAN__
189	rotrdi	r9,r9,32
190#endif
191	addi	r3,r3,4
1921:	bf	cr7*4+2,2f
193#ifdef __BIG_ENDIAN__
194	rotldi	r9,r9,16
195#endif
19695:	sth	r9,0(r3)
197#ifdef __LITTLE_ENDIAN__
198	rotrdi	r9,r9,16
199#endif
200	addi	r3,r3,2
2012:	bf	cr7*4+3,3f
202#ifdef __BIG_ENDIAN__
203	rotldi	r9,r9,8
204#endif
20596:	stb	r9,0(r3)
206#ifdef __LITTLE_ENDIAN__
207	rotrdi	r9,r9,8
208#endif
2093:	li	r3,0		/* success: 0 bytes left uncopied */
210	blr
211
212.Ldst_unaligned:
   /*
    * Copy the r6 (1..7) leading bytes needed to bring the destination
    * to an 8-byte boundary, then rejoin the aligned path.  r7 is the
    * running byte offset used by the indexed loads/stores, and by the
    * 136/137 and 182/183 fixups.
    */
213	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
214	subf	r5,r6,r5	/* len -= bytes handled here */
215	li	r7,0
216	cmpldi	cr1,r5,16	/* redo the "< 16" test for the reduced length */
217	bf	cr7*4+3,1f	/* 1-byte piece? */
21835:	lbz	r0,0(r4)
21981:	stb	r0,0(r3)
220	addi	r7,r7,1
2211:	bf	cr7*4+2,2f	/* 2-byte piece? */
22236:	lhzx	r0,r7,r4
22382:	sthx	r0,r7,r3
224	addi	r7,r7,2
2252:	bf	cr7*4+1,3f	/* 4-byte piece? */
22637:	lwzx	r0,r7,r4
22783:	stwx	r0,r7,r3
2283:	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the remaining length */
229	add	r4,r6,r4	/* advance src past the leading bytes */
230	add	r3,r6,r3	/* advance dest likewise; it is now 8-byte aligned */
231	b	.Ldst_aligned
232
233.Lshort_copy:
   /*
    * len < 16.  cr7 holds the low four bits of len, so copy an 8-, 4-,
    * 2- and 1-byte piece as each bit dictates.  The 8-byte piece is
    * moved as two words (presumably because neither pointer has been
    * aligned on this path -- confirm).
    */
234	bf	cr7*4+0,1f	/* 8-byte piece? */
23538:	lwz	r0,0(r4)
23639:	lwz	r9,4(r4)
237	addi	r4,r4,8
23884:	stw	r0,0(r3)
23985:	stw	r9,4(r3)
240	addi	r3,r3,8
2411:	bf	cr7*4+1,2f	/* 4-byte piece? */
24240:	lwz	r0,0(r4)
243	addi	r4,r4,4
24486:	stw	r0,0(r3)
245	addi	r3,r3,4
2462:	bf	cr7*4+2,3f	/* 2-byte piece? */
24741:	lhz	r0,0(r4)
248	addi	r4,r4,2
24987:	sth	r0,0(r3)
250	addi	r3,r3,2
2513:	bf	cr7*4+3,4f	/* final byte? */
25242:	lbz	r0,0(r4)
25388:	stb	r0,0(r3)
2544:	li	r3,0		/* success: 0 bytes left uncopied */
255	blr
256
257/*
258 * exception handlers follow
259 * we have to return the number of bytes not copied
260 * for an exception on a load, we set the rest of the destination to 0
   *
   * Label 1NN (3NN) is the fixup for the load at label NN (2NN); the
   * pairing is made by the EX_TABLE() entries.  The labels fall through
   * a chain of addi's so that, on reaching 1: below, r3 has been
   * advanced to the first destination byte that was not written.
261 */
262
263136:
264137:
265	add	r3,r3,r7	/* r7 = byte offset reached in .Ldst_unaligned */
266	b	1f
267130:
268131:
269	addi	r3,r3,8
270120:
271320:
272122:
273322:
274124:
275125:
276126:
277127:
278128:
279129:
280133:
281	addi	r3,r3,8
282132:
283	addi	r3,r3,8
284121:
285321:
286344:
287134:
288135:
289138:
290139:
291140:
292141:
293142:
294123:
295144:
296145:
297
298/*
299 * here we have had a fault on a load and r3 points to the first
300 * unmodified byte of the destination
301 */
3021:	ld	r6,-24(r1)	/* original dest */
303	ld	r4,-16(r1)	/* original src */
304	ld	r5,-8(r1)	/* original len */
305	subf	r6,r6,r3	/* r6 = bytes already copied */
306	add	r4,r4,r6	/* r4 = source address of the first uncopied byte */
307	subf	r5,r6,r5	/* #bytes left to go */
308
309/*
310 * first see if we can copy any more bytes before hitting another exception
311 */
312	mtctr	r5
31343:	lbz	r0,0(r4)
314	addi	r4,r4,1
31589:	stb	r0,0(r3)
316	addi	r3,r3,1
317	bdnz	43b
318	li	r3,0		/* huh? all copied successfully this time? */
319	blr
320
321/*
322 * here we have trapped again, need to clear ctr bytes starting at r3
323 */
324143:	mfctr	r5		/* r5 = bytes still uncopied */
325	li	r0,0
326	mr	r4,r3		/* r4 = cursor for the zeroing below */
327	mr	r3,r5		/* return the number of bytes not copied */
3281:	andi.	r9,r4,7		/* zero single bytes until r4 is 8-byte aligned */
329	beq	3f
33090:	stb	r0,0(r4)
331	addic.	r5,r5,-1
332	addi	r4,r4,1
333	bne	1b
334	blr
3353:	cmpldi	cr1,r5,8
336	srdi	r9,r5,3		/* r9 = whole doublewords to zero */
337	andi.	r5,r5,7		/* r5 = leftover bytes; cr0.eq if none */
338	blt	cr1,93f
339	mtctr	r9
34091:	std	r0,0(r4)
341	addi	r4,r4,8
342	bdnz	91b
34393:	beqlr			/* nothing left over: done */
344	mtctr	r5	
34592:	stb	r0,0(r4)
346	addi	r4,r4,1
347	bdnz	92b
348	blr
349
350/*
351 * exception handlers for stores: we just need to work
352 * out how many bytes weren't copied
   *
   * Label 1NN (3NN) is the fixup for the store at label NN (2NN); the
   * pairing is made by the EX_TABLE() entries.  The labels fall through
   * the addi chain so that r3 ends up at the first byte that was not
   * stored: the +24 group is for stores at 24(r3), the +16 group
   * (171/177/179) for stores at 16(r3), the +8 group for 8(r3), the
   * +4 group for 4(r3), and the rest for stores at 0(r3).
353 */
354182:
355183:
356	add	r3,r3,r7	/* r7 = byte offset reached in .Ldst_unaligned */
357	b	1f
358371:
359180:
360	addi	r3,r3,8
361171:
362177:
363179:
364	addi	r3,r3,8
365370:
366372:
367176:
368178:
369	addi	r3,r3,4
370185:
371	addi	r3,r3,4
372170:
373172:
374345:
375173:
376174:
377175:
 
378181:
379184:
380186:
381187:
382188:
383189:	
384194:
385195:
386196:
3871:
388	ld	r6,-24(r1)	/* original dest */
389	ld	r5,-8(r1)	/* original len */
390	add	r6,r6,r5	/* r6 = one past the end of the destination */
391	subf	r3,r3,r6	/* #bytes not copied */
392190:			/* faults while zeroing after a load fault: r3 is already set */
393191:
394192:
395	blr			/* #bytes not copied in r3 */
396
397	EX_TABLE(20b,120b)
398	EX_TABLE(220b,320b)
399	EX_TABLE(21b,121b)
400	EX_TABLE(221b,321b)
401	EX_TABLE(70b,170b)
402	EX_TABLE(270b,370b)
403	EX_TABLE(22b,122b)
404	EX_TABLE(222b,322b)
405	EX_TABLE(71b,171b)
406	EX_TABLE(271b,371b)
407	EX_TABLE(72b,172b)
408	EX_TABLE(272b,372b)
409	EX_TABLE(244b,344b)
410	EX_TABLE(245b,345b)
411	EX_TABLE(23b,123b)
412	EX_TABLE(73b,173b)
413	EX_TABLE(44b,144b)
414	EX_TABLE(74b,174b)
415	EX_TABLE(45b,145b)
416	EX_TABLE(75b,175b)
417	EX_TABLE(24b,124b)
418	EX_TABLE(25b,125b)
419	EX_TABLE(26b,126b)
420	EX_TABLE(27b,127b)
421	EX_TABLE(28b,128b)
422	EX_TABLE(29b,129b)
423	EX_TABLE(30b,130b)
424	EX_TABLE(31b,131b)
425	EX_TABLE(32b,132b)
426	EX_TABLE(76b,176b)
427	EX_TABLE(33b,133b)
428	EX_TABLE(77b,177b)
429	EX_TABLE(78b,178b)
430	EX_TABLE(79b,179b)
431	EX_TABLE(80b,180b)
432	EX_TABLE(34b,134b)
433	EX_TABLE(94b,194b)
434	EX_TABLE(95b,195b)
435	EX_TABLE(96b,196b)
436	EX_TABLE(35b,135b)
437	EX_TABLE(81b,181b)
438	EX_TABLE(36b,136b)
439	EX_TABLE(82b,182b)
440	EX_TABLE(37b,137b)
441	EX_TABLE(83b,183b)
442	EX_TABLE(38b,138b)
443	EX_TABLE(39b,139b)
444	EX_TABLE(84b,184b)
445	EX_TABLE(85b,185b)
446	EX_TABLE(40b,140b)
447	EX_TABLE(86b,186b)
448	EX_TABLE(41b,141b)
449	EX_TABLE(87b,187b)
450	EX_TABLE(42b,142b)
451	EX_TABLE(88b,188b)
452	EX_TABLE(43b,143b)
453	EX_TABLE(89b,189b)
454	EX_TABLE(90b,190b)
455	EX_TABLE(91b,191b)
456	EX_TABLE(92b,192b)
 
 
 
 
457
458/*
459 * Routine to copy a whole page of data, optimized for POWER4.
460 * On POWER4 it is more than 50% faster than the simple loop
461 * above (following the .Ldst_aligned label).
   *
   * Reached only when len == 4096 and both pointers are 4K-aligned.
   * r20-r31 are spilled to -120..-32(r1), just below the three saved
   * arguments, and reloaded before returning 0 in r3.  Every load and
   * store here has an EX_TABLE() entry pointing at label 100, which
   * restarts the copy through .Ldst_aligned.
462 */
463.Lcopy_page_4K:
464	std	r31,-32(1)
465	std	r30,-40(1)
466	std	r29,-48(1)
467	std	r28,-56(1)
468	std	r27,-64(1)
469	std	r26,-72(1)
470	std	r25,-80(1)
471	std	r24,-88(1)
472	std	r23,-96(1)
473	std	r22,-104(1)
474	std	r21,-112(1)
475	std	r20,-120(1)
476	li	r5,4096/32 - 1
477	addi	r3,r3,-8	/* dest biased by -8: stores below use 8(3), 16(3), ... */
478	li	r0,5		/* inner-loop trip count, loaded into ctr at 0: */
4790:	addi	r5,r5,-24
480	mtctr	r0
48120:	ld	r22,640(4)	/* prime six source streams spaced 128 bytes apart */
48221:	ld	r21,512(4)
48322:	ld	r20,384(4)
48423:	ld	r11,256(4)
48524:	ld	r9,128(4)
48625:	ld	r7,0(4)
48726:	ld	r25,648(4)
48827:	ld	r24,520(4)
48928:	ld	r23,392(4)
49029:	ld	r10,264(4)
49130:	ld	r8,136(4)
49231:	ldu	r6,8(4)
493	cmpwi	r5,24		/* cr0 is consumed by "bge 0b" after the inner loop */
4941:
49532:	std	r22,648(3)
49633:	std	r21,520(3)
49734:	std	r20,392(3)
49835:	std	r11,264(3)
49936:	std	r9,136(3)
50037:	std	r7,8(3)
50138:	ld	r28,648(4)
50239:	ld	r27,520(4)
50340:	ld	r26,392(4)
50441:	ld	r31,264(4)
50542:	ld	r30,136(4)
50643:	ld	r29,8(4)
50744:	std	r25,656(3)
50845:	std	r24,528(3)
50946:	std	r23,400(3)
51047:	std	r10,272(3)
51148:	std	r8,144(3)
51249:	std	r6,16(3)
51350:	ld	r22,656(4)
51451:	ld	r21,528(4)
51552:	ld	r20,400(4)
51653:	ld	r11,272(4)
51754:	ld	r9,144(4)
51855:	ld	r7,16(4)
51956:	std	r28,664(3)
52057:	std	r27,536(3)
52158:	std	r26,408(3)
52259:	std	r31,280(3)
52360:	std	r30,152(3)
52461:	stdu	r29,24(3)
52562:	ld	r25,664(4)
52663:	ld	r24,536(4)
52764:	ld	r23,408(4)
52865:	ld	r10,280(4)
52966:	ld	r8,152(4)
53067:	ldu	r6,24(4)
531	bdnz	1b
53268:	std	r22,648(3)
53369:	std	r21,520(3)
53470:	std	r20,392(3)
53571:	std	r11,264(3)
53672:	std	r9,136(3)
53773:	std	r7,8(3)
53874:	addi	r4,r4,640
53975:	addi	r3,r3,648
540	bge	0b		/* repeat the six-stream pass while r5 >= 24 */
541	mtctr	r5		/* remaining count drives the 32-bytes-per-iteration loop at 3: */
54276:	ld	r7,0(4)
54377:	ld	r8,8(4)
54478:	ldu	r9,16(4)
5453:
54679:	ld	r10,8(4)
54780:	std	r7,8(3)
54881:	ld	r7,16(4)
54982:	std	r8,16(3)
55083:	ld	r8,24(4)
55184:	std	r9,24(3)
55285:	ldu	r9,32(4)
55386:	stdu	r10,32(3)
554	bdnz	3b
5554:
55687:	ld	r10,8(4)
55788:	std	r7,8(3)
55889:	std	r8,16(3)
55990:	std	r9,24(3)
56091:	std	r10,32(3)
5619:	ld	r20,-120(1)	/* restore the spilled registers */
562	ld	r21,-112(1)
563	ld	r22,-104(1)
564	ld	r23,-96(1)
565	ld	r24,-88(1)
566	ld	r25,-80(1)
567	ld	r26,-72(1)
568	ld	r27,-64(1)
569	ld	r28,-56(1)
570	ld	r29,-48(1)
571	ld	r30,-40(1)
572	ld	r31,-32(1)
573	li	r3,0		/* success: 0 bytes left uncopied */
574	blr
575
576/*
577 * on an exception, reset to the beginning and jump back into the
578 * standard __copy_tofrom_user
   *
   * Reached through the EX_TABLE() entries that follow, which map every
   * load and store of the page-copy routine to label 100.
579 */
580100:	ld	r20,-120(1)	/* restore the registers spilled by .Lcopy_page_4K */
581	ld	r21,-112(1)
582	ld	r22,-104(1)
583	ld	r23,-96(1)
584	ld	r24,-88(1)
585	ld	r25,-80(1)
586	ld	r26,-72(1)
587	ld	r27,-64(1)
588	ld	r28,-56(1)
589	ld	r29,-48(1)
590	ld	r30,-40(1)
591	ld	r31,-32(1)
592	ld	r3,-24(r1)	/* original dest, saved at entry */
593	ld	r4,-16(r1)	/* original src, saved at entry */
594	li	r5,4096		/* the page-copy path is only taken for len == 4096 */
595	b	.Ldst_aligned
596
597	EX_TABLE(20b,100b)
598	EX_TABLE(21b,100b)
599	EX_TABLE(22b,100b)
600	EX_TABLE(23b,100b)
601	EX_TABLE(24b,100b)
602	EX_TABLE(25b,100b)
603	EX_TABLE(26b,100b)
604	EX_TABLE(27b,100b)
605	EX_TABLE(28b,100b)
606	EX_TABLE(29b,100b)
607	EX_TABLE(30b,100b)
608	EX_TABLE(31b,100b)
609	EX_TABLE(32b,100b)
610	EX_TABLE(33b,100b)
611	EX_TABLE(34b,100b)
612	EX_TABLE(35b,100b)
613	EX_TABLE(36b,100b)
614	EX_TABLE(37b,100b)
615	EX_TABLE(38b,100b)
616	EX_TABLE(39b,100b)
617	EX_TABLE(40b,100b)
618	EX_TABLE(41b,100b)
619	EX_TABLE(42b,100b)
620	EX_TABLE(43b,100b)
621	EX_TABLE(44b,100b)
622	EX_TABLE(45b,100b)
623	EX_TABLE(46b,100b)
624	EX_TABLE(47b,100b)
625	EX_TABLE(48b,100b)
626	EX_TABLE(49b,100b)
627	EX_TABLE(50b,100b)
628	EX_TABLE(51b,100b)
629	EX_TABLE(52b,100b)
630	EX_TABLE(53b,100b)
631	EX_TABLE(54b,100b)
632	EX_TABLE(55b,100b)
633	EX_TABLE(56b,100b)
634	EX_TABLE(57b,100b)
635	EX_TABLE(58b,100b)
636	EX_TABLE(59b,100b)
637	EX_TABLE(60b,100b)
638	EX_TABLE(61b,100b)
639	EX_TABLE(62b,100b)
640	EX_TABLE(63b,100b)
641	EX_TABLE(64b,100b)
642	EX_TABLE(65b,100b)
643	EX_TABLE(66b,100b)
644	EX_TABLE(67b,100b)
645	EX_TABLE(68b,100b)
646	EX_TABLE(69b,100b)
647	EX_TABLE(70b,100b)
648	EX_TABLE(71b,100b)
649	EX_TABLE(72b,100b)
650	EX_TABLE(73b,100b)
651	EX_TABLE(74b,100b)
652	EX_TABLE(75b,100b)
653	EX_TABLE(76b,100b)
654	EX_TABLE(77b,100b)
655	EX_TABLE(78b,100b)
656	EX_TABLE(79b,100b)
657	EX_TABLE(80b,100b)
658	EX_TABLE(81b,100b)
659	EX_TABLE(82b,100b)
660	EX_TABLE(83b,100b)
661	EX_TABLE(84b,100b)
662	EX_TABLE(85b,100b)
663	EX_TABLE(86b,100b)
664	EX_TABLE(87b,100b)
665	EX_TABLE(88b,100b)
666	EX_TABLE(89b,100b)
667	EX_TABLE(90b,100b)
668	EX_TABLE(91b,100b)
669
670EXPORT_SYMBOL(__copy_tofrom_user)