/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif
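
/*
 * Worked example: if the source is 3 bytes past an 8-byte boundary,
 * .Lsrc_unaligned below computes r10 = 3*8 = 24 and r11 = 64 - 24 = 40.
 * Each aligned destination doubleword is then assembled from two
 * aligned source doublewords A and B (A at the lower address) as
 *	sLd(A, r10) | sHd(B, r11)
 * i.e. the five highest-addressed bytes of A followed by the three
 * lowest-addressed bytes of B, on either endianness.
 */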

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
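
/*
 * For example, "lex;	ld	r7,0(r4)" below expands to
 *	100:	ld	r7,0(r4)
 * together with an exception table entry sending a fault on that load
 * to .Lld_exc - r3_offset, using the value the symbol r3_offset has at
 * that point in the code.
 */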
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
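/*
 * The tail copies key off cr7: PPC_MTOCRF(0x01,r5) above moved the low
 * four bits of the length into cr7, so cr7*4+0 is the "8 bytes" bit,
 * cr7*4+1 the "4 bytes" bit, cr7*4+2 the "2 bytes" bit and cr7*4+3 the
 * "1 byte" bit.  A 13-byte tail (0b1101), for example, takes the 8-,
 * 4- and 1-byte steps and skips the 2-byte one.
 */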
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we keep copying byte-by-byte as long as
 * we can before returning (the destination is not zeroed here)
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */
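/*
 * For example, a fault recorded with r3_offset = 16 vectors to
 * .Lld_exc - 16, which is 16 bytes (two addi/nop pairs) before
 * .Lld_exc; executing those two pairs adds 16 to r3, so .Lld_exc sees
 * r3 pointing at the first unmodified destination byte.
 */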
.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
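/*
 * Example: if a 4096-byte copy takes a load fault after 32 bytes have
 * been stored, r6 becomes 32, r4 is advanced past the 32 source bytes
 * already copied and r5 becomes 4064; the byte loop below then copies
 * until the load faults again, and .Ldone returns the count left in ctr.
 */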
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
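	/*
	 * Example: an 8-byte std that starts 3 bytes before an unmapped
	 * page faults, but its first 3 bytes lie in a mapped page; the
	 * byte loop below picks those bytes up, stopping at the next
	 * 8-byte boundary or on a further fault (both go to 19f).
	 */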
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
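/*
 * The loop below keeps six streams of loads in flight at once, at
 * offsets 0, 128, 256, 384, 512 and 640 from r4, i.e. in six different
 * cache lines; r5 counts the remaining 32-byte chunks and the inner
 * ctr-driven loop software-pipelines the loads ahead of the stores.
 */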
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)