/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif
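
/*
 * Illustration of the macro choice (a sketch, using placeholder
 * register names): with the source n bytes past an 8-byte boundary,
 * the code below sets r10 = 8*n and r11 = 64 - r10, then merges two
 * aligned doublewords into one aligned store:
 *	sLd	rA,rX,r10	- drop the n unwanted leading bytes and
 *				  slide the rest towards the
 *				  low-numbered-address end
 *	sHd	rB,rY,r11	- move the first n bytes of the next
 *				  doubleword into the vacated positions
 *	or	rB,rB,rA	- merged doubleword, ready to store
 * On big-endian, the low-numbered address holds the most-significant
 * byte, so sLd is a left shift; little-endian reverses the
 * in-register byte order, so the same move becomes a right shift.
 */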

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
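
/*
 * Worked example of the r3_offset trick (illustrative numbers): if a
 * load faults where r3_offset = 16, lex records .Lld_exc - 16 as the
 * handler.  The 16 bytes of code immediately before .Lld_exc are two
 * addi r3,r3,8 instructions and two nops (4 bytes each), so execution
 * resumes there, adds 16 to r3, and falls into .Lld_exc with r3
 * pointing at the first unmodified byte.
 */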
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
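/*
 * Both alternatives are assembled; at boot the feature-fixup pass
 * patches in the nop or the bne according to the CPU feature bits, so
 * the choice costs nothing at runtime.  The test_feature lines (in
 * combination with SELFTEST_CASE above) let the user-space copy-loop
 * selftests force one variant when this file is built outside the
 * kernel.
 */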
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr
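
/*
 * Note on the cr7 tests above: PPC_MTOCRF(0x01,rN) copies the low
 * four bits of the remaining length into cr7, so bf cr7*4+0 through
 * bf cr7*4+3 test the 8-, 4-, 2- and 1-byte bits of the count; each
 * fragment copies exactly the power-of-two sized piece its bit
 * selects.
 */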

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load we just copy as many bytes as we can;
 * the copy_from_user() wrapper zeroes any uncopied tail
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
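/*
 * For example (hypothetical numbers): if the original destination was
 * D, the fault left r3 = D + 100, and the original count was 4096,
 * then r6 = r3 - D = 100 bytes are already done, r4 is advanced by
 * 100, and r5 = 3996 bytes remain for the byte-at-a-time retry.
 */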
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr
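
/*
 * Example of the boundary case (hypothetical): if an 8-byte store
 * faulted because it straddled the end of the last mapped page, up to
 * 7 of its bytes may still be writable.  The loop above retries single
 * bytes until r3 reaches 8-byte alignment or the end, or until a byte
 * store itself faults (branching straight to 19f), so the count
 * returned at 19: is exact.
 */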
/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
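/*
 * A sketch of the schedule below (informational): six load streams
 * spaced 128 bytes apart (offsets 0, 128, ..., 640) keep the loads
 * running well ahead of the matching stores; r5 counts the remaining
 * 32-byte chunks (4096/32 - 1 at entry) and the ctr-driven inner loop
 * steps all six streams forward together.
 */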
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)