v6.8
  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/*
  3 * Memory copy functions for 32-bit PowerPC.
  4 *
  5 * Copyright (C) 1996-2005 Paul Mackerras.
  6 */
  7#include <linux/export.h>
  8#include <asm/processor.h>
  9#include <asm/cache.h>
 10#include <asm/errno.h>
 11#include <asm/ppc_asm.h>
 12#include <asm/code-patching-asm.h>
 13#include <asm/kasan.h>
 14
 15#define COPY_16_BYTES		\
 16	lwz	r7,4(r4);	\
 17	lwz	r8,8(r4);	\
 18	lwz	r9,12(r4);	\
 19	lwzu	r10,16(r4);	\
 20	stw	r7,4(r6);	\
 21	stw	r8,8(r6);	\
 22	stw	r9,12(r6);	\
 23	stwu	r10,16(r6)
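/*
 * COPY_16_BYTES moves 16 bytes and advances both pointers by 16.  r4 and r6
 * are kept pointing one word *before* the next source/destination word so
 * the update forms (lwzu/stwu) can access memory and bump the pointer in a
 * single instruction.  A rough C sketch, assuming word-aligned u32 pointers
 * carried with the same -4 bias (illustrative only, not generated code):
 *
 *	static inline void copy_16_bytes(u32 **dst, u32 **src)
 *	{
 *		u32 a = (*src)[1], b = (*src)[2], c = (*src)[3], d = (*src)[4];
 *
 *		*src += 4;
 *		(*dst)[1] = a;
 *		(*dst)[2] = b;
 *		(*dst)[3] = c;
 *		(*dst)[4] = d;
 *		*dst += 4;
 *	}
 */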
 24
 25#define COPY_16_BYTES_WITHEX(n)	\
 268 ## n ## 0:			\
 27	lwz	r7,4(r4);	\
 288 ## n ## 1:			\
 29	lwz	r8,8(r4);	\
 308 ## n ## 2:			\
 31	lwz	r9,12(r4);	\
 328 ## n ## 3:			\
 33	lwzu	r10,16(r4);	\
 348 ## n ## 4:			\
 35	stw	r7,4(r6);	\
 368 ## n ## 5:			\
 37	stw	r8,8(r6);	\
 388 ## n ## 6:			\
 39	stw	r9,12(r6);	\
 408 ## n ## 7:			\
 41	stwu	r10,16(r6)
 42
 43#define COPY_16_BYTES_EXCODE(n)			\
 449 ## n ## 0:					\
 45	addi	r5,r5,-(16 * n);		\
 46	b	104f;				\
 479 ## n ## 1:					\
 48	addi	r5,r5,-(16 * n);		\
 49	b	105f;				\
 50	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
 51	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
 52	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
 53	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
 54	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
 55	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
 56	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
 57	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
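/*
 * Label scheme: COPY_16_BYTES_WITHEX(n) tags its four loads 8n0..8n3 and its
 * four stores 8n4..8n7 (for example n = 1 expands to 810..817), while
 * COPY_16_BYTES_EXCODE(n) provides the matching fixups: 9n0 for a faulting
 * load, 9n1 for a faulting store.  Each fixup subtracts the 16 * n bytes of
 * the current cache line that were already copied before the fault, then
 * branches to the common read-fault (104) or write-fault (105) handler in
 * __copy_tofrom_user below.  The EX_TABLE entries are what allow the
 * page-fault handler to resume at the fixup instead of oopsing when one of
 * the tagged instructions faults on a user address.
 */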
 58
 59	.text
 60
 61CACHELINE_BYTES = L1_CACHE_BYTES
 62LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 63CACHELINE_MASK = (L1_CACHE_BYTES-1)
 64
 65#ifndef CONFIG_KASAN
 66_GLOBAL(memset16)
 67	rlwinm.	r0, r5, 31, 1, 31
 68	addi	r6, r3, -4
 69	beq-	2f
 70	rlwimi	r4, r4, 16, 0, 15
 71	mtctr	r0
 721:	stwu	r4, 4(r6)
 73	bdnz	1b
 742:	andi.	r0, r5, 1
 75	beqlr
 76	sth	r4, 4(r6)
 77	blr
 78EXPORT_SYMBOL(memset16)
 79#endif
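/*
 * memset16() above fills @count 16-bit elements with @v: the halfword is
 * replicated into a 32-bit word and stored with stwu two elements at a time,
 * with a trailing sth for an odd count.  A hedged C equivalent (sketch only;
 * like the assembly it assumes the destination tolerates word-sized stores):
 *
 *	void *memset16_sketch(u16 *s, u16 v, size_t count)
 *	{
 *		u32 w = ((u32)v << 16) | v;
 *		u32 *p = (u32 *)s;
 *
 *		for (size_t i = 0; i < count / 2; i++)
 *			*p++ = w;
 *		if (count & 1)
 *			*(u16 *)p = v;
 *		return s;
 *	}
 */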
 80
 81/*
 82 * Use dcbz on the complete cache lines in the destination
 83 * to set them to zero.  This requires that the destination
 84 * area is cacheable.  -- paulus
 85 *
 86 * During early init, cache might not be active yet, so dcbz cannot be used.
 87 * We therefore skip the optimised block that uses dcbz. This jump is
 88 * replaced by a nop once cache is active. This is done in machine_init()
 89 */
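/*
 * Rough shape of the routine below in C.  Illustrative sketch only:
 * cache_is_enabled() stands in for the patch_site mechanism, dcbz() for the
 * instruction (it zeroes one whole data-cache line without reading memory),
 * and the word-at-a-time stores of the real code are reduced to byte stores.
 *
 *	void *memset_sketch(void *s, int c, size_t n)
 *	{
 *		u8 *p = s;
 *
 *		if (c == 0 && cache_is_enabled()) {
 *			while (n && ((unsigned long)p & (L1_CACHE_BYTES - 1))) {
 *				*p++ = 0;
 *				n--;
 *			}
 *			while (n >= L1_CACHE_BYTES) {
 *				dcbz(p);
 *				p += L1_CACHE_BYTES;
 *				n -= L1_CACHE_BYTES;
 *			}
 *		}
 *		while (n--)
 *			*p++ = (u8)c;
 *		return s;
 *	}
 */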
 90_GLOBAL_KASAN(memset)
 91	cmplwi	0,r5,4
 92	blt	7f
 93
 94	rlwimi	r4,r4,8,16,23
 95	rlwimi	r4,r4,16,0,15
 96
 97	stw	r4,0(r3)
 98	beqlr
 99	andi.	r0,r3,3
100	add	r5,r0,r5
101	subf	r6,r0,r3
102	cmplwi	0,r4,0
103	/*
104	 * Skip optimised block until cache is enabled. Will be replaced
105	 * by 'bne' during boot to use normal procedure if r4 is not zero
106	 */
1075:	b	2f
108	patch_site	5b, patch__memset_nocache
109
110	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
111	add	r8,r7,r5
112	srwi	r9,r8,LG_CACHELINE_BYTES
113	addic.	r9,r9,-1	/* total number of complete cachelines */
114	ble	2f
115	xori	r0,r7,CACHELINE_MASK & ~3
116	srwi.	r0,r0,2
117	beq	3f
118	mtctr	r0
1194:	stwu	r4,4(r6)
120	bdnz	4b
1213:	mtctr	r9
122	li	r7,4
12310:	dcbz	r7,r6
124	addi	r6,r6,CACHELINE_BYTES
125	bdnz	10b
126	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
127	addi	r5,r5,4
128
1292:	srwi	r0,r5,2
130	mtctr	r0
131	bdz	6f
1321:	stwu	r4,4(r6)
133	bdnz	1b
1346:	andi.	r5,r5,3
135	beqlr
136	mtctr	r5
137	addi	r6,r6,3
1388:	stbu	r4,1(r6)
139	bdnz	8b
140	blr
141
1427:	cmpwi	0,r5,0
143	beqlr
144	mtctr	r5
145	addi	r6,r3,-1
1469:	stbu	r4,1(r6)
147	bdnz	9b
148	blr
149EXPORT_SYMBOL(memset)
150EXPORT_SYMBOL_KASAN(memset)
151
152/*
153 * This version uses dcbz on the complete cache lines in the
154 * destination area to reduce memory traffic.  This requires that
155 * the destination area is cacheable.
156 * We only use this version if the source and dest don't overlap.
157 * -- paulus.
158 *
159 * During early init, cache might not be active yet, so dcbz cannot be used.
160 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
161 * replaced by a nop once cache is active. This is done in machine_init()
162 */
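/*
 * In C terms, memmove() below just picks a copy direction, and memcpy()
 * guards its dcbz fast path with an overlap test (sketch only, not the
 * exact generated code):
 *
 *	void *memmove_sketch(void *dst, const void *src, size_t n)
 *	{
 *		if (dst > src)
 *			return backwards_memcpy(dst, src, n);
 *		return memcpy(dst, src, n);
 *	}
 *
 * and the "test if the src & dst overlap" sequence in memcpy() amounts to:
 *
 *	if ((const char *)src < (char *)dst + n &&
 *	    (char *)dst < (const char *)src + n)
 *		return generic_memcpy(dst, src, n);
 *
 * because dcbz would zero destination cache lines whose bytes may still be
 * needed as source data when the two regions overlap.
 */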
163_GLOBAL_KASAN(memmove)
164	cmplw	0,r3,r4
165	bgt	backwards_memcpy
166	/* fall through */
167
168_GLOBAL_KASAN(memcpy)
1691:	b	generic_memcpy
170	patch_site	1b, patch__memcpy_nocache
171
172	add	r7,r3,r5		/* test if the src & dst overlap */
173	add	r8,r4,r5
174	cmplw	0,r4,r7
175	cmplw	1,r3,r8
176	crand	0,0,4			/* cr0.lt &= cr1.lt */
177	blt	generic_memcpy		/* if regions overlap */
178
179	addi	r4,r4,-4
180	addi	r6,r3,-4
181	neg	r0,r3
182	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
183	beq	58f
184
185	cmplw	0,r5,r0			/* is this more than total to do? */
186	blt	63f			/* if not much to do */
187	andi.	r8,r0,3			/* get it word-aligned first */
188	subf	r5,r0,r5
189	mtctr	r8
190	beq+	61f
19170:	lbz	r9,4(r4)		/* do some bytes */
192	addi	r4,r4,1
193	addi	r6,r6,1
194	stb	r9,3(r6)
195	bdnz	70b
19661:	srwi.	r0,r0,2
197	mtctr	r0
198	beq	58f
19972:	lwzu	r9,4(r4)		/* do some words */
200	stwu	r9,4(r6)
201	bdnz	72b
202
20358:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
204	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
205	li	r11,4
206	mtctr	r0
207	beq	63f
20853:
209	dcbz	r11,r6
210	COPY_16_BYTES
211#if L1_CACHE_BYTES >= 32
212	COPY_16_BYTES
213#if L1_CACHE_BYTES >= 64
214	COPY_16_BYTES
215	COPY_16_BYTES
216#if L1_CACHE_BYTES >= 128
217	COPY_16_BYTES
218	COPY_16_BYTES
219	COPY_16_BYTES
220	COPY_16_BYTES
221#endif
222#endif
223#endif
224	bdnz	53b
225
22663:	srwi.	r0,r5,2
227	mtctr	r0
228	beq	64f
22930:	lwzu	r0,4(r4)
230	stwu	r0,4(r6)
231	bdnz	30b
232
23364:	andi.	r0,r5,3
234	mtctr	r0
235	beq+	65f
236	addi	r4,r4,3
237	addi	r6,r6,3
23840:	lbzu	r0,1(r4)
239	stbu	r0,1(r6)
240	bdnz	40b
24165:	blr
242EXPORT_SYMBOL(memcpy)
243EXPORT_SYMBOL(memmove)
244EXPORT_SYMBOL_KASAN(memcpy)
245EXPORT_SYMBOL_KASAN(memmove)
246
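/*
 * generic_memcpy() below is the dcbz-free fallback used while caches are
 * off and for overlapping regions.  It byte-copies until the destination is
 * word aligned (source words may stay unaligned; lwz copes with that on
 * these cores), then moves two words per iteration, then mops up a word and
 * the trailing bytes.  A hedged C sketch of the same structure:
 *
 *	void *generic_memcpy_sketch(void *dst, const void *src, size_t n)
 *	{
 *		u8 *d = dst;
 *		const u8 *s = src;
 *
 *		if (n >= 8) {
 *			while ((unsigned long)d & 3) {
 *				*d++ = *s++;
 *				n--;
 *			}
 *			while (n >= 8) {
 *				((u32 *)d)[0] = ((const u32 *)s)[0];
 *				((u32 *)d)[1] = ((const u32 *)s)[1];
 *				d += 8; s += 8; n -= 8;
 *			}
 *		}
 *		if (n >= 4) {
 *			*(u32 *)d = *(const u32 *)s;
 *			d += 4; s += 4; n -= 4;
 *		}
 *		while (n--)
 *			*d++ = *s++;
 *		return dst;
 *	}
 */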
247generic_memcpy:
248	srwi.	r7,r5,3
249	addi	r6,r3,-4
250	addi	r4,r4,-4
251	beq	2f			/* if less than 8 bytes to do */
252	andi.	r0,r6,3			/* get dest word aligned */
253	mtctr	r7
254	bne	5f
2551:	lwz	r7,4(r4)
256	lwzu	r8,8(r4)
257	stw	r7,4(r6)
258	stwu	r8,8(r6)
259	bdnz	1b
260	andi.	r5,r5,7
2612:	cmplwi	0,r5,4
262	blt	3f
263	lwzu	r0,4(r4)
264	addi	r5,r5,-4
265	stwu	r0,4(r6)
2663:	cmpwi	0,r5,0
267	beqlr
268	mtctr	r5
269	addi	r4,r4,3
270	addi	r6,r6,3
2714:	lbzu	r0,1(r4)
272	stbu	r0,1(r6)
273	bdnz	4b
274	blr
2755:	subfic	r0,r0,4
276	mtctr	r0
2776:	lbz	r7,4(r4)
278	addi	r4,r4,1
279	stb	r7,4(r6)
280	addi	r6,r6,1
281	bdnz	6b
282	subf	r5,r0,r5
283	rlwinm.	r7,r5,32-3,3,31
284	beq	2b
285	mtctr	r7
286	b	1b
287
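/*
 * backwards_memcpy() mirrors generic_memcpy() but starts at the end of both
 * buffers and walks down, which makes it safe for overlapping regions with
 * dst > src (the memmove() case above).  Stripped of the word-at-a-time
 * optimisation it amounts to:
 *
 *	void *backwards_memcpy_sketch(void *dst, const void *src, size_t n)
 *	{
 *		u8 *d = (u8 *)dst + n;
 *		const u8 *s = (const u8 *)src + n;
 *
 *		while (n--)
 *			*--d = *--s;
 *		return dst;
 *	}
 */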
288_GLOBAL(backwards_memcpy)
289	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
290	add	r6,r3,r5
291	add	r4,r4,r5
292	beq	2f
293	andi.	r0,r6,3
294	mtctr	r7
295	bne	5f
2961:	lwz	r7,-4(r4)
297	lwzu	r8,-8(r4)
298	stw	r7,-4(r6)
299	stwu	r8,-8(r6)
300	bdnz	1b
301	andi.	r5,r5,7
3022:	cmplwi	0,r5,4
303	blt	3f
304	lwzu	r0,-4(r4)
305	subi	r5,r5,4
306	stwu	r0,-4(r6)
3073:	cmpwi	0,r5,0
308	beqlr
309	mtctr	r5
3104:	lbzu	r0,-1(r4)
311	stbu	r0,-1(r6)
312	bdnz	4b
313	blr
3145:	mtctr	r0
3156:	lbzu	r7,-1(r4)
316	stbu	r7,-1(r6)
317	bdnz	6b
318	subf	r5,r0,r5
319	rlwinm.	r7,r5,32-3,3,31
320	beq	2b
321	mtctr	r7
322	b	1b
323
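/*
 * __copy_tofrom_user() takes memcpy-style arguments (r3 = to, r4 = from,
 * r5 = count), but either pointer may be a user address, so every load and
 * store that can touch user memory carries an EX_TABLE entry.  On success
 * it returns 0; after a fault the fixup code below works out how many bytes
 * were *not* copied and returns that instead, so a caller can derive the
 * number of bytes that did arrive.  A hedged caller-side sketch (the real
 * wrappers live in the uaccess headers):
 *
 *	static size_t sketch_bytes_copied_from_user(void *to,
 *						    const void __user *from,
 *						    size_t n)
 *	{
 *		return n - __copy_tofrom_user(to, from, n);
 *	}
 */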
324_GLOBAL(__copy_tofrom_user)
325	addi	r4,r4,-4
326	addi	r6,r3,-4
327	neg	r0,r3
328	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
329	beq	58f
330
331	cmplw	0,r5,r0			/* is this more than total to do? */
332	blt	63f			/* if not much to do */
333	andi.	r8,r0,3			/* get it word-aligned first */
334	mtctr	r8
335	beq+	61f
33670:	lbz	r9,4(r4)		/* do some bytes */
33771:	stb	r9,4(r6)
338	addi	r4,r4,1
339	addi	r6,r6,1
340	bdnz	70b
34161:	subf	r5,r0,r5
342	srwi.	r0,r0,2
343	mtctr	r0
344	beq	58f
34572:	lwzu	r9,4(r4)		/* do some words */
34673:	stwu	r9,4(r6)
347	bdnz	72b
348
349	EX_TABLE(70b,100f)
350	EX_TABLE(71b,101f)
351	EX_TABLE(72b,102f)
352	EX_TABLE(73b,103f)
353
35458:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
355	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
356	li	r11,4
357	beq	63f
358
359	/* Here we decide how far ahead to prefetch the source */
360	li	r3,4
361	cmpwi	r0,1
362	li	r7,0
363	ble	114f
364	li	r7,1
365#if MAX_COPY_PREFETCH > 1
366	/* Heuristically, for large transfers we prefetch
367	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
368	   we prefetch 1 cacheline ahead. */
369	cmpwi	r0,MAX_COPY_PREFETCH
370	ble	112f
371	li	r7,MAX_COPY_PREFETCH
372112:	mtctr	r7
373111:	dcbt	r3,r4
374	addi	r3,r3,CACHELINE_BYTES
375	bdnz	111b
376#else
377	dcbt	r3,r4
378	addi	r3,r3,CACHELINE_BYTES
379#endif /* MAX_COPY_PREFETCH > 1 */
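/*
 * Prefetch bookkeeping for the loop below: r3 is the byte offset, relative
 * to the biased r4, of the next source cache line to touch with dcbt, and
 * r7 is how many lines we stay ahead -- MAX_COPY_PREFETCH for large copies,
 * one line for short ones, none when only a single line is left.  The main
 * loop prefetches one line per iteration; once it finishes, the last r0
 * lines (already prefetched) are copied by looping back to 114b with r7 = 0.
 */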
380
381114:	subf	r8,r7,r0
382	mr	r0,r7
383	mtctr	r8
384
38553:	dcbt	r3,r4
38654:	dcbz	r11,r6
387	EX_TABLE(54b,105f)
388/* the main body of the cacheline loop */
389	COPY_16_BYTES_WITHEX(0)
390#if L1_CACHE_BYTES >= 32
391	COPY_16_BYTES_WITHEX(1)
392#if L1_CACHE_BYTES >= 64
393	COPY_16_BYTES_WITHEX(2)
394	COPY_16_BYTES_WITHEX(3)
395#if L1_CACHE_BYTES >= 128
396	COPY_16_BYTES_WITHEX(4)
397	COPY_16_BYTES_WITHEX(5)
398	COPY_16_BYTES_WITHEX(6)
399	COPY_16_BYTES_WITHEX(7)
400#endif
401#endif
402#endif
403	bdnz	53b
404	cmpwi	r0,0
405	li	r3,4
406	li	r7,0
407	bne	114b
408
40963:	srwi.	r0,r5,2
410	mtctr	r0
411	beq	64f
41230:	lwzu	r0,4(r4)
41331:	stwu	r0,4(r6)
414	bdnz	30b
415
41664:	andi.	r0,r5,3
417	mtctr	r0
418	beq+	65f
41940:	lbz	r0,4(r4)
42041:	stb	r0,4(r6)
421	addi	r4,r4,1
422	addi	r6,r6,1
423	bdnz	40b
42465:	li	r3,0
425	blr
426
427/* read fault, initial single-byte copy */
428100:	li	r9,0
429	b	90f
430/* write fault, initial single-byte copy */
431101:	li	r9,1
43290:	subf	r5,r8,r5
433	li	r3,0
434	b	99f
435/* read fault, initial word copy */
436102:	li	r9,0
437	b	91f
438/* write fault, initial word copy */
439103:	li	r9,1
44091:	li	r3,2
441	b	99f
442
443/*
444 * this stuff handles faults in the cacheline loop and branches to either
445 * 104f (if in read part) or 105f (if in write part), after updating r5
446 */
447	COPY_16_BYTES_EXCODE(0)
448#if L1_CACHE_BYTES >= 32
449	COPY_16_BYTES_EXCODE(1)
450#if L1_CACHE_BYTES >= 64
451	COPY_16_BYTES_EXCODE(2)
452	COPY_16_BYTES_EXCODE(3)
453#if L1_CACHE_BYTES >= 128
454	COPY_16_BYTES_EXCODE(4)
455	COPY_16_BYTES_EXCODE(5)
456	COPY_16_BYTES_EXCODE(6)
457	COPY_16_BYTES_EXCODE(7)
458#endif
459#endif
460#endif
461
462/* read fault in cacheline loop */
463104:	li	r9,0
464	b	92f
465/* fault on dcbz (effectively a write fault) */
466/* or write fault in cacheline loop */
467105:	li	r9,1
46892:	li	r3,LG_CACHELINE_BYTES
469	mfctr	r8
470	add	r0,r0,r8
471	b	106f
472/* read fault in final word loop */
473108:	li	r9,0
474	b	93f
475/* write fault in final word loop */
476109:	li	r9,1
47793:	andi.	r5,r5,3
478	li	r3,2
479	b	99f
480/* read fault in final byte loop */
481110:	li	r9,0
482	b	94f
483/* write fault in final byte loop */
484111:	li	r9,1
48594:	li	r5,0
486	li	r3,0
487/*
488 * At this stage the number of bytes not copied is
489 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
490 */
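/*
 * For example, a fault in the final word loop arrives here with r3 = 2, so
 * the shortfall is r5 (trailing bytes) + 4 * ctr (words still to go); in the
 * byte loops r3 = 0 and ctr alone is the shortfall; in the cacheline loop
 * r3 = LG_CACHELINE_BYTES and the shifted count is the number of lines not
 * yet completed (the saved prefetch depth plus the remaining ctr).
 */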
49199:	mfctr	r0
492106:	slw	r3,r0,r3
493	add.	r3,r3,r5
494	beq	120f			/* shouldn't happen */
495	cmpwi	0,r9,0
496	bne	120f
497/* for a read fault, first try to continue the copy one byte at a time */
498	mtctr	r3
499130:	lbz	r0,4(r4)
500131:	stb	r0,4(r6)
501	addi	r4,r4,1
502	addi	r6,r6,1
503	bdnz	130b
504/* then clear out the destination: r3 bytes starting at 4(r6) */
505132:	mfctr	r3
506120:	blr
507
508	EX_TABLE(30b,108b)
509	EX_TABLE(31b,109b)
510	EX_TABLE(40b,110b)
511	EX_TABLE(41b,111b)
512	EX_TABLE(130b,132b)
513	EX_TABLE(131b,120b)
514
515EXPORT_SYMBOL(__copy_tofrom_user)
 
 
 
v4.6
 
  1/*
  2 * Memory copy functions for 32-bit PowerPC.
  3 *
  4 * Copyright (C) 1996-2005 Paul Mackerras.
  5 *
  6 * This program is free software; you can redistribute it and/or
  7 * modify it under the terms of the GNU General Public License
  8 * as published by the Free Software Foundation; either version
  9 * 2 of the License, or (at your option) any later version.
 10 */
 
 11#include <asm/processor.h>
 12#include <asm/cache.h>
 13#include <asm/errno.h>
 14#include <asm/ppc_asm.h>
 
 
 15
 16#define COPY_16_BYTES		\
 17	lwz	r7,4(r4);	\
 18	lwz	r8,8(r4);	\
 19	lwz	r9,12(r4);	\
 20	lwzu	r10,16(r4);	\
 21	stw	r7,4(r6);	\
 22	stw	r8,8(r6);	\
 23	stw	r9,12(r6);	\
 24	stwu	r10,16(r6)
 25
 26#define COPY_16_BYTES_WITHEX(n)	\
 278 ## n ## 0:			\
 28	lwz	r7,4(r4);	\
 298 ## n ## 1:			\
 30	lwz	r8,8(r4);	\
 318 ## n ## 2:			\
 32	lwz	r9,12(r4);	\
 338 ## n ## 3:			\
 34	lwzu	r10,16(r4);	\
 358 ## n ## 4:			\
 36	stw	r7,4(r6);	\
 378 ## n ## 5:			\
 38	stw	r8,8(r6);	\
 398 ## n ## 6:			\
 40	stw	r9,12(r6);	\
 418 ## n ## 7:			\
 42	stwu	r10,16(r6)
 43
 44#define COPY_16_BYTES_EXCODE(n)			\
 459 ## n ## 0:					\
 46	addi	r5,r5,-(16 * n);		\
 47	b	104f;				\
 489 ## n ## 1:					\
 49	addi	r5,r5,-(16 * n);		\
 50	b	105f;				\
 51.section __ex_table,"a";			\
 52	.align	2;				\
 53	.long	8 ## n ## 0b,9 ## n ## 0b;	\
 54	.long	8 ## n ## 1b,9 ## n ## 0b;	\
 55	.long	8 ## n ## 2b,9 ## n ## 0b;	\
 56	.long	8 ## n ## 3b,9 ## n ## 0b;	\
 57	.long	8 ## n ## 4b,9 ## n ## 1b;	\
 58	.long	8 ## n ## 5b,9 ## n ## 1b;	\
 59	.long	8 ## n ## 6b,9 ## n ## 1b;	\
 60	.long	8 ## n ## 7b,9 ## n ## 1b;	\
 61	.text
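/*
 * In this kernel version the exception table is emitted by hand: each
 * ".long faulting_insn, fixup" pair becomes one entry that the page-fault
 * handler searches when a kernel access faults, resuming execution at the
 * fixup address.  At the time the 32-bit powerpc entry was, roughly, a pair
 * of absolute addresses:
 *
 *	struct exception_table_entry {
 *		unsigned long insn;
 *		unsigned long fixup;
 *	};
 *
 * (Later kernels switched to relative entries and hide the details behind
 * the EX_TABLE() macro seen in the v6.8 listing above.)
 */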
 62
 63	.text
 64	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
 65	.stabs	"copy_32.S",N_SO,0,0,0f
 660:
 67
 68CACHELINE_BYTES = L1_CACHE_BYTES
 69LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 70CACHELINE_MASK = (L1_CACHE_BYTES-1)
 71
 72/*
 73 * Use dcbz on the complete cache lines in the destination
 74 * to set them to zero.  This requires that the destination
 75 * area is cacheable.  -- paulus
 76 *
 77 * During early init, cache might not be active yet, so dcbz cannot be used.
 78 * We therefore skip the optimised block that uses dcbz. This jump is
 79 * replaced by a nop once cache is active. This is done in machine_init()
 80 */
 81_GLOBAL(memset)
 82	rlwimi	r4,r4,8,16,23
 83	rlwimi	r4,r4,16,0,15
 84
 85	addi	r6,r3,-4
 86	cmplwi	0,r5,4
 87	blt	7f
 88	stwu	r4,4(r6)
 89	beqlr
 90	andi.	r0,r6,3
 91	add	r5,r0,r5
 92	subf	r6,r0,r6
 93	cmplwi	0,r4,0
 94	bne	2f	/* Use normal procedure if r4 is not zero */
 95_GLOBAL(memset_nocache_branch)
 96	b	2f	/* Skip optimised block until cache is enabled */
 97
 98	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
 99	add	r8,r7,r5
100	srwi	r9,r8,LG_CACHELINE_BYTES
101	addic.	r9,r9,-1	/* total number of complete cachelines */
102	ble	2f
103	xori	r0,r7,CACHELINE_MASK & ~3
104	srwi.	r0,r0,2
105	beq	3f
106	mtctr	r0
1074:	stwu	r4,4(r6)
108	bdnz	4b
1093:	mtctr	r9
110	li	r7,4
11110:	dcbz	r7,r6
112	addi	r6,r6,CACHELINE_BYTES
113	bdnz	10b
114	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
115	addi	r5,r5,4
116
1172:	srwi	r0,r5,2
118	mtctr	r0
119	bdz	6f
1201:	stwu	r4,4(r6)
121	bdnz	1b
1226:	andi.	r5,r5,3
1237:	cmpwi	0,r5,0
124	beqlr
125	mtctr	r5
126	addi	r6,r6,3
1278:	stbu	r4,1(r6)
128	bdnz	8b
129	blr
130
131/*
132 * This version uses dcbz on the complete cache lines in the
133 * destination area to reduce memory traffic.  This requires that
134 * the destination area is cacheable.
135 * We only use this version if the source and dest don't overlap.
136 * -- paulus.
137 *
138 * During early init, cache might not be active yet, so dcbz cannot be used.
139 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
140 * replaced by a nop once cache is active. This is done in machine_init()
141 */
142_GLOBAL(memmove)
143	cmplw	0,r3,r4
144	bgt	backwards_memcpy
145	/* fall through */
146
147_GLOBAL(memcpy)
148	b	generic_memcpy
149	add	r7,r3,r5		/* test if the src & dst overlap */
150	add	r8,r4,r5
151	cmplw	0,r4,r7
152	cmplw	1,r3,r8
153	crand	0,0,4			/* cr0.lt &= cr1.lt */
154	blt	generic_memcpy		/* if regions overlap */
155
156	addi	r4,r4,-4
157	addi	r6,r3,-4
158	neg	r0,r3
159	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
160	beq	58f
161
162	cmplw	0,r5,r0			/* is this more than total to do? */
163	blt	63f			/* if not much to do */
164	andi.	r8,r0,3			/* get it word-aligned first */
165	subf	r5,r0,r5
166	mtctr	r8
167	beq+	61f
16870:	lbz	r9,4(r4)		/* do some bytes */
169	addi	r4,r4,1
170	addi	r6,r6,1
171	stb	r9,3(r6)
172	bdnz	70b
17361:	srwi.	r0,r0,2
174	mtctr	r0
175	beq	58f
17672:	lwzu	r9,4(r4)		/* do some words */
177	stwu	r9,4(r6)
178	bdnz	72b
179
18058:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
181	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
182	li	r11,4
183	mtctr	r0
184	beq	63f
18553:
186	dcbz	r11,r6
187	COPY_16_BYTES
188#if L1_CACHE_BYTES >= 32
189	COPY_16_BYTES
190#if L1_CACHE_BYTES >= 64
191	COPY_16_BYTES
192	COPY_16_BYTES
193#if L1_CACHE_BYTES >= 128
194	COPY_16_BYTES
195	COPY_16_BYTES
196	COPY_16_BYTES
197	COPY_16_BYTES
198#endif
199#endif
200#endif
201	bdnz	53b
202
20363:	srwi.	r0,r5,2
204	mtctr	r0
205	beq	64f
20630:	lwzu	r0,4(r4)
207	stwu	r0,4(r6)
208	bdnz	30b
209
21064:	andi.	r0,r5,3
211	mtctr	r0
212	beq+	65f
213	addi	r4,r4,3
214	addi	r6,r6,3
21540:	lbzu	r0,1(r4)
216	stbu	r0,1(r6)
217	bdnz	40b
21865:	blr
219
220_GLOBAL(generic_memcpy)
221	srwi.	r7,r5,3
222	addi	r6,r3,-4
223	addi	r4,r4,-4
224	beq	2f			/* if less than 8 bytes to do */
225	andi.	r0,r6,3			/* get dest word aligned */
226	mtctr	r7
227	bne	5f
2281:	lwz	r7,4(r4)
229	lwzu	r8,8(r4)
230	stw	r7,4(r6)
231	stwu	r8,8(r6)
232	bdnz	1b
233	andi.	r5,r5,7
2342:	cmplwi	0,r5,4
235	blt	3f
236	lwzu	r0,4(r4)
237	addi	r5,r5,-4
238	stwu	r0,4(r6)
2393:	cmpwi	0,r5,0
240	beqlr
241	mtctr	r5
242	addi	r4,r4,3
243	addi	r6,r6,3
2444:	lbzu	r0,1(r4)
245	stbu	r0,1(r6)
246	bdnz	4b
247	blr
2485:	subfic	r0,r0,4
249	mtctr	r0
2506:	lbz	r7,4(r4)
251	addi	r4,r4,1
252	stb	r7,4(r6)
253	addi	r6,r6,1
254	bdnz	6b
255	subf	r5,r0,r5
256	rlwinm.	r7,r5,32-3,3,31
257	beq	2b
258	mtctr	r7
259	b	1b
260
261_GLOBAL(backwards_memcpy)
262	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
263	add	r6,r3,r5
264	add	r4,r4,r5
265	beq	2f
266	andi.	r0,r6,3
267	mtctr	r7
268	bne	5f
2691:	lwz	r7,-4(r4)
270	lwzu	r8,-8(r4)
271	stw	r7,-4(r6)
272	stwu	r8,-8(r6)
273	bdnz	1b
274	andi.	r5,r5,7
2752:	cmplwi	0,r5,4
276	blt	3f
277	lwzu	r0,-4(r4)
278	subi	r5,r5,4
279	stwu	r0,-4(r6)
2803:	cmpwi	0,r5,0
281	beqlr
282	mtctr	r5
2834:	lbzu	r0,-1(r4)
284	stbu	r0,-1(r6)
285	bdnz	4b
286	blr
2875:	mtctr	r0
2886:	lbzu	r7,-1(r4)
289	stbu	r7,-1(r6)
290	bdnz	6b
291	subf	r5,r0,r5
292	rlwinm.	r7,r5,32-3,3,31
293	beq	2b
294	mtctr	r7
295	b	1b
296
297_GLOBAL(__copy_tofrom_user)
298	addi	r4,r4,-4
299	addi	r6,r3,-4
300	neg	r0,r3
301	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
302	beq	58f
303
304	cmplw	0,r5,r0			/* is this more than total to do? */
305	blt	63f			/* if not much to do */
306	andi.	r8,r0,3			/* get it word-aligned first */
307	mtctr	r8
308	beq+	61f
30970:	lbz	r9,4(r4)		/* do some bytes */
31071:	stb	r9,4(r6)
311	addi	r4,r4,1
312	addi	r6,r6,1
313	bdnz	70b
31461:	subf	r5,r0,r5
315	srwi.	r0,r0,2
316	mtctr	r0
317	beq	58f
31872:	lwzu	r9,4(r4)		/* do some words */
31973:	stwu	r9,4(r6)
320	bdnz	72b
321
322	.section __ex_table,"a"
323	.align	2
324	.long	70b,100f
325	.long	71b,101f
326	.long	72b,102f
327	.long	73b,103f
328	.text
329
33058:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
331	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
332	li	r11,4
333	beq	63f
334
335	/* Here we decide how far ahead to prefetch the source */
336	li	r3,4
337	cmpwi	r0,1
338	li	r7,0
339	ble	114f
340	li	r7,1
341#if MAX_COPY_PREFETCH > 1
342	/* Heuristically, for large transfers we prefetch
343	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
344	   we prefetch 1 cacheline ahead. */
345	cmpwi	r0,MAX_COPY_PREFETCH
346	ble	112f
347	li	r7,MAX_COPY_PREFETCH
348112:	mtctr	r7
349111:	dcbt	r3,r4
350	addi	r3,r3,CACHELINE_BYTES
351	bdnz	111b
352#else
353	dcbt	r3,r4
354	addi	r3,r3,CACHELINE_BYTES
355#endif /* MAX_COPY_PREFETCH > 1 */
356
357114:	subf	r8,r7,r0
358	mr	r0,r7
359	mtctr	r8
360
36153:	dcbt	r3,r4
36254:	dcbz	r11,r6
363	.section __ex_table,"a"
364	.align	2
365	.long	54b,105f
366	.text
367/* the main body of the cacheline loop */
368	COPY_16_BYTES_WITHEX(0)
369#if L1_CACHE_BYTES >= 32
370	COPY_16_BYTES_WITHEX(1)
371#if L1_CACHE_BYTES >= 64
372	COPY_16_BYTES_WITHEX(2)
373	COPY_16_BYTES_WITHEX(3)
374#if L1_CACHE_BYTES >= 128
375	COPY_16_BYTES_WITHEX(4)
376	COPY_16_BYTES_WITHEX(5)
377	COPY_16_BYTES_WITHEX(6)
378	COPY_16_BYTES_WITHEX(7)
379#endif
380#endif
381#endif
382	bdnz	53b
383	cmpwi	r0,0
384	li	r3,4
385	li	r7,0
386	bne	114b
387
38863:	srwi.	r0,r5,2
389	mtctr	r0
390	beq	64f
39130:	lwzu	r0,4(r4)
39231:	stwu	r0,4(r6)
393	bdnz	30b
394
39564:	andi.	r0,r5,3
396	mtctr	r0
397	beq+	65f
39840:	lbz	r0,4(r4)
39941:	stb	r0,4(r6)
400	addi	r4,r4,1
401	addi	r6,r6,1
402	bdnz	40b
40365:	li	r3,0
404	blr
405
406/* read fault, initial single-byte copy */
407100:	li	r9,0
408	b	90f
409/* write fault, initial single-byte copy */
410101:	li	r9,1
41190:	subf	r5,r8,r5
412	li	r3,0
413	b	99f
414/* read fault, initial word copy */
415102:	li	r9,0
416	b	91f
417/* write fault, initial word copy */
418103:	li	r9,1
41991:	li	r3,2
420	b	99f
421
422/*
423 * this stuff handles faults in the cacheline loop and branches to either
424 * 104f (if in read part) or 105f (if in write part), after updating r5
425 */
426	COPY_16_BYTES_EXCODE(0)
427#if L1_CACHE_BYTES >= 32
428	COPY_16_BYTES_EXCODE(1)
429#if L1_CACHE_BYTES >= 64
430	COPY_16_BYTES_EXCODE(2)
431	COPY_16_BYTES_EXCODE(3)
432#if L1_CACHE_BYTES >= 128
433	COPY_16_BYTES_EXCODE(4)
434	COPY_16_BYTES_EXCODE(5)
435	COPY_16_BYTES_EXCODE(6)
436	COPY_16_BYTES_EXCODE(7)
437#endif
438#endif
439#endif
440
441/* read fault in cacheline loop */
442104:	li	r9,0
443	b	92f
444/* fault on dcbz (effectively a write fault) */
445/* or write fault in cacheline loop */
446105:	li	r9,1
44792:	li	r3,LG_CACHELINE_BYTES
448	mfctr	r8
449	add	r0,r0,r8
450	b	106f
451/* read fault in final word loop */
452108:	li	r9,0
453	b	93f
454/* write fault in final word loop */
455109:	li	r9,1
45693:	andi.	r5,r5,3
457	li	r3,2
458	b	99f
459/* read fault in final byte loop */
460110:	li	r9,0
461	b	94f
462/* write fault in final byte loop */
463111:	li	r9,1
46494:	li	r5,0
465	li	r3,0
466/*
467 * At this stage the number of bytes not copied is
468 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
469 */
47099:	mfctr	r0
471106:	slw	r3,r0,r3
472	add.	r3,r3,r5
473	beq	120f			/* shouldn't happen */
474	cmpwi	0,r9,0
475	bne	120f
476/* for a read fault, first try to continue the copy one byte at a time */
477	mtctr	r3
478130:	lbz	r0,4(r4)
479131:	stb	r0,4(r6)
480	addi	r4,r4,1
481	addi	r6,r6,1
482	bdnz	130b
483/* then clear out the destination: r3 bytes starting at 4(r6) */
484132:	mfctr	r3
485	srwi.	r0,r3,2
486	li	r9,0
487	mtctr	r0
488	beq	113f
489112:	stwu	r9,4(r6)
490	bdnz	112b
491113:	andi.	r0,r3,3
492	mtctr	r0
493	beq	120f
494114:	stb	r9,4(r6)
495	addi	r6,r6,1
496	bdnz	114b
497120:	blr
498
499	.section __ex_table,"a"
500	.align	2
501	.long	30b,108b
502	.long	31b,109b
503	.long	40b,110b
504	.long	41b,111b
505	.long	130b,132b
506	.long	131b,120b
507	.long	112b,120b
508	.long	114b,120b
509	.text