/*
 * NOTE(review): scrape artifact — the page-header lines that stood here
 * ("Linux Audio", "Check our new training course", "Loading...", "v6.8")
 * were navigation chrome from an online kernel source browser, not source
 * code.  What follows is a capture of arch/powerpc/lib/copy_32.S as of
 * Linux v6.8; a second capture of the same file at v5.9 follows further
 * down in this document.
 */
  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/*
  3 * Memory copy functions for 32-bit PowerPC.
  4 *
  5 * Copyright (C) 1996-2005 Paul Mackerras.
  6 */
  7#include <linux/export.h>
  8#include <asm/processor.h>
  9#include <asm/cache.h>
 10#include <asm/errno.h>
 11#include <asm/ppc_asm.h>
 
 12#include <asm/code-patching-asm.h>
 13#include <asm/kasan.h>
 14
/*
 * Copy 16 bytes from 4(r4)..16(r4) to 4(r6)..16(r6) using r7-r10 as
 * scratch.  The trailing lwzu/stwu advance r4 and r6 by 16 as a side
 * effect, so back-to-back expansions stream through memory.
 */
 15#define COPY_16_BYTES		\
 16	lwz	r7,4(r4);	\
 17	lwz	r8,8(r4);	\
 18	lwz	r9,12(r4);	\
 19	lwzu	r10,16(r4);	\
 20	stw	r7,4(r6);	\
 21	stw	r8,8(r6);	\
 22	stw	r9,12(r6);	\
 23	stwu	r10,16(r6)
 24
/*
 * Same 16-byte copy as COPY_16_BYTES, but every load/store carries a
 * token-pasted numeric label 8<n>0 .. 8<n>7 so that the matching
 * COPY_16_BYTES_EXCODE(n) expansion can attach an exception-table
 * fixup to each individual user-memory access.
 */
 25#define COPY_16_BYTES_WITHEX(n)	\
 268 ## n ## 0:			\
 27	lwz	r7,4(r4);	\
 288 ## n ## 1:			\
 29	lwz	r8,8(r4);	\
 308 ## n ## 2:			\
 31	lwz	r9,12(r4);	\
 328 ## n ## 3:			\
 33	lwzu	r10,16(r4);	\
 348 ## n ## 4:			\
 35	stw	r7,4(r6);	\
 368 ## n ## 5:			\
 37	stw	r8,8(r6);	\
 388 ## n ## 6:			\
 39	stw	r9,12(r6);	\
 408 ## n ## 7:			\
 41	stwu	r10,16(r6)
 42
/*
 * Fault fixups for COPY_16_BYTES_WITHEX(n).  A fault on one of the
 * loads (labels 8<n>0-8<n>3) is routed to 9<n>0 which branches to 104f
 * (read-fault handler); a fault on a store (8<n>4-8<n>7) is routed to
 * 9<n>1 which branches to 105f (write-fault handler).  Both first do
 * "addi r5,r5,-(16*n)" — accounting in r5 for the n 16-byte chunks of
 * this cache line that completed before the faulting one.
 */
 43#define COPY_16_BYTES_EXCODE(n)			\
 449 ## n ## 0:					\
 45	addi	r5,r5,-(16 * n);		\
 46	b	104f;				\
 479 ## n ## 1:					\
 48	addi	r5,r5,-(16 * n);		\
 49	b	105f;				\
 50	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
 51	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
 52	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
 53	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
 54	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
 55	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
 56	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
 57	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
 58
 59	.text



 60
/* Cache geometry, all derived from the kernel's configured L1 line size. */
 61CACHELINE_BYTES = L1_CACHE_BYTES
 62LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 63CACHELINE_MASK = (L1_CACHE_BYTES-1)
 64
/*
 * memset16(u16 *s, u16 v, size_t n) — fill n halfwords at s with v.
 * In:  r3 = s, r4 = v, r5 = n (halfword count).  Returns r3 unchanged
 * (r3 is never written here).  Duplicates v into both halves of r4,
 * stores n/2 full words, then one trailing sth if n is odd.
 * Not assembled for KASAN builds — presumably an instrumented C
 * version is used instead; TODO confirm.
 */
 65#ifndef CONFIG_KASAN
 66_GLOBAL(memset16)
 67	rlwinm.	r0 ,r5, 31, 1, 31	/* r0 = n >> 1 = number of full words */
 68	addi	r6, r3, -4
 69	beq-	2f			/* n < 2: no full word to store */
 70	rlwimi	r4 ,r4 ,16 ,0 ,15	/* replicate halfword into top half of r4 */
 71	mtctr	r0
 721:	stwu	r4, 4(r6)
 73	bdnz	1b
 742:	andi.	r0, r5, 1		/* odd trailing halfword? */
 75	beqlr
 76	sth	r4, 4(r6)
 77	blr
 78EXPORT_SYMBOL(memset16)
 79#endif
 80
 81/*
 82 * Use dcbz on the complete cache lines in the destination
 83 * to set them to zero.  This requires that the destination
 84 * area is cacheable.  -- paulus
 85 *
 86 * During early init, cache might not be active yet, so dcbz cannot be used.
 87 * We therefore skip the optimised bloc that uses dcbz. This jump is
 88 * replaced by a nop once cache is active. This is done in machine_init()
 89 */
/*
 * memset(void *s, int c, size_t n)
 * In:  r3 = s, r4 = c, r5 = n.  Returns r3 unchanged (never written).
 * r6 is the store cursor, pre-decremented by 4 for the stwu/stbu idiom.
 */
 90_GLOBAL_KASAN(memset)
 91	cmplwi	0,r5,4
 92	blt	7f			/* n < 4: byte store loop at 7: */
 93
/* Replicate the fill byte into all four bytes of r4. */
 94	rlwimi	r4,r4,8,16,23
 95	rlwimi	r4,r4,16,0,15
 96
 97	stw	r4,0(r3)
 98	beqlr				/* n == 4 exactly: done */
 99	andi.	r0,r3,3			/* misalignment of the destination */
100	add	r5,r0,r5
101	subf	r6,r0,r3
102	cmplwi	0,r4,0			/* cr0 for the patched 'bne' below: c == 0? */
103	/*
104	 * Skip optimised bloc until cache is enabled. Will be replaced
105	 * by 'bne' during boot to use normal procedure if r4 is not zero
106	 */
1075:	b	2f
108	patch_site	5b, patch__memset_nocache
109
/* dcbz fast path: only reached once patched, and only when c == 0. */
110	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
111	add	r8,r7,r5
112	srwi	r9,r8,LG_CACHELINE_BYTES
113	addic.	r9,r9,-1	/* total number of complete cachelines */
114	ble	2f
115	xori	r0,r7,CACHELINE_MASK & ~3
116	srwi.	r0,r0,2			/* words to store up to the line boundary */
117	beq	3f
118	mtctr	r0
1194:	stwu	r4,4(r6)
120	bdnz	4b
1213:	mtctr	r9
122	li	r7,4
12310:	dcbz	r7,r6			/* zero a whole cache line at once */
124	addi	r6,r6,CACHELINE_BYTES
125	bdnz	10b
126	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
127	addi	r5,r5,4
128
/* Word loop for whatever is left in r5, then trailing bytes. */
1292:	srwi	r0,r5,2
130	mtctr	r0
131	bdz	6f
1321:	stwu	r4,4(r6)
133	bdnz	1b
1346:	andi.	r5,r5,3
135	beqlr
136	mtctr	r5
137	addi	r6,r6,3
1388:	stbu	r4,1(r6)
139	bdnz	8b
140	blr
141
/* n < 4: store r5 bytes one at a time. */
1427:	cmpwi	0,r5,0
143	beqlr
144	mtctr	r5
145	addi	r6,r3,-1
1469:	stbu	r4,1(r6)
147	bdnz	9b
148	blr
149EXPORT_SYMBOL(memset)
150EXPORT_SYMBOL_KASAN(memset)
151
152/*
153 * This version uses dcbz on the complete cache lines in the
154 * destination area to reduce memory traffic.  This requires that
155 * the destination area is cacheable.
156 * We only use this version if the source and dest don't overlap.
157 * -- paulus.
158 *
159 * During early init, cache might not be active yet, so dcbz cannot be used.
160 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
161 * replaced by a nop once cache is active. This is done in machine_init()
162 */
/*
 * memmove(void *dst, const void *src, size_t n): if dst > src the
 * regions may overlap in the dangerous direction, so copy backwards;
 * otherwise a forward memcpy is safe and we fall through into it.
 * In: r3 = dst, r4 = src, r5 = n.  Returns r3 unchanged.
 */
163_GLOBAL_KASAN(memmove)
164	cmplw	0,r3,r4
165	bgt	backwards_memcpy
166	/* fall through */
167
168_GLOBAL_KASAN(memcpy)
/* Patched to a nop by machine_init() once the data cache is usable. */
1691:	b	generic_memcpy
170	patch_site	1b, patch__memcpy_nocache
171
172	add	r7,r3,r5		/* test if the src & dst overlap */
173	add	r8,r4,r5
174	cmplw	0,r4,r7
175	cmplw	1,r3,r8
176	crand	0,0,4			/* cr0.lt &= cr1.lt */
177	blt	generic_memcpy		/* if regions overlap */
178
179	addi	r4,r4,-4
180	addi	r6,r3,-4
181	neg	r0,r3
182	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
183	beq	58f
184
185	cmplw	0,r5,r0			/* is this more than total to do? */
186	blt	63f			/* if not much to do */
187	andi.	r8,r0,3			/* get it word-aligned first */
188	subf	r5,r0,r5
189	mtctr	r8
190	beq+	61f
19170:	lbz	r9,4(r4)		/* do some bytes */
192	addi	r4,r4,1
193	addi	r6,r6,1
194	stb	r9,3(r6)
195	bdnz	70b
19661:	srwi.	r0,r0,2
197	mtctr	r0
198	beq	58f
19972:	lwzu	r9,4(r4)		/* do some words */
200	stwu	r9,4(r6)
201	bdnz	72b
202
/* Whole cache lines: dcbz the destination line, then copy into it. */
20358:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
204	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
205	li	r11,4
206	mtctr	r0
207	beq	63f
20853:
209	dcbz	r11,r6
210	COPY_16_BYTES
211#if L1_CACHE_BYTES >= 32
212	COPY_16_BYTES
213#if L1_CACHE_BYTES >= 64
214	COPY_16_BYTES
215	COPY_16_BYTES
216#if L1_CACHE_BYTES >= 128
217	COPY_16_BYTES
218	COPY_16_BYTES
219	COPY_16_BYTES
220	COPY_16_BYTES
221#endif
222#endif
223#endif
224	bdnz	53b
225
/* Remaining whole words. */
22663:	srwi.	r0,r5,2
227	mtctr	r0
228	beq	64f
22930:	lwzu	r0,4(r4)
230	stwu	r0,4(r6)
231	bdnz	30b
232
/* Trailing 0-3 bytes. */
23364:	andi.	r0,r5,3
234	mtctr	r0
235	beq+	65f
236	addi	r4,r4,3
237	addi	r6,r6,3
23840:	lbzu	r0,1(r4)
239	stbu	r0,1(r6)
240	bdnz	40b
24165:	blr
242EXPORT_SYMBOL(memcpy)
243EXPORT_SYMBOL(memmove)
244EXPORT_SYMBOL_KASAN(memcpy)
245EXPORT_SYMBOL_KASAN(memmove)
246
/*
 * generic_memcpy — forward copy that never touches dcbz, so it is safe
 * before the data cache is enabled.  In: r3 = dst, r4 = src, r5 = len.
 * Returns r3 unchanged (never written here).  Main loop moves 8 bytes
 * per iteration once the destination is word-aligned, then one trailing
 * word and finally single bytes.
 */
247generic_memcpy:
248	srwi.	r7,r5,3			/* r7 = len >> 3 = 8-byte chunks */
249	addi	r6,r3,-4
250	addi	r4,r4,-4
251	beq	2f			/* if less than 8 bytes to do */
252	andi.	r0,r6,3			/* get dest word aligned */
253	mtctr	r7
254	bne	5f
2551:	lwz	r7,4(r4)
256	lwzu	r8,8(r4)
257	stw	r7,4(r6)
258	stwu	r8,8(r6)
259	bdnz	1b
260	andi.	r5,r5,7			/* r5 = leftover bytes (< 8) */
2612:	cmplwi	0,r5,4
262	blt	3f
263	lwzu	r0,4(r4)
264	addi	r5,r5,-4
265	stwu	r0,4(r6)
2663:	cmpwi	0,r5,0
267	beqlr
268	mtctr	r5
269	addi	r4,r4,3
270	addi	r6,r6,3
2714:	lbzu	r0,1(r4)
272	stbu	r0,1(r6)
273	bdnz	4b
274	blr
/* 5: destination unaligned — copy 4 - (dst & 3) bytes, then rejoin. */
2755:	subfic	r0,r0,4
276	mtctr	r0
2776:	lbz	r7,4(r4)
278	addi	r4,r4,1
279	stb	r7,4(r6)
280	addi	r6,r6,1
281	bdnz	6b
282	subf	r5,r0,r5
283	rlwinm.	r7,r5,32-3,3,31		/* r7 = remaining len >> 3 */
284	beq	2b
285	mtctr	r7
286	b	1b
287
/*
 * backwards_memcpy — descending copy used by memmove when dst > src,
 * so overlapping regions are copied correctly.  In: r3 = dst, r4 = src,
 * r5 = len.  Returns r3 unchanged.  Mirror image of generic_memcpy:
 * both cursors start one past the end and walk down with lwzu/stwu.
 */
288_GLOBAL(backwards_memcpy)
289	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
290	add	r6,r3,r5
291	add	r4,r4,r5
292	beq	2f			/* fewer than 8 bytes: skip main loop */
293	andi.	r0,r6,3			/* destination end word-aligned? */
294	mtctr	r7
295	bne	5f
2961:	lwz	r7,-4(r4)
297	lwzu	r8,-8(r4)
298	stw	r7,-4(r6)
299	stwu	r8,-8(r6)
300	bdnz	1b
301	andi.	r5,r5,7
3022:	cmplwi	0,r5,4
303	blt	3f
304	lwzu	r0,-4(r4)
305	subi	r5,r5,4
306	stwu	r0,-4(r6)
3073:	cmpwi	0,r5,0
308	beqlr
309	mtctr	r5
3104:	lbzu	r0,-1(r4)
311	stbu	r0,-1(r6)
312	bdnz	4b
313	blr
/* 5: align the destination end first with r0 single-byte copies. */
3145:	mtctr	r0
3156:	lbzu	r7,-1(r4)
316	stbu	r7,-1(r6)
317	bdnz	6b
318	subf	r5,r0,r5
319	rlwinm.	r7,r5,32-3,3,31		/* r7 = remaining len >> 3 */
320	beq	2b
321	mtctr	r7
322	b	1b
323
/*
 * unsigned long __copy_tofrom_user(void *to, const void *from, unsigned long size)
 * In:  r3 = to, r4 = from, r5 = size.
 * Out: r3 = number of bytes NOT copied (0 on complete success).
 * Every access that may fault carries an EX_TABLE entry; the fixup code
 * below reconstructs how much was left as r5 + (ctr << r3) and, for a
 * read fault, first retries the tail one byte at a time.
 * Note r3 is reused inside as the dcbt prefetch offset; the return
 * value is materialised only at the exits.
 */
324_GLOBAL(__copy_tofrom_user)
325	addi	r4,r4,-4
326	addi	r6,r3,-4
327	neg	r0,r3
328	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
329	beq	58f
330
331	cmplw	0,r5,r0			/* is this more than total to do? */
332	blt	63f			/* if not much to do */
333	andi.	r8,r0,3			/* get it word-aligned first */
334	mtctr	r8
335	beq+	61f
33670:	lbz	r9,4(r4)		/* do some bytes */
33771:	stb	r9,4(r6)
338	addi	r4,r4,1
339	addi	r6,r6,1
340	bdnz	70b
34161:	subf	r5,r0,r5
342	srwi.	r0,r0,2
343	mtctr	r0
344	beq	58f
34572:	lwzu	r9,4(r4)		/* do some words */
34673:	stwu	r9,4(r6)
347	bdnz	72b
348
349	EX_TABLE(70b,100f)
350	EX_TABLE(71b,101f)
351	EX_TABLE(72b,102f)
352	EX_TABLE(73b,103f)
353
35458:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
355	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
356	li	r11,4
357	beq	63f
358
359	/* Here we decide how far ahead to prefetch the source */
360	li	r3,4
361	cmpwi	r0,1
362	li	r7,0
363	ble	114f
364	li	r7,1
365#if MAX_COPY_PREFETCH > 1
366	/* Heuristically, for large transfers we prefetch
367	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
368	   we prefetch 1 cacheline ahead. */
369	cmpwi	r0,MAX_COPY_PREFETCH
370	ble	112f
371	li	r7,MAX_COPY_PREFETCH
372112:	mtctr	r7
373111:	dcbt	r3,r4
374	addi	r3,r3,CACHELINE_BYTES
375	bdnz	111b
376#else
377	dcbt	r3,r4
378	addi	r3,r3,CACHELINE_BYTES
379#endif /* MAX_COPY_PREFETCH > 1 */
380
/* r8 = lines to copy now; r0 keeps the r7 prefetched-ahead lines for later. */
381114:	subf	r8,r7,r0
382	mr	r0,r7
383	mtctr	r8
384
38553:	dcbt	r3,r4
38654:	dcbz	r11,r6
387	EX_TABLE(54b,105f)
388/* the main body of the cacheline loop */
389	COPY_16_BYTES_WITHEX(0)
390#if L1_CACHE_BYTES >= 32
391	COPY_16_BYTES_WITHEX(1)
392#if L1_CACHE_BYTES >= 64
393	COPY_16_BYTES_WITHEX(2)
394	COPY_16_BYTES_WITHEX(3)
395#if L1_CACHE_BYTES >= 128
396	COPY_16_BYTES_WITHEX(4)
397	COPY_16_BYTES_WITHEX(5)
398	COPY_16_BYTES_WITHEX(6)
399	COPY_16_BYTES_WITHEX(7)
400#endif
401#endif
402#endif
403	bdnz	53b
404	cmpwi	r0,0
405	li	r3,4
406	li	r7,0
407	bne	114b			/* finish the lines that were only prefetched */
408
/* Remaining whole words. */
40963:	srwi.	r0,r5,2
410	mtctr	r0
411	beq	64f
41230:	lwzu	r0,4(r4)
41331:	stwu	r0,4(r6)
414	bdnz	30b
415
/* Trailing 0-3 bytes. */
41664:	andi.	r0,r5,3
417	mtctr	r0
418	beq+	65f
41940:	lbz	r0,4(r4)
42041:	stb	r0,4(r6)
421	addi	r4,r4,1
422	addi	r6,r6,1
423	bdnz	40b
42465:	li	r3,0			/* success: 0 bytes uncopied */
425	blr
426
/* read fault, initial single-byte copy */
428100:	li	r9,0
429	b	90f
/* write fault, initial single-byte copy */
431101:	li	r9,1
43290:	subf	r5,r8,r5
433	li	r3,0
434	b	99f
/* read fault, initial word copy */
436102:	li	r9,0
437	b	91f
/* write fault, initial word copy */
439103:	li	r9,1
44091:	li	r3,2
441	b	99f
442
443/*
444 * this stuff handles faults in the cacheline loop and branches to either
445 * 104f (if in read part) or 105f (if in write part), after updating r5
446 */
447	COPY_16_BYTES_EXCODE(0)
448#if L1_CACHE_BYTES >= 32
449	COPY_16_BYTES_EXCODE(1)
450#if L1_CACHE_BYTES >= 64
451	COPY_16_BYTES_EXCODE(2)
452	COPY_16_BYTES_EXCODE(3)
453#if L1_CACHE_BYTES >= 128
454	COPY_16_BYTES_EXCODE(4)
455	COPY_16_BYTES_EXCODE(5)
456	COPY_16_BYTES_EXCODE(6)
457	COPY_16_BYTES_EXCODE(7)
458#endif
459#endif
460#endif
461
/* read fault in cacheline loop */
463104:	li	r9,0
464	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
467105:	li	r9,1
46892:	li	r3,LG_CACHELINE_BYTES
469	mfctr	r8
470	add	r0,r0,r8		/* lines remaining = prefetched + ctr */
471	b	106f
/* read fault in final word loop */
473108:	li	r9,0
474	b	93f
/* write fault in final word loop */
476109:	li	r9,1
47793:	andi.	r5,r5,3
478	li	r3,2
479	b	99f
/* read fault in final byte loop */
481110:	li	r9,0
482	b	94f
/* write fault in final byte loop */
484111:	li	r9,1
48594:	li	r5,0
486	li	r3,0
487/*
488 * At this stage the number of bytes not copied is
489 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
490 */
49199:	mfctr	r0
492106:	slw	r3,r0,r3
493	add.	r3,r3,r5
494	beq	120f			/* shouldn't happen */
495	cmpwi	0,r9,0
496	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
498	mtctr	r3
499130:	lbz	r0,4(r4)
500131:	stb	r0,4(r6)
501	addi	r4,r4,1
502	addi	r6,r6,1
503	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
/* NOTE(review): the comment above looks stale — no clearing happens here;
 * the code only returns the remaining count (ctr) in r3.  Confirm against
 * kernel history before relying on zeroed destination bytes. */
505132:	mfctr	r3
506120:	blr
507
508	EX_TABLE(30b,108b)
509	EX_TABLE(31b,109b)
510	EX_TABLE(40b,110b)
511	EX_TABLE(41b,111b)
512	EX_TABLE(130b,132b)
513	EX_TABLE(131b,120b)
514
515EXPORT_SYMBOL(__copy_tofrom_user)
/*
 * NOTE(review): scrape artifact — "v5.9" marked the start of a second
 * capture of this same file (arch/powerpc/lib/copy_32.S) at Linux v5.9.
 * Everything below duplicates the v6.8 code above, differing only in the
 * include list (<asm/export.h> instead of <linux/export.h>) and the
 * since-removed .stabs debug directives.
 */
  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/*
  3 * Memory copy functions for 32-bit PowerPC.
  4 *
  5 * Copyright (C) 1996-2005 Paul Mackerras.
  6 */
 
  7#include <asm/processor.h>
  8#include <asm/cache.h>
  9#include <asm/errno.h>
 10#include <asm/ppc_asm.h>
 11#include <asm/export.h>
 12#include <asm/code-patching-asm.h>
 13#include <asm/kasan.h>
 14
/*
 * NOTE(review): v5.9 duplicate of the COPY_16_BYTES* macros and cache
 * constants documented in the v6.8 capture above; identical except for
 * the .stabs debug directives after .text.
 */
 15#define COPY_16_BYTES		\
 16	lwz	r7,4(r4);	\
 17	lwz	r8,8(r4);	\
 18	lwz	r9,12(r4);	\
 19	lwzu	r10,16(r4);	\
 20	stw	r7,4(r6);	\
 21	stw	r8,8(r6);	\
 22	stw	r9,12(r6);	\
 23	stwu	r10,16(r6)
 24
 25#define COPY_16_BYTES_WITHEX(n)	\
 268 ## n ## 0:			\
 27	lwz	r7,4(r4);	\
 288 ## n ## 1:			\
 29	lwz	r8,8(r4);	\
 308 ## n ## 2:			\
 31	lwz	r9,12(r4);	\
 328 ## n ## 3:			\
 33	lwzu	r10,16(r4);	\
 348 ## n ## 4:			\
 35	stw	r7,4(r6);	\
 368 ## n ## 5:			\
 37	stw	r8,8(r6);	\
 388 ## n ## 6:			\
 39	stw	r9,12(r6);	\
 408 ## n ## 7:			\
 41	stwu	r10,16(r6)
 42
 43#define COPY_16_BYTES_EXCODE(n)			\
 449 ## n ## 0:					\
 45	addi	r5,r5,-(16 * n);		\
 46	b	104f;				\
 479 ## n ## 1:					\
 48	addi	r5,r5,-(16 * n);		\
 49	b	105f;				\
 50	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
 51	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
 52	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
 53	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
 54	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
 55	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
 56	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
 57	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
 58
 59	.text
 60	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
 61	.stabs	"copy_32.S",N_SO,0,0,0f
 620:
 63
 64CACHELINE_BYTES = L1_CACHE_BYTES
 65LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 66CACHELINE_MASK = (L1_CACHE_BYTES-1)
 67
/*
 * NOTE(review): v5.9 duplicate of memset16 above — fill r5 halfwords at
 * r3 with r4; returns r3 unchanged.  See the v6.8 copy for commentary.
 */
 68#ifndef CONFIG_KASAN
 69_GLOBAL(memset16)
 70	rlwinm.	r0 ,r5, 31, 1, 31
 71	addi	r6, r3, -4
 72	beq-	2f
 73	rlwimi	r4 ,r4 ,16 ,0 ,15
 74	mtctr	r0
 751:	stwu	r4, 4(r6)
 76	bdnz	1b
 772:	andi.	r0, r5, 1
 78	beqlr
 79	sth	r4, 4(r6)
 80	blr
 81EXPORT_SYMBOL(memset16)
 82#endif
 83
/*
 * NOTE(review): v5.9 duplicate of memset above (r3 = dest, r4 = byte,
 * r5 = len; returns r3).  Identical logic; see the v6.8 copy for
 * detailed commentary.
 */
 84/*
 85 * Use dcbz on the complete cache lines in the destination
 86 * to set them to zero.  This requires that the destination
 87 * area is cacheable.  -- paulus
 88 *
 89 * During early init, cache might not be active yet, so dcbz cannot be used.
 90 * We therefore skip the optimised bloc that uses dcbz. This jump is
 91 * replaced by a nop once cache is active. This is done in machine_init()
 92 */
 93_GLOBAL_KASAN(memset)
 94	cmplwi	0,r5,4
 95	blt	7f
 96
 97	rlwimi	r4,r4,8,16,23
 98	rlwimi	r4,r4,16,0,15
 99
100	stw	r4,0(r3)
101	beqlr
102	andi.	r0,r3,3
103	add	r5,r0,r5
104	subf	r6,r0,r3
105	cmplwi	0,r4,0
106	/*
107	 * Skip optimised bloc until cache is enabled. Will be replaced
108	 * by 'bne' during boot to use normal procedure if r4 is not zero
109	 */
1105:	b	2f
111	patch_site	5b, patch__memset_nocache
112
113	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
114	add	r8,r7,r5
115	srwi	r9,r8,LG_CACHELINE_BYTES
116	addic.	r9,r9,-1	/* total number of complete cachelines */
117	ble	2f
118	xori	r0,r7,CACHELINE_MASK & ~3
119	srwi.	r0,r0,2
120	beq	3f
121	mtctr	r0
1224:	stwu	r4,4(r6)
123	bdnz	4b
1243:	mtctr	r9
125	li	r7,4
12610:	dcbz	r7,r6
127	addi	r6,r6,CACHELINE_BYTES
128	bdnz	10b
129	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
130	addi	r5,r5,4
131
1322:	srwi	r0,r5,2
133	mtctr	r0
134	bdz	6f
1351:	stwu	r4,4(r6)
136	bdnz	1b
1376:	andi.	r5,r5,3
138	beqlr
139	mtctr	r5
140	addi	r6,r6,3
1418:	stbu	r4,1(r6)
142	bdnz	8b
143	blr
144
1457:	cmpwi	0,r5,0
146	beqlr
147	mtctr	r5
148	addi	r6,r3,-1
1499:	stbu	r4,1(r6)
150	bdnz	9b
151	blr
152EXPORT_SYMBOL(memset)
153EXPORT_SYMBOL_KASAN(memset)
154
/*
 * NOTE(review): v5.9 duplicate of memmove/memcpy above (r3 = dst,
 * r4 = src, r5 = len; returns r3).  Identical logic; see the v6.8 copy
 * for detailed commentary.
 */
155/*
156 * This version uses dcbz on the complete cache lines in the
157 * destination area to reduce memory traffic.  This requires that
158 * the destination area is cacheable.
159 * We only use this version if the source and dest don't overlap.
160 * -- paulus.
161 *
162 * During early init, cache might not be active yet, so dcbz cannot be used.
163 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
164 * replaced by a nop once cache is active. This is done in machine_init()
165 */
166_GLOBAL_KASAN(memmove)
167	cmplw	0,r3,r4
168	bgt	backwards_memcpy
169	/* fall through */
170
171_GLOBAL_KASAN(memcpy)
1721:	b	generic_memcpy
173	patch_site	1b, patch__memcpy_nocache
174
175	add	r7,r3,r5		/* test if the src & dst overlap */
176	add	r8,r4,r5
177	cmplw	0,r4,r7
178	cmplw	1,r3,r8
179	crand	0,0,4			/* cr0.lt &= cr1.lt */
180	blt	generic_memcpy		/* if regions overlap */
181
182	addi	r4,r4,-4
183	addi	r6,r3,-4
184	neg	r0,r3
185	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
186	beq	58f
187
188	cmplw	0,r5,r0			/* is this more than total to do? */
189	blt	63f			/* if not much to do */
190	andi.	r8,r0,3			/* get it word-aligned first */
191	subf	r5,r0,r5
192	mtctr	r8
193	beq+	61f
19470:	lbz	r9,4(r4)		/* do some bytes */
195	addi	r4,r4,1
196	addi	r6,r6,1
197	stb	r9,3(r6)
198	bdnz	70b
19961:	srwi.	r0,r0,2
200	mtctr	r0
201	beq	58f
20272:	lwzu	r9,4(r4)		/* do some words */
203	stwu	r9,4(r6)
204	bdnz	72b
205
20658:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
207	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
208	li	r11,4
209	mtctr	r0
210	beq	63f
21153:
212	dcbz	r11,r6
213	COPY_16_BYTES
214#if L1_CACHE_BYTES >= 32
215	COPY_16_BYTES
216#if L1_CACHE_BYTES >= 64
217	COPY_16_BYTES
218	COPY_16_BYTES
219#if L1_CACHE_BYTES >= 128
220	COPY_16_BYTES
221	COPY_16_BYTES
222	COPY_16_BYTES
223	COPY_16_BYTES
224#endif
225#endif
226#endif
227	bdnz	53b
228
22963:	srwi.	r0,r5,2
230	mtctr	r0
231	beq	64f
23230:	lwzu	r0,4(r4)
233	stwu	r0,4(r6)
234	bdnz	30b
235
23664:	andi.	r0,r5,3
237	mtctr	r0
238	beq+	65f
239	addi	r4,r4,3
240	addi	r6,r6,3
24140:	lbzu	r0,1(r4)
242	stbu	r0,1(r6)
243	bdnz	40b
24465:	blr
245EXPORT_SYMBOL(memcpy)
246EXPORT_SYMBOL(memmove)
247EXPORT_SYMBOL_KASAN(memcpy)
248EXPORT_SYMBOL_KASAN(memmove)
249
/*
 * NOTE(review): v5.9 duplicate of generic_memcpy above — forward copy
 * without dcbz, safe before caches are enabled.  See the v6.8 copy for
 * detailed commentary.
 */
250generic_memcpy:
251	srwi.	r7,r5,3
252	addi	r6,r3,-4
253	addi	r4,r4,-4
254	beq	2f			/* if less than 8 bytes to do */
255	andi.	r0,r6,3			/* get dest word aligned */
256	mtctr	r7
257	bne	5f
2581:	lwz	r7,4(r4)
259	lwzu	r8,8(r4)
260	stw	r7,4(r6)
261	stwu	r8,8(r6)
262	bdnz	1b
263	andi.	r5,r5,7
2642:	cmplwi	0,r5,4
265	blt	3f
266	lwzu	r0,4(r4)
267	addi	r5,r5,-4
268	stwu	r0,4(r6)
2693:	cmpwi	0,r5,0
270	beqlr
271	mtctr	r5
272	addi	r4,r4,3
273	addi	r6,r6,3
2744:	lbzu	r0,1(r4)
275	stbu	r0,1(r6)
276	bdnz	4b
277	blr
2785:	subfic	r0,r0,4
279	mtctr	r0
2806:	lbz	r7,4(r4)
281	addi	r4,r4,1
282	stb	r7,4(r6)
283	addi	r6,r6,1
284	bdnz	6b
285	subf	r5,r0,r5
286	rlwinm.	r7,r5,32-3,3,31
287	beq	2b
288	mtctr	r7
289	b	1b
290
/*
 * NOTE(review): v5.9 duplicate of backwards_memcpy above — descending
 * copy for memmove when dst > src.  See the v6.8 copy for commentary.
 */
291_GLOBAL(backwards_memcpy)
292	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
293	add	r6,r3,r5
294	add	r4,r4,r5
295	beq	2f
296	andi.	r0,r6,3
297	mtctr	r7
298	bne	5f
2991:	lwz	r7,-4(r4)
300	lwzu	r8,-8(r4)
301	stw	r7,-4(r6)
302	stwu	r8,-8(r6)
303	bdnz	1b
304	andi.	r5,r5,7
3052:	cmplwi	0,r5,4
306	blt	3f
307	lwzu	r0,-4(r4)
308	subi	r5,r5,4
309	stwu	r0,-4(r6)
3103:	cmpwi	0,r5,0
311	beqlr
312	mtctr	r5
3134:	lbzu	r0,-1(r4)
314	stbu	r0,-1(r6)
315	bdnz	4b
316	blr
3175:	mtctr	r0
3186:	lbzu	r7,-1(r4)
319	stbu	r7,-1(r6)
320	bdnz	6b
321	subf	r5,r0,r5
322	rlwinm.	r7,r5,32-3,3,31
323	beq	2b
324	mtctr	r7
325	b	1b
326
/*
 * NOTE(review): v5.9 duplicate of __copy_tofrom_user above.
 * In: r3 = to, r4 = from, r5 = size; out: r3 = bytes NOT copied
 * (0 on success).  See the v6.8 copy for detailed commentary on the
 * prefetch heuristic and the exception-fixup bookkeeping.
 */
327_GLOBAL(__copy_tofrom_user)
328	addi	r4,r4,-4
329	addi	r6,r3,-4
330	neg	r0,r3
331	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
332	beq	58f
333
334	cmplw	0,r5,r0			/* is this more than total to do? */
335	blt	63f			/* if not much to do */
336	andi.	r8,r0,3			/* get it word-aligned first */
337	mtctr	r8
338	beq+	61f
33970:	lbz	r9,4(r4)		/* do some bytes */
34071:	stb	r9,4(r6)
341	addi	r4,r4,1
342	addi	r6,r6,1
343	bdnz	70b
34461:	subf	r5,r0,r5
345	srwi.	r0,r0,2
346	mtctr	r0
347	beq	58f
34872:	lwzu	r9,4(r4)		/* do some words */
34973:	stwu	r9,4(r6)
350	bdnz	72b
351
352	EX_TABLE(70b,100f)
353	EX_TABLE(71b,101f)
354	EX_TABLE(72b,102f)
355	EX_TABLE(73b,103f)
356
35758:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
358	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
359	li	r11,4
360	beq	63f
361
362	/* Here we decide how far ahead to prefetch the source */
363	li	r3,4
364	cmpwi	r0,1
365	li	r7,0
366	ble	114f
367	li	r7,1
368#if MAX_COPY_PREFETCH > 1
369	/* Heuristically, for large transfers we prefetch
370	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
371	   we prefetch 1 cacheline ahead. */
372	cmpwi	r0,MAX_COPY_PREFETCH
373	ble	112f
374	li	r7,MAX_COPY_PREFETCH
375112:	mtctr	r7
376111:	dcbt	r3,r4
377	addi	r3,r3,CACHELINE_BYTES
378	bdnz	111b
379#else
380	dcbt	r3,r4
381	addi	r3,r3,CACHELINE_BYTES
382#endif /* MAX_COPY_PREFETCH > 1 */
383
384114:	subf	r8,r7,r0
385	mr	r0,r7
386	mtctr	r8
387
38853:	dcbt	r3,r4
38954:	dcbz	r11,r6
390	EX_TABLE(54b,105f)
391/* the main body of the cacheline loop */
392	COPY_16_BYTES_WITHEX(0)
393#if L1_CACHE_BYTES >= 32
394	COPY_16_BYTES_WITHEX(1)
395#if L1_CACHE_BYTES >= 64
396	COPY_16_BYTES_WITHEX(2)
397	COPY_16_BYTES_WITHEX(3)
398#if L1_CACHE_BYTES >= 128
399	COPY_16_BYTES_WITHEX(4)
400	COPY_16_BYTES_WITHEX(5)
401	COPY_16_BYTES_WITHEX(6)
402	COPY_16_BYTES_WITHEX(7)
403#endif
404#endif
405#endif
406	bdnz	53b
407	cmpwi	r0,0
408	li	r3,4
409	li	r7,0
410	bne	114b
411
41263:	srwi.	r0,r5,2
413	mtctr	r0
414	beq	64f
41530:	lwzu	r0,4(r4)
41631:	stwu	r0,4(r6)
417	bdnz	30b
418
41964:	andi.	r0,r5,3
420	mtctr	r0
421	beq+	65f
42240:	lbz	r0,4(r4)
42341:	stb	r0,4(r6)
424	addi	r4,r4,1
425	addi	r6,r6,1
426	bdnz	40b
42765:	li	r3,0
428	blr
429
430/* read fault, initial single-byte copy */
431100:	li	r9,0
432	b	90f
433/* write fault, initial single-byte copy */
434101:	li	r9,1
43590:	subf	r5,r8,r5
436	li	r3,0
437	b	99f
438/* read fault, initial word copy */
439102:	li	r9,0
440	b	91f
441/* write fault, initial word copy */
442103:	li	r9,1
44391:	li	r3,2
444	b	99f
445
446/*
447 * this stuff handles faults in the cacheline loop and branches to either
448 * 104f (if in read part) or 105f (if in write part), after updating r5
449 */
450	COPY_16_BYTES_EXCODE(0)
451#if L1_CACHE_BYTES >= 32
452	COPY_16_BYTES_EXCODE(1)
453#if L1_CACHE_BYTES >= 64
454	COPY_16_BYTES_EXCODE(2)
455	COPY_16_BYTES_EXCODE(3)
456#if L1_CACHE_BYTES >= 128
457	COPY_16_BYTES_EXCODE(4)
458	COPY_16_BYTES_EXCODE(5)
459	COPY_16_BYTES_EXCODE(6)
460	COPY_16_BYTES_EXCODE(7)
461#endif
462#endif
463#endif
464
465/* read fault in cacheline loop */
466104:	li	r9,0
467	b	92f
468/* fault on dcbz (effectively a write fault) */
469/* or write fault in cacheline loop */
470105:	li	r9,1
47192:	li	r3,LG_CACHELINE_BYTES
472	mfctr	r8
473	add	r0,r0,r8
474	b	106f
475/* read fault in final word loop */
476108:	li	r9,0
477	b	93f
478/* write fault in final word loop */
479109:	li	r9,1
48093:	andi.	r5,r5,3
481	li	r3,2
482	b	99f
483/* read fault in final byte loop */
484110:	li	r9,0
485	b	94f
486/* write fault in final byte loop */
487111:	li	r9,1
48894:	li	r5,0
489	li	r3,0
490/*
491 * At this stage the number of bytes not copied is
492 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
493 */
49499:	mfctr	r0
495106:	slw	r3,r0,r3
496	add.	r3,r3,r5
497	beq	120f			/* shouldn't happen */
498	cmpwi	0,r9,0
499	bne	120f
500/* for a read fault, first try to continue the copy one byte at a time */
501	mtctr	r3
502130:	lbz	r0,4(r4)
503131:	stb	r0,4(r6)
504	addi	r4,r4,1
505	addi	r6,r6,1
506	bdnz	130b
507/* then clear out the destination: r3 bytes starting at 4(r6) */
508132:	mfctr	r3
509120:	blr
510
511	EX_TABLE(30b,108b)
512	EX_TABLE(31b,109b)
513	EX_TABLE(40b,110b)
514	EX_TABLE(41b,111b)
515	EX_TABLE(130b,132b)
516	EX_TABLE(131b,120b)
517
518EXPORT_SYMBOL(__copy_tofrom_user)