/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

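/*
 * Copy 16 bytes per expansion: four word loads followed by four word
 * stores.  The lwzu/stwu in the last pair use update addressing, so r4
 * and r6 advance by 16 bytes each time and always point 4 bytes below
 * the next data to transfer.
 */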
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

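/*
 * As COPY_16_BYTES, but every load and store carries a numeric label
 * (8<n>0 .. 8<n>7) so that COPY_16_BYTES_EXCODE(n) can attach an
 * exception-table fixup to each instruction.
 */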
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

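/*
 * Fault fixups for COPY_16_BYTES_WITHEX(n): 9<n>0 is reached on a
 * faulting load, 9<n>1 on a faulting store.  Both subtract from r5 the
 * 16*n bytes of the current cache line that earlier expansions have
 * already copied, then branch to the common read (104f) or write
 * (105f) fault handler.
 */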
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
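/*
 * memset16: fill r5 halfwords at r3 with the halfword value in r4.
 * The value is replicated into both halves of r4 so the main loop can
 * store a word (two halfwords) per iteration; an odd trailing halfword
 * is stored with sth.  Assembled only when KASAN is disabled.
 */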
_GLOBAL(memset16)
	rlwinm.	r0, r5, 31, 1, 31
	addi	r6, r3, -4
	beq-	2f
	rlwimi	r4, r4, 16, 0, 15
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero. This requires that the destination
 * area is cacheable. -- paulus
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once the cache is active. This is done in machine_init().
 */
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23		/* replicate the fill byte ... */
	rlwimi	r4,r4,16,0,15		/* ... into all four bytes of r4 */

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled. Will be replaced
	 * by 'bne' during boot to use the normal procedure if r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

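	/*
	 * dcbz path: store words up to the first cache line boundary,
	 * zero whole cache lines with dcbz, then fall through to 2: to
	 * finish the remainder of the last (partial) line.
	 */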
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic. This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 * replaced by a nop once the cache is active. This is done in machine_init().
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

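/*
 * Main loop: for each complete destination cache line, establish it in
 * the cache with dcbz (so it is zeroed locally rather than fetched from
 * memory) and then copy it 16 bytes at a time.
 */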
58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

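/*
 * Fallback copy used while the data cache is still disabled and when
 * the regions overlap (forwards): the main loop moves two words per
 * iteration, with byte loops to word-align the destination and to
 * handle the tail.
 */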
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

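/*
 * Copy r5 bytes from r4 to r3, walking backwards from the end of both
 * buffers.  Used by memmove when the destination is above the source,
 * so overlapping regions are copied correctly.
 */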
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

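/*
 * __copy_tofrom_user: copy r5 bytes from r4 to r3, where either
 * pointer may be a user-space address.  Returns in r3 the number of
 * bytes that could NOT be copied (0 on success); faulting accesses are
 * recovered through the exception-table fixups below, which compute
 * that residual count.
 */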
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

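	/*
	 * Copy r8 = r0 - r7 cache lines, touching the source r3 bytes
	 * ahead with dcbt as we go.  The remaining r7 (already
	 * prefetched) lines are then copied by a second pass through
	 * 114: with r7 = 0.
	 */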
53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

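/*
 * Exception fixups.  Each handler below records whether the fault was
 * on a load (r9 = 0) or a store (r9 = 1) and, in r3, the log2 of the
 * unit size the interrupted loop was copying; the common code at
 * 99:/106: then computes how many bytes were left uncopied and returns
 * that count in r3.
 */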
/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/*
 * If the retried load faults again, 132: returns the bytes still
 * uncopied (from ctr) in r3; a faulting store just returns the count
 * computed above (120:).  The uncopied part of the destination is not
 * cleared here.
 */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)