/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

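/*
 * Callers normally do not invoke these routines directly; copy_user_generic()
 * (in arch/x86/include/asm/uaccess_64.h at the time) selects one of the three
 * variants below at runtime via the alternatives mechanism, roughly:
 *
 *	alternative_call_2(copy_user_generic_unrolled,
 *			   copy_user_generic_string, X86_FEATURE_REP_GOOD,
 *			   copy_user_enhanced_fast_string, X86_FEATURE_ERMS,
 *			   ...)
 *
 * (sketch only; see that header for the exact register constraints).
 */
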
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail	/* ecx is zerorest also */
	.previous
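/*
 * Fixup math, derived from the loops above: a fault in the 64-byte loop lands
 * at 30:, where %ecx still counts 64-byte blocks, so shifting it left by 6 and
 * adding the sub-64-byte remainder in %edx gives an upper bound on the bytes
 * not yet copied. 40: does the same for the 8-byte loop (%edx += %ecx * 8) and
 * 50: for the byte loop (%edx = %ecx); copy_user_handle_tail then retries the
 * remainder byte by byte.
 */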

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * More would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
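/*
 * The 4GB limit follows directly from the register usage below: the count is
 * passed in %edx and moved into %ecx as a 32-bit value (zero-extending into
 * %rcx), so rep movsq/movsb never see a count wider than 32 bits.
 */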
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the CPU supports it.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
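/*
 * ERMS ("Enhanced REP MOVSB/STOSB") is enumerated by CPUID leaf 7, EBX bit 9,
 * and exposed by the kernel as X86_FEATURE_ERMS; the alternatives patching is
 * what routes copies to this variant when that bit is present.
 */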
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
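/*
 * The loops below use movnti, a weakly-ordered non-temporal store that
 * bypasses the cache. That is why both the success path and the fault
 * fixups issue sfence before leaving, so the non-temporal stores become
 * globally visible before any later stores.
 */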
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous
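/*
 * Even when a fault is taken, any pending movnti stores must still be fenced;
 * hence the sfence in .L_fixup_handle_tail before falling back to the common
 * byte-at-a-time tail handler.
 */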

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:

	_ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
	_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm

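/*
 * ALIGN_DESTINATION arithmetic: when %rdi is not 8-byte aligned, %ecx is set
 * to 8 - (%edi & 7), that many bytes are copied one at a time, and %edx is
 * reduced by the same amount so the main loops only see the aligned portion.
 * A fault in this prologue goes to .Lcopy_user_handle_align, which adds the
 * not-yet-copied %ecx back into %edx before the common tail handler runs.
 */
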
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb .Lcopy_user_short_string_bytes
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz copy_user_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
	jmp copy_user_short_string

30:	shll $6,%ecx
	addl %ecx,%edx
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * More would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep movsq
2:	movl %edx,%ecx
3:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the CPU supports it.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
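/*
 * The ALTERNATIVE below is keyed on X86_FEATURE_FSRM ("Fast Short REP MOVSB"):
 * on CPUs with FSRM the compare-and-branch is patched out because rep movsb is
 * fast even for tiny copies; without it, copies shorter than 64 bytes are
 * diverted to copy_user_short_string instead.
 */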
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	/* CPUs without FSRM should avoid rep movsb for short copies */
	ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
	movl %edx,%ecx
1:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * eax trap number written by ex_handler_copy()
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	cmp $X86_TRAP_MC,%eax
	je 3f

	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	RET

3:
	movl %edx,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)

.Lcopy_user_handle_align:
	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

SYM_CODE_END(.Lcopy_user_handle_tail)
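/*
 * The X86_TRAP_MC check above exists because a machine check means the source
 * data is (or may be) poisoned; retrying it byte by byte would likely just
 * raise the same fault again, so the whole remaining count is reported as
 * uncopied instead.
 */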

/*
 * Finish memcpy of less than 64 bytes. #AC should already be set.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (< 64)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
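/*
 * "#AC should already be set" refers to EFLAGS.AC: callers jump here after
 * their own ASM_STAC, so user-space access is already permitted under SMAP
 * and this helper only needs the ASM_CLAC on the way out.
 */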
SYM_CODE_START_LOCAL(copy_user_short_string)
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .Lcopy_user_short_string_bytes
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
.Lcopy_user_short_string_bytes:
	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx			/* ecx is zerorest also */
60:	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_CODE_END(copy_user_short_string)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET

.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)