/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
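/*
 * For orientation only: a hedged C sketch of the copy structure used by
 * the routine below (64-byte unrolled blocks, then 8-byte words, then
 * single bytes). The helper name and types are made up for illustration;
 * the real routine additionally relies on exception-table fixups to
 * report how many bytes were left uncopied when a fault occurs.
 *
 *      static unsigned long unrolled_copy_sketch(char *dst, const char *src,
 *                                                unsigned long len)
 *      {
 *              while (len >= 64) {             // 4x8-byte unrolled block
 *                      for (int i = 0; i < 8; i++)
 *                              ((long *)dst)[i] = ((const long *)src)[i];
 *                      dst += 64; src += 64; len -= 64;
 *              }
 *              while (len >= 8) {              // 8-byte words
 *                      *(long *)dst = *(const long *)src;
 *                      dst += 8; src += 8; len -= 8;
 *              }
 *              while (len) {                   // trailing bytes
 *                      *dst++ = *src++;
 *                      len--;
 *              }
 *              return len;     // eax: uncopied bytes, 0 on the fault-free path
 *      }
 */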
ENTRY(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Allowing more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to lift this
 * limit, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
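/*
 * Illustration of how the count is split below (hedged sketch, not a
 * kernel interface): the quadword count feeds rep movsq and the
 * remainder feeds rep movsb. Both counts pass through the 32-bit %ecx,
 * which is where the 4GB limit described above comes from.
 *
 *      static void split_count_sketch(unsigned int count)
 *      {
 *              unsigned int quads = count >> 3;        // shrl $3,%ecx -> rep movsq
 *              unsigned int bytes = count & 7;         // andl $7,%edx -> rep movsb
 *
 *              // e.g. count = 100: 12 quadwords (96 bytes) + 4 trailing bytes
 *              (void)quads;
 *              (void)bytes;
 *      }
 */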
ENTRY(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
11:     leal (%rdx,%rcx,8),%ecx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the feature is
 * available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
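/*
 * Roughly how a caller ends up in one of the three variants (hedged C
 * sketch with a made-up helper name; the kernel really patches the call
 * site once at boot through the alternatives mechanism, see the
 * ALTERNATIVE_JUMP macro further down, so no run-time branch remains):
 *
 *      unsigned long copy_user_dispatch_sketch(void *to, const void *from,
 *                                              unsigned long len)
 *      {
 *              if (boot_cpu_has(X86_FEATURE_ERMS))
 *                      return copy_user_enhanced_fast_string(to, from, len);
 *              if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *                      return copy_user_generic_string(to, from, len);
 *              return copy_user_generic_unrolled(to, from, len);
 *      }
 */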
ENTRY(copy_user_enhanced_fast_string)
        ASM_STAC
        movl %edx,%ecx
1:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 * - Require 8-byte alignment when size is 8 bytes or larger.
 * - Require 4-byte alignment when size is 4 bytes.
 */
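/*
 * Hedged sketch of the alignment policy stated above (illustration only;
 * the real decisions are made inline in the assembly below). It answers:
 * may non-temporal (movnti) stores be used for this destination/size?
 *
 *      static int nocache_stores_ok_sketch(unsigned long dst, unsigned long size)
 *      {
 *              if (size >= 8)
 *                      return (dst & 7) == 0;  // need an 8-byte aligned destination
 *              if (size == 4)
 *                      return (dst & 3) == 0;  // need a 4-byte aligned destination
 *              return 0;                       // otherwise fall back to cached copies
 *      }
 */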
ENTRY(__copy_user_nocache)
        ASM_STAC

        /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
        jb .L_4b_nocache_copy_entry

        /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION

        /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz .L_4x8b_nocache_copy_loop

        /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .L_4b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:     movq (%rsi),%r8
21:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz .L_8b_nocache_copy_loop

        /* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
        andl %edx,%edx
        jz .L_finish_copy

        /* If destination is not 4-byte aligned, go to byte copy: */
        movl %edi,%ecx
        andl $3,%ecx
        jnz .L_1b_cache_copy_entry

        /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
        andl $3,%edx
        shrl $2,%ecx
        jz .L_1b_cache_copy_entry       /* jump if count is 0 */

        /* Perform 4-byte nocache copy: */
30:     movl (%rsi),%r8d
31:     movnti %r8d,(%rdi)
        leaq 4(%rsi),%rsi
        leaq 4(%rdi),%rdi

        /* If no bytes left, we're done: */
        andl %edx,%edx
        jz .L_finish_copy

        /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
        movl %edx,%ecx
.L_1b_cache_copy_loop:
40:     movb (%rsi),%al
41:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_1b_cache_copy_loop

        /* Finished copying; fence the prior stores */
.L_finish_copy:
        xorl %eax,%eax
        ASM_CLAC
        sfence
        ret

        .section .fixup,"ax"
.L_fixup_4x8b_copy:
        shll $6,%ecx
        addl %ecx,%edx
        jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
        lea (%rdx,%rcx,8),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
        lea (%rdx,%rcx,4),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
        movl %ecx,%edx
.L_fixup_handle_tail:
        sfence
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(20b,.L_fixup_8b_copy)
        _ASM_EXTABLE(21b,.L_fixup_8b_copy)
        _ASM_EXTABLE(30b,.L_fixup_4b_copy)
        _ASM_EXTABLE(31b,.L_fixup_4b_copy)
        _ASM_EXTABLE(40b,.L_fixup_1b_copy)
        _ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
/*
 * What follows is an earlier revision of this file: it predates SMAP
 * (no STAC/CLAC), carries CFI annotations, and still contains the
 * _copy_to_user/_copy_from_user entry points with the ALTERNATIVE_JUMP
 * dispatch.
 */

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>

/*
 * By placing feature2 after feature1 in altinstructions section, we logically
 * implement:
 * If CPU has feature2, jmp to alt2 is used
 * else if CPU has feature1, jmp to alt1 is used
 * else jmp to orig is used.
 */
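/*
 * The same selection, written out as a hedged C sketch (illustration
 * only; the real mechanism rewrites the jump target once at boot from
 * the .altinstructions entries, so no run-time test remains):
 *
 *      static void *pick_target_sketch(int has_feature1, int has_feature2,
 *                                      void *orig, void *alt1, void *alt2)
 *      {
 *              if (has_feature2)       // the later entry wins
 *                      return alt2;
 *              if (has_feature1)
 *                      return alt1;
 *              return orig;
 *      }
 */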
        .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
        .byte 0xe9      /* 32bit jump */
        .long \orig-1f  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9      /* near jump with 32bit immediate */
        .long \alt1-1b  /* offset */    /* or alternatively to alt1 */
3:      .byte 0xe9      /* near jump with 32bit immediate */
        .long \alt2-1b  /* offset */    /* or alternatively to alt2 */
        .previous

        .section .altinstructions,"a"
        altinstruction_entry 0b,2b,\feature1,5,5
        altinstruction_entry 0b,3b,\feature2,5,5
        .previous
        .endm

        .macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:
        .section .fixup,"ax"
103:    addl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(100b,103b)
        _ASM_EXTABLE(101b,103b)
#endif
        .endm

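/*
 * The byte count used by ALIGN_DESTINATION above is 8 - (dest & 7),
 * computed with the subl/negl pair. Hedged C sketch of the same
 * arithmetic (illustration only, helper name made up):
 *
 *      static unsigned int align_bytes_sketch(unsigned long dst)
 *      {
 *              unsigned int misalign = dst & 7;        // andl $7,%ecx
 *
 *              return misalign ? 8 - misalign : 0;     // subl $8,%ecx; negl %ecx
 *              // e.g. dst = 0x1003: copy 5 bytes before the aligned loop
 *      }
 */
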
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
                copy_user_generic_unrolled,copy_user_generic_string, \
                copy_user_enhanced_fast_string
        CFI_ENDPROC
ENDPROC(_copy_to_user)
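/*
 * The limit check above is, in effect (hedged C sketch; "addr_limit"
 * stands for the thread's TI_addr_limit value, and the jc test catches
 * a wrapped ptr + len):
 *
 *      static int range_ok_sketch(unsigned long ptr, unsigned long len,
 *                                 unsigned long addr_limit)
 *      {
 *              unsigned long end = ptr + len;
 *
 *              if (end < ptr)                  // addq %rdx,%rcx; jc bad_*_user
 *                      return 0;
 *              return end <= addr_limit;       // cmpq TI_addr_limit(%rax),%rcx; ja
 *      }
 */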

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
                copy_user_generic_unrolled,copy_user_generic_string, \
                copy_user_enhanced_fast_string
        CFI_ENDPROC
ENDPROC(_copy_from_user)

        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
ENDPROC(bad_from_user)
        .previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     lea (%rdx,%rcx,8),%rdx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Allowing more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to lift this
 * limit, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 4f
        cmpl $8,%edx
        jb 2f           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
4:      xorl %eax,%eax
        ret

        .section .fixup,"ax"
11:     lea (%rdx,%rcx,8),%rcx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
        CFI_ENDPROC
ENDPROC(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the feature is
 * available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 2f
        movl %edx,%ecx
1:      rep
        movsb
2:      xorl %eax,%eax
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
        CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)