/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
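/*
 * For orientation only: a hedged C sketch of the copy structure used by
 * the routine below (64-byte unrolled blocks, then 8-byte words, then
 * single bytes). The helper name and types are made up for illustration;
 * the real routine additionally relies on exception-table fixups to
 * report how many bytes were left uncopied when a fault occurs.
 *
 *      static unsigned long unrolled_copy_sketch(char *dst, const char *src,
 *                                                unsigned long len)
 *      {
 *              while (len >= 64) {             // 4x8-byte unrolled block
 *                      for (int i = 0; i < 8; i++)
 *                              ((long *)dst)[i] = ((const long *)src)[i];
 *                      dst += 64; src += 64; len -= 64;
 *              }
 *              while (len >= 8) {              // 8-byte words
 *                      *(long *)dst = *(const long *)src;
 *                      dst += 8; src += 8; len -= 8;
 *              }
 *              while (len) {                   // trailing bytes
 *                      *dst++ = *src++;
 *                      len--;
 *              }
 *              return len;     // eax: uncopied bytes, 0 on the fault-free path
 *      }
 */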
ENTRY(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Allowing more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to lift this
 * limit, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
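/*
 * Illustration of how the count is split below (hedged sketch, not a
 * kernel interface): the quadword count feeds rep movsq and the
 * remainder feeds rep movsb. Both counts pass through the 32-bit %ecx,
 * which is where the 4GB limit described above comes from.
 *
 *      static void split_count_sketch(unsigned int count)
 *      {
 *              unsigned int quads = count >> 3;        // shrl $3,%ecx -> rep movsq
 *              unsigned int bytes = count & 7;         // andl $7,%edx -> rep movsb
 *
 *              // e.g. count = 100: 12 quadwords (96 bytes) + 4 trailing bytes
 *              (void)quads;
 *              (void)bytes;
 *      }
 */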
ENTRY(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
11:     leal (%rdx,%rcx,8),%ecx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the feature is
 * available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
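/*
 * Roughly how a caller ends up in one of the three variants (hedged C
 * sketch with a made-up helper name; the kernel really patches the call
 * site once at boot through the alternatives mechanism, see the
 * ALTERNATIVE_JUMP macro further down, so no run-time branch remains):
 *
 *      unsigned long copy_user_dispatch_sketch(void *to, const void *from,
 *                                              unsigned long len)
 *      {
 *              if (boot_cpu_has(X86_FEATURE_ERMS))
 *                      return copy_user_enhanced_fast_string(to, from, len);
 *              if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *                      return copy_user_generic_string(to, from, len);
 *              return copy_user_generic_unrolled(to, from, len);
 *      }
 */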
ENTRY(copy_user_enhanced_fast_string)
        ASM_STAC
        movl %edx,%ecx
1:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 * - Require 8-byte alignment when size is 8 bytes or larger.
 * - Require 4-byte alignment when size is 4 bytes.
 */
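/*
 * Hedged sketch of the alignment policy stated above (illustration only;
 * the real decisions are made inline in the assembly below). It answers:
 * may non-temporal (movnti) stores be used for this destination/size?
 *
 *      static int nocache_stores_ok_sketch(unsigned long dst, unsigned long size)
 *      {
 *              if (size >= 8)
 *                      return (dst & 7) == 0;  // need an 8-byte aligned destination
 *              if (size == 4)
 *                      return (dst & 3) == 0;  // need a 4-byte aligned destination
 *              return 0;                       // otherwise fall back to cached copies
 *      }
 */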
ENTRY(__copy_user_nocache)
        ASM_STAC

        /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
        jb .L_4b_nocache_copy_entry

        /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION

        /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz .L_4x8b_nocache_copy_loop

        /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .L_4b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:     movq (%rsi),%r8
21:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz .L_8b_nocache_copy_loop

        /* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
        andl %edx,%edx
        jz .L_finish_copy

        /* If destination is not 4-byte aligned, go to byte copy: */
        movl %edi,%ecx
        andl $3,%ecx
        jnz .L_1b_cache_copy_entry

        /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
        andl $3,%edx
        shrl $2,%ecx
        jz .L_1b_cache_copy_entry       /* jump if count is 0 */

        /* Perform 4-byte nocache copy: */
30:     movl (%rsi),%r8d
31:     movnti %r8d,(%rdi)
        leaq 4(%rsi),%rsi
        leaq 4(%rdi),%rdi

        /* If no bytes left, we're done: */
        andl %edx,%edx
        jz .L_finish_copy

        /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
        movl %edx,%ecx
.L_1b_cache_copy_loop:
40:     movb (%rsi),%al
41:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_1b_cache_copy_loop

        /* Finished copying; fence the prior stores */
.L_finish_copy:
        xorl %eax,%eax
        ASM_CLAC
        sfence
        ret

        .section .fixup,"ax"
.L_fixup_4x8b_copy:
        shll $6,%ecx
        addl %ecx,%edx
        jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
        lea (%rdx,%rcx,8),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
        lea (%rdx,%rcx,4),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
        movl %ecx,%edx
.L_fixup_handle_tail:
        sfence
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(20b,.L_fixup_8b_copy)
        _ASM_EXTABLE(21b,.L_fixup_8b_copy)
        _ASM_EXTABLE(30b,.L_fixup_4b_copy)
        _ASM_EXTABLE(31b,.L_fixup_4b_copy)
        _ASM_EXTABLE(40b,.L_fixup_1b_copy)
        _ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
/*
 * What follows is an earlier revision of this file: it predates SMAP
 * (no STAC/CLAC), carries CFI annotations, and still contains the
 * _copy_to_user/_copy_from_user entry points with the ALTERNATIVE_JUMP
 * dispatch.
 */

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>

/*
 * By placing feature2 after feature1 in altinstructions section, we logically
 * implement:
 * If CPU has feature2, jmp to alt2 is used
 * else if CPU has feature1, jmp to alt1 is used
 * else jmp to orig is used.
 */
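/*
 * The same selection, written out as a hedged C sketch (illustration
 * only; the real mechanism rewrites the jump target once at boot from
 * the .altinstructions entries, so no run-time test remains):
 *
 *      static void *pick_target_sketch(int has_feature1, int has_feature2,
 *                                      void *orig, void *alt1, void *alt2)
 *      {
 *              if (has_feature2)       // the later entry wins
 *                      return alt2;
 *              if (has_feature1)
 *                      return alt1;
 *              return orig;
 *      }
 */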
        .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
        .byte 0xe9      /* 32bit jump */
        .long \orig-1f  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9      /* near jump with 32bit immediate */
        .long \alt1-1b  /* offset */    /* or alternatively to alt1 */
3:      .byte 0xe9      /* near jump with 32bit immediate */
        .long \alt2-1b  /* offset */    /* or alternatively to alt2 */
        .previous

        .section .altinstructions,"a"
        altinstruction_entry 0b,2b,\feature1,5,5
        altinstruction_entry 0b,3b,\feature2,5,5
        .previous
        .endm

        .macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:
        .section .fixup,"ax"
103:    addl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(100b,103b)
        _ASM_EXTABLE(101b,103b)
#endif
        .endm

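/*
 * The byte count used by ALIGN_DESTINATION above is 8 - (dest & 7),
 * computed with the subl/negl pair. Hedged C sketch of the same
 * arithmetic (illustration only, helper name made up):
 *
 *      static unsigned int align_bytes_sketch(unsigned long dst)
 *      {
 *              unsigned int misalign = dst & 7;        // andl $7,%ecx
 *
 *              return misalign ? 8 - misalign : 0;     // subl $8,%ecx; negl %ecx
 *              // e.g. dst = 0x1003: copy 5 bytes before the aligned loop
 *      }
 */
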
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
                copy_user_generic_unrolled,copy_user_generic_string, \
                copy_user_enhanced_fast_string
        CFI_ENDPROC
ENDPROC(_copy_to_user)
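/*
 * The limit check above is, in effect (hedged C sketch; "addr_limit"
 * stands for the thread's TI_addr_limit value, and the jc test catches
 * a wrapped ptr + len):
 *
 *      static int range_ok_sketch(unsigned long ptr, unsigned long len,
 *                                 unsigned long addr_limit)
 *      {
 *              unsigned long end = ptr + len;
 *
 *              if (end < ptr)                  // addq %rdx,%rcx; jc bad_*_user
 *                      return 0;
 *              return end <= addr_limit;       // cmpq TI_addr_limit(%rax),%rcx; ja
 *      }
 */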

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
                copy_user_generic_unrolled,copy_user_generic_string, \
                copy_user_enhanced_fast_string
        CFI_ENDPROC
ENDPROC(_copy_from_user)

        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
ENDPROC(bad_from_user)
        .previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     lea (%rdx,%rcx,8),%rdx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Allowing more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to lift this
 * limit, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 4f
        cmpl $8,%edx
        jb 2f           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
4:      xorl %eax,%eax
        ret

        .section .fixup,"ax"
11:     lea (%rdx,%rcx,8),%rcx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
        CFI_ENDPROC
ENDPROC(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the feature is
 * available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 2f
        movl %edx,%ecx
1:      rep
        movsb
2:      xorl %eax,%eax
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
        CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)