/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

.pushsection .noinstr.text, "ax"

/*
 * We build a jump to memcpy_orig by default, which gets NOPped out on
 * the majority of x86 CPUs (those that set REP_GOOD). In addition, on
 * CPUs that have the enhanced REP MOVSB/STOSB feature (ERMS), those
 * NOPs are changed to a jmp to memcpy_erms, which does the memory copy
 * with a single REP MOVSB.
 */
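
/*
 * Illustrative only: roughly how the patched-in dispatch behaves at
 * runtime (C-style pseudocode, not part of the build; boot_cpu_has()
 * stands in here for the boot-time alternatives patching):
 *
 *	if (boot_cpu_has(X86_FEATURE_ERMS))
 *		memcpy_erms(dst, src, len);	// single REP MOVSB
 *	else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *		;				// fall through to the REP MOVSQ body below
 *	else
 *		memcpy_orig(dst, src, len);	// unrolled 8-byte moves
 */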

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 */
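
/*
 * Illustrative only: a C sketch of the register convention above and of
 * what the REP_GOOD body below does (not part of the build;
 * rep_movsq()/rep_movsb() are hypothetical helpers standing in for the
 * REP MOVSQ / REP MOVSB instructions):
 *
 *	void *memcpy(void *dst, const void *src, size_t len)
 *	{
 *		void *ret = dst;			// %rax keeps the original %rdi
 *
 *		rep_movsq(&dst, &src, len >> 3);	// len / 8 quadwords
 *		rep_movsb(&dst, &src, len & 7);		// len % 8 tail bytes
 *		return ret;
 *	}
 */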
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_LOCAL(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
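
/*
 * Illustrative only: a rough C-with-inline-asm equivalent (a sketch, not
 * part of the build; memcpy_erms_c is a name made up for this comment):
 *
 *	static void *memcpy_erms_c(void *dst, const void *src, size_t len)
 *	{
 *		void *ret = dst;
 *
 *		asm volatile("rep movsb"
 *			     : "+D" (dst), "+S" (src), "+c" (len)
 *			     : : "memory");
 *		return ret;
 *	}
 */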
SYM_FUNC_START(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
SYM_FUNC_END(memcpy_erms)

SYM_FUNC_START(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate the copy position from the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle,
	 * so append NOPs to keep the loop within the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate the copy position back at the head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Copy 16 to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Copy 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Copy 4 to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Copy 1 to 3 bytes.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
SYM_FUNC_END(memcpy_orig)

.popsection

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 */
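
/*
 * Illustrative only: the intended caller pattern, assuming the
 * "return 0 on success, else bytes not copied" convention implemented
 * below (handle_short_copy() is a hypothetical helper):
 *
 *	unsigned long rem = __memcpy_mcsafe(dst, src, len);
 *
 *	if (rem)
 *		handle_short_copy(dst, src, len - rem, rem);
 */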
SYM_FUNC_START(__memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
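	/*
	 * Worked example: if %rsi ends in ...5, then %esi & 7 == 5, so
	 * %ecx becomes -(5 - 8) = 3: three leading bytes are copied and
	 * %edx is reduced by 3 before the 8-byte-aligned word loop.
	 */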
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes

.L_8byte_aligned:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
.L_done:
	ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

	.section .fixup, "ax"
	/*
	 * Return number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 */
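	/*
	 * Worked example: a read fault at .L_read_words with %ecx == 2
	 * (whole words not yet copied, including the faulting one) and
	 * %edx == 5 trailing bytes reports 2 * 8 + 5 = 21 bytes not
	 * copied.
	 */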
.E_read_words:
	shll $3, %ecx
.E_leading_bytes:
	addl %edx, %ecx
.E_trailing_bytes:
	mov %ecx, %eax
	jmp .L_done

	/*
	 * For write fault handling, given the destination is unaligned,
	 * we handle faults on multi-byte writes with a byte-by-byte
	 * copy up to the write-protected page.
	 */
.E_write_words:
	shll $3, %ecx
	addl %edx, %ecx
	movl %ecx, %edx
	jmp mcsafe_handle_tail

	.previous

	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif