/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access. The ALTERNATIVE
	 * below is patched at boot: CPUs with CPU_FEATURE_UAL branch to
	 * __memcpy_fast, all others to the byte-wise __memcpy_generic.
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)
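
/*
 * Roughly equivalent C for the dispatch above (a sketch for reference,
 * not the kernel's actual code; cpu_has_ual stands in for the patched
 * ALTERNATIVE, which leaves no runtime test at all):
 *
 *	void *memcpy(void *dst, const void *src, size_t n)
 *	{
 *		return cpu_has_ual ? __memcpy_fast(dst, src, n)
 *				   : __memcpy_generic(dst, src, n);
 *	}
 */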

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0
	beqz	a2, 2f

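	/* copy one byte per iteration; a3 keeps the original dst for return */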
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3		/* return the original dst */
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

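/*
 * void *__memcpy_small(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n, with 0 <= n <= 8
 *
 * Dispatch through a jump table: every stub below is aligned to 32
 * bytes (.align 5), and pcaddi t0, 8 yields the address 8 instructions
 * (32 bytes) past itself, i.e. the "0:" stub. Adding n << 5 then
 * selects the stub that copies exactly n bytes.
 */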
	.align 5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8		/* t0 = address of the "0:" stub */
	slli.d	a2, a2, 5	/* each stub occupies 32 bytes */
	add.d	t0, t0, a2
	jr	t0

	.align 5
0:	jr	ra

	.align 5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align 5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align 5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align 5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align 5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align 5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

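	/* 7 bytes: two overlapping 4-byte accesses at offsets 0 and 3 */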
	.align 5
7:	ld.w	t0, a1, 0
	ld.w	t1, a1, 3
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align 5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small	/* n < 9: use the per-size stubs */

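	/*
	 * a3/a2 become the source/destination end pointers; a6/a7
	 * preload the first and last 8 source bytes, which .Llt8
	 * stores to cover the (possibly unaligned) head and tail.
	 */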
	add.d	a3, a1, a2
	add.d	a2, a0, a2
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/*
	 * Align the destination up to 8 bytes: t0 = 8 - (dst & 7), so
	 * 1..8 head bytes are skipped here and covered later by the
	 * st.d a6 store at .Llt8.
	 */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8
	add.d	a1, a1, t0
	add.d	a5, a0, t0

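	/* a4 = src end - 64; skip the unrolled loop if < 64 bytes remain */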
	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes in 32-, 16- and 8-byte steps */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

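	/*
	 * Store the preloaded first and last 8 bytes. These overlap
	 * whatever was copied above and fix up the unaligned head and
	 * tail in one go.
	 */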
.Llt8:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)