v4.17
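The 64-bit PowerPC memcpy from arch/powerpc/lib/memcpy_64.S, as shipped in v4.17. On big-endian builds it saves the destination pointer at entry so it can be reloaded into r3 before returning, aligns the destination to 8 bytes, then copies in 16-byte chunks with separate paths for aligned and unaligned sources; boot-time feature fixups redirect VMX-capable CPUs to memcpy_power7.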
 
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.align	7
_GLOBAL_TOC(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
#ifndef SELFTEST
	b	memcpy_power7
#endif
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
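/* The section pair above is a boot-time feature fixup: on CPUs with
   CPU_FTR_VMX_COPY set, the FTR_SECTION_ELSE branch to memcpy_power7
   is patched in; otherwise the first section is left in place. */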
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	addi r9,r3,-1
	addi r4,r4,-1
	beqlr cr7
	mtctr r5
1:	lbzu r10,1(r4)
	stbu r10,1(r9)
	bdnz 1b
	blr
#else
	PPC_MTOCRF(0x01,r5)
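	/* the low 4 bits of the length are now in cr7; the tail code
	   tests them with bf/bt cr7*4+n to move 8/4/2/1-byte remainders */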
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7
	dcbt	0,r4
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,2f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f
	addi	r3,r3,16
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0

	bt	cr7*4+0,0f

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
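For reference, here is a rough C sketch of what the big-endian path above does (illustrative only; memcpy_sketch and its structure are mine, not the kernel's): align the destination to 8 bytes, move 16-byte chunks, then drain the 8/4/2/1-byte tail that cr7 encodes. Note that, like the asm (which spills r3 at entry and reloads it before blr because r3 doubles as the store cursor), the function must hand back the original destination pointer.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* memcpy_sketch: illustrative C analogue of the big-endian code path.
 * Not the kernel's implementation; just the same overall strategy. */
void *memcpy_sketch(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (n >= 16) {
		/* bytes to the next 8-byte boundary of dst: the
		 * "(-dst) & 7" trick from "neg r6,r3; andi. r6,r6,7" */
		size_t head = (size_t)(-(uintptr_t)d) & 7;

		for (n -= head; head; head--)
			*d++ = *s++;

		while (n >= 16) {	/* main 16-byte loop (the ld/std pairs) */
			uint64_t a, b;
			memcpy(&a, s, 8);	/* tolerates unaligned src */
			memcpy(&b, s + 8, 8);
			memcpy(d, &a, 8);
			memcpy(d + 8, &b, 8);
			d += 16; s += 16; n -= 16;
		}
	}
	while (n--)	/* tail; the asm uses cr7 bits to do 8/4/2/1 moves */
		*d++ = *s++;

	return dst;	/* like the asm, return the original destination */
}

The v5.4 revision of the same file follows. Relative to v4.17 it moves to an SPDX license tag, adds the asm/asm-compat.h, asm/feature-fixups.h and asm/kasan.h includes, wraps the entry point and export in KASAN-aware macros, and introduces SELFTEST_CASE/test_feature so the user-space selftests can force each feature-fixup path.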
v5.4
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

	.align	7
_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	addi r9,r3,-1
	addi r4,r4,-1
	beqlr cr7
	mtctr r5
1:	lbzu r10,1(r4)
	stbu r10,1(r9)
	bdnz 1b
	blr
#else
	PPC_MTOCRF(0x01,r5)
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7
	dcbt	0,r4
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
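	/* test_feature is only consumed by the selftest builds of the
	   feature-fixup macros; SELFTEST_CASE == 1 forces the POWER6
	   (nop) section below. It is inert in normal kernel builds. */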
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,2f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f
	addi	r3,r3,16
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0

	bt	cr7*4+0,0f

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)
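A note on the KASAN macros in the v5.4 version: when CONFIG_KASAN is enabled, the routine is assembled and exported under the name __memcpy, leaving the plain memcpy symbol free for an instrumented C wrapper; with KASAN disabled the macros degrade to the ordinary forms (and EXPORT_SYMBOL_KASAN becomes a no-op, since EXPORT_SYMBOL(memcpy) above already exports the symbol). As of v5.4 the definitions in arch/powerpc/include/asm/kasan.h amount to roughly the following paraphrased sketch (check the header for the exact text):

/* Paraphrase of the v5.4 asm/kasan.h helpers used by memcpy_64.S. */
#ifdef CONFIG_KASAN
#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(__##fn)	/* assemble as __memcpy */
#define EXPORT_SYMBOL_KASAN(fn)	EXPORT_SYMBOL(__##fn)	/* export __memcpy */
#else
#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(fn)		/* plain memcpy */
#define EXPORT_SYMBOL_KASAN(fn)				/* nothing extra */
#endif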