Linux Audio

Check our new training course

Loading...
Version v4.17 of csum_copy.S:
  1/* SPDX-License-Identifier: GPL-2.0 */
  2/* csum_copy.S: Checksum+copy code for sparc64
  3 *
  4 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  5 */
  6
  7#include <asm/export.h>
  8
  9#ifdef __KERNEL__
	/* GLOBAL_SPARE names the global register this file uses as an
	 * extra scratch register: %g7 when __KERNEL__ is defined, %g5
	 * otherwise.
	 */
 10#define GLOBAL_SPARE	%g7
 11#else
 12#define GLOBAL_SPARE	%g5
 13#endif
 14
	/* EX_LD, EX_ST and EX_RETVAL wrap loads, stores and return values.
	 * Each one defaults to a plain pass-through unless it was already
	 * defined before this point.  EX_RETVAL is not referenced by the
	 * code in this listing.
	 * NOTE(review): presumably an including file overrides these to
	 * attach fault handling to each memory access -- confirm.
	 */
 15#ifndef EX_LD
 16#define EX_LD(x)	x
 17#endif
 18
 19#ifndef EX_ST
 20#define EX_ST(x)	x
 21#endif
 22
 23#ifndef EX_RETVAL
 24#define EX_RETVAL(x)	x
 25#endif
 26
	/* LOAD(type,addr,dest) expands to "type [addr], dest" and
	 * STORE(type,src,addr) to "type src, [addr]", where type is the
	 * instruction mnemonic.  Both can be replaced by defining them
	 * before this point.
	 */
 27#ifndef LOAD
 28#define LOAD(type,addr,dest)	type [addr], dest
 29#endif
 30
 31#ifndef STORE
 32#define STORE(type,src,addr)	type src, [addr]
 33#endif
 34
	/* Name of the routine emitted below; csum_partial_copy_nocheck
	 * unless FUNC_NAME was already defined.
	 */
 35#ifndef FUNC_NAME
 36#define FUNC_NAME	csum_partial_copy_nocheck
 37#endif
 38
	/* Declare to the assembler that %g2 and %g3 are used as scratch. */
 39	.register	%g2, #scratch
 40	.register	%g3, #scratch
 41
 42	.text
 43
 4490:
 45	/* We checked for zero length already, so there must be
 46	 * at least one byte.
 47	 */
	/* Source-alignment fix-up, reached through the "bne ... 90b" at the
	 * entry point when %o0 is not 4-byte aligned.  The flags tested by
	 * the first branch below were set by "andcc %o0, 0x1" in the delay
	 * slot of that bne: Z set means %o0 is even, so the single-byte
	 * copy is skipped.  %o4 was cleared at entry and carries the sum.
	 */
 48	be,pt		%icc, 1f
 49	 nop
 50	EX_LD(LOAD(ldub, %o0 + 0x00, %o4))	/* odd leading byte seeds the sum */
 51	add		%o0, 1, %o0
 52	sub		%o2, 1, %o2
 53	EX_ST(STORE(stb, %o4, %o1 + 0x00))
 54	add		%o1, 1, %o1
 551:	andcc		%o0, 0x2, %g0
 56	be,pn		%icc, 80f		/* bit 1 clear: source is now 4-byte aligned */
 57	 cmp		%o2, 2			/* delay slot: runs whether or not 80f is taken */
 58	blu,pn		%icc, 60f		/* fewer than 2 bytes left: go to the tail code */
 59	 nop
 60	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))	/* copy one halfword to reach 4-byte alignment */
 61	add		%o0, 2, %o0
 62	sub		%o2, 2, %o2
 63	EX_ST(STORE(sth, %o5, %o1 + 0x00))
 64	add		%o1, 2, %o1
 65	ba,pt		%xcc, 80f
 66	 add		%o5, %o4, %o4		/* delay slot: add the halfword to the sum */
 67
 68	.globl		FUNC_NAME
 69	.type		FUNC_NAME,#function
 70	EXPORT_SYMBOL(FUNC_NAME)
	/* FUNC_NAME (csum_partial_copy_nocheck unless overridden):
	 * copy %o2 bytes from %o0 to %o1 while summing the copied data,
	 * fold that sum to 16 bits, add it to the incoming sum in %o3 with
	 * end-around carry and return the result, zero-extended from 32
	 * bits, in %o0.
	 *
	 * Dispatch performed here:
	 *  - src and dst differ in their low two address bits -> 95f,
	 *    which writes the destination one byte at a time;
	 *  - len == 0 -> 70f, which returns the low 32 bits of %o3;
	 *  - src not 4-byte aligned -> 90b, which copies a leading byte
	 *    and/or halfword first;
	 *  - otherwise fall through to 80.
	 */
 71FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len, %o3=sum */
 72	LOAD(prefetch, %o0 + 0x000, #n_reads)
 73	xor		%o0, %o1, %g1
 74	clr		%o4			/* %o4 = running sum for the aligned path */
 75	andcc		%g1, 0x3, %g0
 76	bne,pn		%icc, 95f
 77	 LOAD(prefetch, %o0 + 0x040, #n_reads)
 78	
 79	brz,pn		%o2, 70f
 80	 andcc		%o0, 0x3, %g0		/* delay slot: sets the flags for the bne below */
 81
 82	/* We "remember" whether the lowest bit in the address
 83	 * was set in GLOBAL_SPARE.  Because if it is, we have to swap
 84	 * upper and lower 8 bit fields of the sum we calculate.
 85	*/
 86	bne,pn		%icc, 90b
 87	 andcc		%o0, 0x1, GLOBAL_SPARE	/* delay slot: runs on both paths */
 88
 8980:
	/* Bulk copy.  %g3 = len rounded down to a multiple of 64, the
	 * number of bytes handled by the unrolled loop below; %o2 keeps
	 * the remainder.  When %g3 is zero the loop is skipped (2f).
	 */
 90	LOAD(prefetch, %o0 + 0x080, #n_reads)
 91	andncc		%o2, 0x3f, %g3
 92
 93	LOAD(prefetch, %o0 + 0x0c0, #n_reads)
 94	sub		%o2, %g3, %o2
 95	brz,pn		%g3, 2f
 96	 LOAD(prefetch, %o0 + 0x100, #n_reads)
 97
 98	/* So that we don't need to use the non-pairing
 99	 * add-with-carry instructions we accumulate 32-bit
100	 * values into a 64-bit register.  At the end of the
101	 * loop we fold it down to 32-bits and so on.
102	 */
103	ba,pt		%xcc, 1f
104	LOAD(prefetch, %o0 + 0x140, #n_reads)	/* delay slot of the ba above, although not indented like the others */
105
	/* One iteration copies 16 words (64 bytes).  Each load runs ahead
	 * of its matching add and store, rotating through %o5, %g1 and
	 * %g2.  %g3 counts the remaining bytes down in steps of 0x40.
	 */
106	.align		32
1071:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
108	EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
109	EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
110	add		%o4, %o5, %o4
111	EX_ST(STORE(stw, %o5, %o1 + 0x00))
112	EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
113	add		%o4, %g1, %o4
114	EX_ST(STORE(stw, %g1, %o1 + 0x04))
115	EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
116	add		%o4, %g2, %o4
117	EX_ST(STORE(stw, %g2, %o1 + 0x08))
118	EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
119	add		%o4, %o5, %o4
120	EX_ST(STORE(stw, %o5, %o1 + 0x0c))
121	EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
122	add		%o4, %g1, %o4
123	EX_ST(STORE(stw, %g1, %o1 + 0x10))
124	EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
125	add		%o4, %g2, %o4
126	EX_ST(STORE(stw, %g2, %o1 + 0x14))
127	EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
128	add		%o4, %o5, %o4
129	EX_ST(STORE(stw, %o5, %o1 + 0x18))
130	EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
131	add		%o4, %g1, %o4
132	EX_ST(STORE(stw, %g1, %o1 + 0x1c))
133	EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
134	add		%o4, %g2, %o4
135	EX_ST(STORE(stw, %g2, %o1 + 0x20))
136	EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
137	add		%o4, %o5, %o4
138	EX_ST(STORE(stw, %o5, %o1 + 0x24))
139	EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
140	add		%o4, %g1, %o4
141	EX_ST(STORE(stw, %g1, %o1 + 0x28))
142	EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
143	add		%o4, %g2, %o4
144	EX_ST(STORE(stw, %g2, %o1 + 0x2c))
145	EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
146	add		%o4, %o5, %o4
147	EX_ST(STORE(stw, %o5, %o1 + 0x30))
148	EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
149	add		%o4, %g1, %o4
150	EX_ST(STORE(stw, %g1, %o1 + 0x34))
151	LOAD(prefetch, %o0 + 0x180, #n_reads)
152	add		%o4, %g2, %o4
153	EX_ST(STORE(stw, %g2, %o1 + 0x38))
154	subcc		%g3, 0x40, %g3		/* sets the flags for the bne below */
155	add		%o0, 0x40, %o0
156	add		%o4, %o5, %o4
157	EX_ST(STORE(stw, %o5, %o1 + 0x3c))
158	bne,pt		%icc, 1b
159	 add		%o1, 0x40, %o1		/* delay slot: advance dst on every iteration */
160
1612:	and		%o2, 0x3c, %g3		/* %g3 = bytes remaining in whole words */
162	brz,pn		%g3, 2f			/* no whole word left: go straight to the folds */
163	 sub		%o2, %g3, %o2		/* delay slot: %o2 = leftover bytes after the words */
1641:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))	/* copy and sum one word per iteration */
165	subcc		%g3, 0x4, %g3
166	add		%o0, 0x4, %o0
167	add		%o4, %o5, %o4
168	EX_ST(STORE(stw, %o5, %o1 + 0x00))
169	bne,pt		%icc, 1b
170	 add		%o1, 0x4, %o1		/* delay slot: advance dst on every iteration */
171
1722:
173	/* fold 64-->32 */
	/* Add the upper 32 bits of %o4 to its lower 32 bits.  This is
	 * done twice so that a carry out of the first add is folded
	 * back in as well.
	 */
174	srlx		%o4, 32, %o5
175	srl		%o4, 0, %o4		/* clear bits 63:32 */
176	add		%o4, %o5, %o4
177	srlx		%o4, 32, %o5
178	srl		%o4, 0, %o4
179	add		%o4, %o5, %o4
180
181	/* fold 32-->16 */
	/* Same idea with 16-bit halves: %g1 = 0xffff0000, so the andn
	 * keeps the low 16 bits while the srl extracts the high 16.
	 */
182	sethi		%hi(0xffff0000), %g1
183	srl		%o4, 16, %o5
184	andn		%o4, %g1, %g2
185	add		%o5, %g2, %o4
186	srl		%o4, 16, %o5
187	andn		%o4, %g1, %g2
188	add		%o5, %g2, %o4
189
19060:
191	/* %o4 has the 16-bit sum we have calculated so-far.  */
	/* Tail of the aligned path; %o2 holds the bytes still to copy.
	 * Copy a trailing halfword and then a trailing byte if present,
	 * fold the sum to 16 bits, byte-swap it when GLOBAL_SPARE is
	 * non-zero, add it to the caller's sum in %o3 and return.
	 */
192	cmp		%o2, 2
193	blu,pt		%icc, 1f		/* fewer than 2 bytes left: skip the halfword */
194	 nop
195	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
196	sub		%o2, 2, %o2
197	add		%o0, 2, %o0
198	add		%o4, %o5, %o4
199	EX_ST(STORE(sth, %o5, %o1 + 0x00))
200	add		%o1, 0x2, %o1
2011:	brz,pt		%o2, 1f			/* nothing left: skip the byte */
202	 nop
203	EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
204	sub		%o2, 1, %o2
205	add		%o0, 1, %o0
206	EX_ST(STORE(stb, %o5, %o1 + 0x00))
207	sllx		%o5, 8, %o5		/* the last byte is summed as the high byte of a halfword */
208	add		%o1, 1, %o1
209	add		%o4, %o5, %o4
2101:
211	/* fold 32-->16 */
	/* %g1 = 0xffff0000; add the high 16 bits to the low 16 bits,
	 * twice, so a carry out of the first add is folded back in.
	 */
212	sethi		%hi(0xffff0000), %g1
213	srl		%o4, 16, %o5
214	andn		%o4, %g1, %g2
215	add		%o5, %g2, %o4
216	srl		%o4, 16, %o5
217	andn		%o4, %g1, %g2
218	add		%o5, %g2, %o4
219
2201:	brz,pt		GLOBAL_SPARE, 1f	/* GLOBAL_SPARE = src & 1, recorded at the entry point */
221	 nop
222
223	/* We started with an odd byte, byte-swap the result.  */
224	srl		%o4, 8, %o5
225	and		%o4, 0xff, %g1
226	sll		%g1, 8, %g1
227	or		%o5, %g1, %o4
228
2291:	addcc		%o3, %o4, %o3		/* add to the caller's sum ... */
230	addc		%g0, %o3, %o3		/* ... and fold the carry back in */
231
23270:
233	retl
234	 srl		%o3, 0, %o0		/* delay slot: return the low 32 bits, zero-extended */
235
23695:	mov		0, GLOBAL_SPARE
	/* Path taken when src and dst differ in their low two address
	 * bits.  The source is read with byte, halfword and word loads as
	 * its alignment allows, while the destination is written one byte
	 * at a time.  The sum is kept in GLOBAL_SPARE, %o5 remembers
	 * whether the source address started odd, and %g1 counts
	 * halfwords and later words.
	 *
	 * In the ",a" branches below the delay-slot instruction only
	 * executes when the branch is taken.
	 */
237	brlez,pn	%o2, 4f			/* len <= 0: nothing to copy, GLOBAL_SPARE is 0 */
238	 andcc		%o0, 1, %o5		/* delay slot: %o5 = src & 1, flags for the be below */
239	be,a,pt		%icc, 1f		/* src even: no leading byte */
240	 srl		%o2, 1, %g1		/* (taken only) %g1 = len / 2 */
241	sub		%o2, 1, %o2	/* src odd: copy one byte, which seeds the sum */
242	EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
243	add		%o0, 1, %o0	
244	EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
245	srl		%o2, 1, %g1		/* %g1 = remaining len / 2 */
246	add		%o1, 1, %o1
2471:	brz,a,pn	%g1, 3f			/* no whole halfword: go test for a last byte */
248	 andcc		%o2, 1, %g0		/* (taken only) flags for the test at 3 */
249	andcc		%o0, 2, %g0	
250	be,a,pt		%icc, 1f		/* src already 4-byte aligned */
251	 srl		%g1, 1, %g1		/* (taken only) halfword count -> word count */
252	EX_LD(LOAD(lduh, %o0, %o4))		/* copy one halfword as two byte stores */
253	sub		%o2, 2, %o2	
254	srl		%o4, 8, %g2
255	sub		%g1, 1, %g1	
256	EX_ST(STORE(stb, %g2, %o1))
257	add		%o4, GLOBAL_SPARE, GLOBAL_SPARE
258	EX_ST(STORE(stb, %o4, %o1 + 1))
259	add		%o0, 2, %o0	
260	srl		%g1, 1, %g1		/* remaining halfwords -> word count */
261	add		%o1, 2, %o1
2621:	brz,a,pn	%g1, 2f		/* no whole word: go test for a trailing halfword */
263	 andcc		%o2, 2, %g0		/* (taken only) flags for the test at 2 */
264	EX_LD(LOAD(lduw, %o0, %o4))
2655:	srl		%o4, 24, %g2		/* word loop: store the 4 bytes, most significant first */
266	srl		%o4, 16, %g3
267	EX_ST(STORE(stb, %g2, %o1))
268	srl		%o4, 8, %g2
269	EX_ST(STORE(stb, %g3, %o1 + 1))
270	add		%o0, 4, %o0
271	EX_ST(STORE(stb, %g2, %o1 + 2))
272	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
273	EX_ST(STORE(stb, %o4, %o1 + 3))
274	addc		GLOBAL_SPARE, %g0, GLOBAL_SPARE	/* add the carry from the addcc back in */
275	add		%o1, 4, %o1
276	subcc		%g1, 1, %g1
277	bne,a,pt	%icc, 5b
278	 EX_LD(LOAD(lduw, %o0, %o4))		/* (taken only) load the next word */
279	sll		GLOBAL_SPARE, 16, %g2	/* fold the 32-bit sum: low half + high half */
280	srl		GLOBAL_SPARE, 16, GLOBAL_SPARE
281	srl		%g2, 16, %g2
282	andcc		%o2, 2, %g0		/* flags for the test at 2 */
283	add		%g2, GLOBAL_SPARE, GLOBAL_SPARE 
2842:	be,a,pt		%icc, 3f		/* no trailing halfword */
285	 andcc		%o2, 1, %g0		/* (taken only) flags for the test at 3 */
286	EX_LD(LOAD(lduh, %o0, %o4))
287	andcc		%o2, 1, %g0		/* flags for the test at 3 on this path */
288	srl		%o4, 8, %g2
289	add		%o0, 2, %o0	
290	EX_ST(STORE(stb, %g2, %o1))
291	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
292	EX_ST(STORE(stb, %o4, %o1 + 1))
293	add		%o1, 2, %o1
2943:	be,a,pt		%icc, 1f		/* no trailing byte */
295	 sll		GLOBAL_SPARE, 16, %o4	/* (taken only) %o4 = sum << 16 for the fold at 1 */
296	EX_LD(LOAD(ldub, %o0, %g2))
297	sll		%g2, 8, %o4	/* the last byte is summed as the high byte of a halfword */
298	EX_ST(STORE(stb, %g2, %o1))
299	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
300	sll		GLOBAL_SPARE, 16, %o4
3011:	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE	/* bits 31:16 now hold low half + high half of the sum */
302	srl		GLOBAL_SPARE, 16, %o4
303	addc		%g0, %o4, GLOBAL_SPARE	/* 16-bit result plus the carry out of the addcc */
304	brz,pt		%o5, 4f			/* src started even: no byte swap */
305	 srl		GLOBAL_SPARE, 8, %o4	/* delay slot: runs on both paths */
306	and		GLOBAL_SPARE, 0xff, %g2	/* src started odd: swap the two bytes of the result */
307	and		%o4, 0xff, %o4
308	sll		%g2, 8, %g2
309	or		%g2, %o4, GLOBAL_SPARE
3104:	addcc		%o3, GLOBAL_SPARE, %o3	/* add to the caller's sum ... */
311	addc		%g0, %o3, %o0		/* ... and fold the carry back in */
312	retl
313	 srl		%o0, 0, %o0		/* delay slot: zero-extend the 32-bit result */
314	.size		FUNC_NAME, .-FUNC_NAME
Version v3.1 of csum_copy.S:
 
  1/* csum_copy.S: Checksum+copy code for sparc64
  2 *
  3 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  4 */
  5
 
 
  6#ifdef __KERNEL__
	/* GLOBAL_SPARE names the global register this file uses as an
	 * extra scratch register: %g7 when __KERNEL__ is defined, %g5
	 * otherwise.
	 */
  7#define GLOBAL_SPARE	%g7
  8#else
  9#define GLOBAL_SPARE	%g5
 10#endif
 11
	/* EX_LD, EX_ST and EX_RETVAL wrap loads, stores and return values.
	 * Each one defaults to a plain pass-through unless it was already
	 * defined before this point.  EX_RETVAL is not referenced by the
	 * code in this listing.
	 * NOTE(review): presumably an including file overrides these to
	 * attach fault handling to each memory access -- confirm.
	 */
 12#ifndef EX_LD
 13#define EX_LD(x)	x
 14#endif
 15
 16#ifndef EX_ST
 17#define EX_ST(x)	x
 18#endif
 19
 20#ifndef EX_RETVAL
 21#define EX_RETVAL(x)	x
 22#endif
 23
	/* LOAD(type,addr,dest) expands to "type [addr], dest" and
	 * STORE(type,src,addr) to "type src, [addr]", where type is the
	 * instruction mnemonic.  Both can be replaced by defining them
	 * before this point.
	 */
 24#ifndef LOAD
 25#define LOAD(type,addr,dest)	type [addr], dest
 26#endif
 27
 28#ifndef STORE
 29#define STORE(type,src,addr)	type src, [addr]
 30#endif
 31
	/* Name of the routine emitted below; csum_partial_copy_nocheck
	 * unless FUNC_NAME was already defined.
	 */
 32#ifndef FUNC_NAME
 33#define FUNC_NAME	csum_partial_copy_nocheck
 34#endif
 35
	/* Declare to the assembler that %g2 and %g3 are used as scratch. */
 36	.register	%g2, #scratch
 37	.register	%g3, #scratch
 38
 39	.text
 40
 4190:
 42	/* We checked for zero length already, so there must be
 43	 * at least one byte.
 44	 */
	/* Source-alignment fix-up, reached through the "bne ... 90b" at the
	 * entry point when %o0 is not 4-byte aligned.  The flags tested by
	 * the first branch below were set by "andcc %o0, 0x1" in the delay
	 * slot of that bne: Z set means %o0 is even, so the single-byte
	 * copy is skipped.  %o4 was cleared at entry and carries the sum.
	 */
 45	be,pt		%icc, 1f
 46	 nop
 47	EX_LD(LOAD(ldub, %o0 + 0x00, %o4))	/* odd leading byte seeds the sum */
 48	add		%o0, 1, %o0
 49	sub		%o2, 1, %o2
 50	EX_ST(STORE(stb, %o4, %o1 + 0x00))
 51	add		%o1, 1, %o1
 521:	andcc		%o0, 0x2, %g0
 53	be,pn		%icc, 80f		/* bit 1 clear: source is now 4-byte aligned */
 54	 cmp		%o2, 2			/* delay slot: runs whether or not 80f is taken */
 55	blu,pn		%icc, 60f		/* fewer than 2 bytes left: go to the tail code */
 56	 nop
 57	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))	/* copy one halfword to reach 4-byte alignment */
 58	add		%o0, 2, %o0
 59	sub		%o2, 2, %o2
 60	EX_ST(STORE(sth, %o5, %o1 + 0x00))
 61	add		%o1, 2, %o1
 62	ba,pt		%xcc, 80f
 63	 add		%o5, %o4, %o4		/* delay slot: add the halfword to the sum */
 64
 65	.globl		FUNC_NAME
 
 
	/* FUNC_NAME (csum_partial_copy_nocheck unless overridden):
	 * copy %o2 bytes from %o0 to %o1 while summing the copied data,
	 * fold that sum to 16 bits, add it to the incoming sum in %o3 with
	 * end-around carry and return the result, zero-extended from 32
	 * bits, in %o0.
	 *
	 * Dispatch performed here:
	 *  - src and dst differ in their low two address bits -> 95f,
	 *    which writes the destination one byte at a time;
	 *  - len == 0 -> 70f, which returns the low 32 bits of %o3;
	 *  - src not 4-byte aligned -> 90b, which copies a leading byte
	 *    and/or halfword first;
	 *  - otherwise fall through to 80.
	 */
 66FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len, %o3=sum */
 67	LOAD(prefetch, %o0 + 0x000, #n_reads)
 68	xor		%o0, %o1, %g1
 69	clr		%o4			/* %o4 = running sum for the aligned path */
 70	andcc		%g1, 0x3, %g0
 71	bne,pn		%icc, 95f
 72	 LOAD(prefetch, %o0 + 0x040, #n_reads)
 73	
 74	brz,pn		%o2, 70f
 75	 andcc		%o0, 0x3, %g0		/* delay slot: sets the flags for the bne below */
 76
 77	/* We "remember" whether the lowest bit in the address
 78	 * was set in GLOBAL_SPARE.  Because if it is, we have to swap
 79	 * upper and lower 8 bit fields of the sum we calculate.
 80	*/
 81	bne,pn		%icc, 90b
 82	 andcc		%o0, 0x1, GLOBAL_SPARE	/* delay slot: runs on both paths */
 83
 8480:
	/* Bulk copy.  %g3 = len rounded down to a multiple of 64, the
	 * number of bytes handled by the unrolled loop below; %o2 keeps
	 * the remainder.  When %g3 is zero the loop is skipped (2f).
	 */
 85	LOAD(prefetch, %o0 + 0x080, #n_reads)
 86	andncc		%o2, 0x3f, %g3
 87
 88	LOAD(prefetch, %o0 + 0x0c0, #n_reads)
 89	sub		%o2, %g3, %o2
 90	brz,pn		%g3, 2f
 91	 LOAD(prefetch, %o0 + 0x100, #n_reads)
 92
 93	/* So that we don't need to use the non-pairing
 94	 * add-with-carry instructions we accumulate 32-bit
 95	 * values into a 64-bit register.  At the end of the
 96	 * loop we fold it down to 32-bits and so on.
 97	 */
 98	ba,pt		%xcc, 1f
 99	LOAD(prefetch, %o0 + 0x140, #n_reads)	/* delay slot of the ba above, although not indented like the others */
100
	/* One iteration copies 16 words (64 bytes).  Each load runs ahead
	 * of its matching add and store, rotating through %o5, %g1 and
	 * %g2.  %g3 counts the remaining bytes down in steps of 0x40.
	 */
101	.align		32
1021:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
103	EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
104	EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
105	add		%o4, %o5, %o4
106	EX_ST(STORE(stw, %o5, %o1 + 0x00))
107	EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
108	add		%o4, %g1, %o4
109	EX_ST(STORE(stw, %g1, %o1 + 0x04))
110	EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
111	add		%o4, %g2, %o4
112	EX_ST(STORE(stw, %g2, %o1 + 0x08))
113	EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
114	add		%o4, %o5, %o4
115	EX_ST(STORE(stw, %o5, %o1 + 0x0c))
116	EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
117	add		%o4, %g1, %o4
118	EX_ST(STORE(stw, %g1, %o1 + 0x10))
119	EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
120	add		%o4, %g2, %o4
121	EX_ST(STORE(stw, %g2, %o1 + 0x14))
122	EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
123	add		%o4, %o5, %o4
124	EX_ST(STORE(stw, %o5, %o1 + 0x18))
125	EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
126	add		%o4, %g1, %o4
127	EX_ST(STORE(stw, %g1, %o1 + 0x1c))
128	EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
129	add		%o4, %g2, %o4
130	EX_ST(STORE(stw, %g2, %o1 + 0x20))
131	EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
132	add		%o4, %o5, %o4
133	EX_ST(STORE(stw, %o5, %o1 + 0x24))
134	EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
135	add		%o4, %g1, %o4
136	EX_ST(STORE(stw, %g1, %o1 + 0x28))
137	EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
138	add		%o4, %g2, %o4
139	EX_ST(STORE(stw, %g2, %o1 + 0x2c))
140	EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
141	add		%o4, %o5, %o4
142	EX_ST(STORE(stw, %o5, %o1 + 0x30))
143	EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
144	add		%o4, %g1, %o4
145	EX_ST(STORE(stw, %g1, %o1 + 0x34))
146	LOAD(prefetch, %o0 + 0x180, #n_reads)
147	add		%o4, %g2, %o4
148	EX_ST(STORE(stw, %g2, %o1 + 0x38))
149	subcc		%g3, 0x40, %g3		/* sets the flags for the bne below */
150	add		%o0, 0x40, %o0
151	add		%o4, %o5, %o4
152	EX_ST(STORE(stw, %o5, %o1 + 0x3c))
153	bne,pt		%icc, 1b
154	 add		%o1, 0x40, %o1		/* delay slot: advance dst on every iteration */
155
1562:	and		%o2, 0x3c, %g3		/* %g3 = bytes remaining in whole words */
157	brz,pn		%g3, 2f			/* no whole word left: go straight to the folds */
158	 sub		%o2, %g3, %o2		/* delay slot: %o2 = leftover bytes after the words */
1591:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))	/* copy and sum one word per iteration */
160	subcc		%g3, 0x4, %g3
161	add		%o0, 0x4, %o0
162	add		%o4, %o5, %o4
163	EX_ST(STORE(stw, %o5, %o1 + 0x00))
164	bne,pt		%icc, 1b
165	 add		%o1, 0x4, %o1		/* delay slot: advance dst on every iteration */
166
1672:
168	/* fold 64-->32 */
	/* Add the upper 32 bits of %o4 to its lower 32 bits.  This is
	 * done twice so that a carry out of the first add is folded
	 * back in as well.
	 */
169	srlx		%o4, 32, %o5
170	srl		%o4, 0, %o4		/* clear bits 63:32 */
171	add		%o4, %o5, %o4
172	srlx		%o4, 32, %o5
173	srl		%o4, 0, %o4
174	add		%o4, %o5, %o4
175
176	/* fold 32-->16 */
	/* Same idea with 16-bit halves: %g1 = 0xffff0000, so the andn
	 * keeps the low 16 bits while the srl extracts the high 16.
	 */
177	sethi		%hi(0xffff0000), %g1
178	srl		%o4, 16, %o5
179	andn		%o4, %g1, %g2
180	add		%o5, %g2, %o4
181	srl		%o4, 16, %o5
182	andn		%o4, %g1, %g2
183	add		%o5, %g2, %o4
184
18560:
186	/* %o4 has the 16-bit sum we have calculated so-far.  */
	/* Tail of the aligned path; %o2 holds the bytes still to copy.
	 * Copy a trailing halfword and then a trailing byte if present,
	 * fold the sum to 16 bits, byte-swap it when GLOBAL_SPARE is
	 * non-zero, add it to the caller's sum in %o3 and return.
	 */
187	cmp		%o2, 2
188	blu,pt		%icc, 1f		/* fewer than 2 bytes left: skip the halfword */
189	 nop
190	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
191	sub		%o2, 2, %o2
192	add		%o0, 2, %o0
193	add		%o4, %o5, %o4
194	EX_ST(STORE(sth, %o5, %o1 + 0x00))
195	add		%o1, 0x2, %o1
1961:	brz,pt		%o2, 1f			/* nothing left: skip the byte */
197	 nop
198	EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
199	sub		%o2, 1, %o2
200	add		%o0, 1, %o0
201	EX_ST(STORE(stb, %o5, %o1 + 0x00))
202	sllx		%o5, 8, %o5		/* the last byte is summed as the high byte of a halfword */
203	add		%o1, 1, %o1
204	add		%o4, %o5, %o4
2051:
206	/* fold 32-->16 */
	/* %g1 = 0xffff0000; add the high 16 bits to the low 16 bits,
	 * twice, so a carry out of the first add is folded back in.
	 */
207	sethi		%hi(0xffff0000), %g1
208	srl		%o4, 16, %o5
209	andn		%o4, %g1, %g2
210	add		%o5, %g2, %o4
211	srl		%o4, 16, %o5
212	andn		%o4, %g1, %g2
213	add		%o5, %g2, %o4
214
2151:	brz,pt		GLOBAL_SPARE, 1f	/* GLOBAL_SPARE = src & 1, recorded at the entry point */
216	 nop
217
218	/* We started with an odd byte, byte-swap the result.  */
219	srl		%o4, 8, %o5
220	and		%o4, 0xff, %g1
221	sll		%g1, 8, %g1
222	or		%o5, %g1, %o4
223
2241:	addcc		%o3, %o4, %o3		/* add to the caller's sum ... */
225	addc		%g0, %o3, %o3		/* ... and fold the carry back in */
226
22770:
228	retl
229	 srl		%o3, 0, %o0		/* delay slot: return the low 32 bits, zero-extended */
230
23195:	mov		0, GLOBAL_SPARE
	/* Path taken when src and dst differ in their low two address
	 * bits.  The source is read with byte, halfword and word loads as
	 * its alignment allows, while the destination is written one byte
	 * at a time.  The sum is kept in GLOBAL_SPARE, %o5 remembers
	 * whether the source address started odd, and %g1 counts
	 * halfwords and later words.
	 *
	 * In the ",a" branches below the delay-slot instruction only
	 * executes when the branch is taken.
	 */
232	brlez,pn	%o2, 4f			/* len <= 0: nothing to copy, GLOBAL_SPARE is 0 */
233	 andcc		%o0, 1, %o5		/* delay slot: %o5 = src & 1, flags for the be below */
234	be,a,pt		%icc, 1f		/* src even: no leading byte */
235	 srl		%o2, 1, %g1		/* (taken only) %g1 = len / 2 */
236	sub		%o2, 1, %o2	/* src odd: copy one byte, which seeds the sum */
237	EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
238	add		%o0, 1, %o0	
239	EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
240	srl		%o2, 1, %g1		/* %g1 = remaining len / 2 */
241	add		%o1, 1, %o1
2421:	brz,a,pn	%g1, 3f			/* no whole halfword: go test for a last byte */
243	 andcc		%o2, 1, %g0		/* (taken only) flags for the test at 3 */
244	andcc		%o0, 2, %g0	
245	be,a,pt		%icc, 1f		/* src already 4-byte aligned */
246	 srl		%g1, 1, %g1		/* (taken only) halfword count -> word count */
247	EX_LD(LOAD(lduh, %o0, %o4))		/* copy one halfword as two byte stores */
248	sub		%o2, 2, %o2	
249	srl		%o4, 8, %g2
250	sub		%g1, 1, %g1	
251	EX_ST(STORE(stb, %g2, %o1))
252	add		%o4, GLOBAL_SPARE, GLOBAL_SPARE
253	EX_ST(STORE(stb, %o4, %o1 + 1))
254	add		%o0, 2, %o0	
255	srl		%g1, 1, %g1		/* remaining halfwords -> word count */
256	add		%o1, 2, %o1
2571:	brz,a,pn	%g1, 2f		/* no whole word: go test for a trailing halfword */
258	 andcc		%o2, 2, %g0		/* (taken only) flags for the test at 2 */
259	EX_LD(LOAD(lduw, %o0, %o4))
2605:	srl		%o4, 24, %g2		/* word loop: store the 4 bytes, most significant first */
261	srl		%o4, 16, %g3
262	EX_ST(STORE(stb, %g2, %o1))
263	srl		%o4, 8, %g2
264	EX_ST(STORE(stb, %g3, %o1 + 1))
265	add		%o0, 4, %o0
266	EX_ST(STORE(stb, %g2, %o1 + 2))
267	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
268	EX_ST(STORE(stb, %o4, %o1 + 3))
269	addc		GLOBAL_SPARE, %g0, GLOBAL_SPARE	/* add the carry from the addcc back in */
270	add		%o1, 4, %o1
271	subcc		%g1, 1, %g1
272	bne,a,pt	%icc, 5b
273	 EX_LD(LOAD(lduw, %o0, %o4))		/* (taken only) load the next word */
274	sll		GLOBAL_SPARE, 16, %g2	/* fold the 32-bit sum: low half + high half */
275	srl		GLOBAL_SPARE, 16, GLOBAL_SPARE
276	srl		%g2, 16, %g2
277	andcc		%o2, 2, %g0		/* flags for the test at 2 */
278	add		%g2, GLOBAL_SPARE, GLOBAL_SPARE 
2792:	be,a,pt		%icc, 3f		/* no trailing halfword */
280	 andcc		%o2, 1, %g0		/* (taken only) flags for the test at 3 */
281	EX_LD(LOAD(lduh, %o0, %o4))
282	andcc		%o2, 1, %g0		/* flags for the test at 3 on this path */
283	srl		%o4, 8, %g2
284	add		%o0, 2, %o0	
285	EX_ST(STORE(stb, %g2, %o1))
286	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
287	EX_ST(STORE(stb, %o4, %o1 + 1))
288	add		%o1, 2, %o1
2893:	be,a,pt		%icc, 1f		/* no trailing byte */
290	 sll		GLOBAL_SPARE, 16, %o4	/* (taken only) %o4 = sum << 16 for the fold at 1 */
291	EX_LD(LOAD(ldub, %o0, %g2))
292	sll		%g2, 8, %o4	/* the last byte is summed as the high byte of a halfword */
293	EX_ST(STORE(stb, %g2, %o1))
294	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
295	sll		GLOBAL_SPARE, 16, %o4
2961:	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE	/* bits 31:16 now hold low half + high half of the sum */
297	srl		GLOBAL_SPARE, 16, %o4
298	addc		%g0, %o4, GLOBAL_SPARE	/* 16-bit result plus the carry out of the addcc */
299	brz,pt		%o5, 4f			/* src started even: no byte swap */
300	 srl		GLOBAL_SPARE, 8, %o4	/* delay slot: runs on both paths */
301	and		GLOBAL_SPARE, 0xff, %g2	/* src started odd: swap the two bytes of the result */
302	and		%o4, 0xff, %o4
303	sll		%g2, 8, %g2
304	or		%g2, %o4, GLOBAL_SPARE
3054:	addcc		%o3, GLOBAL_SPARE, %o3	/* add to the caller's sum ... */
306	addc		%g0, %o3, %o0		/* ... and fold the carry back in */
307	retl
308	 srl		%o0, 0, %o0		/* delay slot: zero-extend the 32-bit result */
309	.size		FUNC_NAME, .-FUNC_NAME