Linux kernel source listing: arch/sparc/lib/copy_user.S
(SPARC-optimized copy_from_user / copy_to_user).
Two kernel versions of the same file follow; each line carries the
original file's line number as a prefix (scraped listing, not
directly buildable text).

Version: v3.5.6
  1/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
  2 *
  3 *  Copyright(C) 1995 Linus Torvalds
  4 *  Copyright(C) 1996 David S. Miller
  5 *  Copyright(C) 1996 Eddie C. Dost
  6 *  Copyright(C) 1996,1998 Jakub Jelinek
  7 *
  8 * derived from:
  9 *	e-mail between David and Eddie.
 10 *
 11 * Returns 0 if successful, otherwise count of bytes not copied yet
 12 */
 13
 14#include <asm/ptrace.h>
 15#include <asm/asmmacro.h>
 16#include <asm/page.h>
 17#include <asm/thread_info.h>
 
 18
/*
 * Exception-table helper macros.  Each EX-style macro emits one
 * load/store at local label 98 and records a fixup entry for it in
 * __ex_table, so a fault inside the user copy lands in the .fixup
 * code at the end of this file instead of oopsing.
 */
 19/* Work around cpp -rob */
 20#define ALLOC #alloc
 21#define EXECINSTR #execinstr
/* EX(insn, ops, a, b): execute "insn ops"; on a fault, run
 * "a, b, %g3" (leaves the bytes-not-copied count in %g3) and branch
 * to fixupretl. */
#define EX(x,y,a,b) 				\
 2398: 	x,y;					\
 24	.section .fixup,ALLOC,EXECINSTR;	\
 25	.align	4;				\
 2699:	ba fixupretl;				\
 27	 a, b, %g3;				\
 28	.section __ex_table,ALLOC;		\
 29	.align	4;				\
 30	.word	98b, 99b;			\
 31	.text;					\
 32	.align	4
 33
/* EX2: like EX, but runs one extra fixup instruction "c, d, e"
 * before computing %g3. */
 34#define EX2(x,y,c,d,e,a,b) 			\
 3598: 	x,y;					\
 36	.section .fixup,ALLOC,EXECINSTR;	\
 37	.align	4;				\
 3899:	c, d, e;				\
 39	ba fixupretl;				\
 40	 a, b, %g3;				\
 41	.section __ex_table,ALLOC;		\
 42	.align	4;				\
 43	.word	98b, 99b;			\
 44	.text;					\
 45	.align	4
 46
/* EXO2: faults are routed to local fixup label 97, which returns the
 * whole remaining count (%o2). */
 47#define EXO2(x,y) 				\
 4898: 	x, y;					\
 49	.section __ex_table,ALLOC;		\
 50	.align	4;				\
 51	.word	98b, 97f;			\
 52	.text;					\
 53	.align	4
 54
/* EXT(start, end, handler): range entry -- a fault anywhere in
 * [start, end) is dispatched to "handler", which receives
 * (faulting_insn - start) >> 2 in %g2 (see the comment above label
 * 50 below).  The zero word marks this as a range entry. */
 55#define EXT(start,end,handler)			\
 56	.section __ex_table,ALLOC;		\
 57	.align	4;				\
 58	.word	start, 0, end, handler;		\
 59	.text;					\
 60	.align	4
 61
 62/* Please do not change following macros unless you change logic used
 63 * in .fixup at the end of this file as well
 64 */
 65
 66/* Both these macros have to start with exactly the same insn */
/* Copy 32 bytes: four 8-byte ldd loads, eight 4-byte st stores. */
 67#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 68	ldd	[%src + (offset) + 0x00], %t0; \
 69	ldd	[%src + (offset) + 0x08], %t2; \
 70	ldd	[%src + (offset) + 0x10], %t4; \
 71	ldd	[%src + (offset) + 0x18], %t6; \
 72	st	%t0, [%dst + (offset) + 0x00]; \
 73	st	%t1, [%dst + (offset) + 0x04]; \
 74	st	%t2, [%dst + (offset) + 0x08]; \
 75	st	%t3, [%dst + (offset) + 0x0c]; \
 76	st	%t4, [%dst + (offset) + 0x10]; \
 77	st	%t5, [%dst + (offset) + 0x14]; \
 78	st	%t6, [%dst + (offset) + 0x18]; \
 79	st	%t7, [%dst + (offset) + 0x1c];
 80
/* Copy 32 bytes when dst is doubleword-aligned: ldd/std pairs only. */
 81#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 82	ldd	[%src + (offset) + 0x00], %t0; \
 83	ldd	[%src + (offset) + 0x08], %t2; \
 84	ldd	[%src + (offset) + 0x10], %t4; \
 85	ldd	[%src + (offset) + 0x18], %t6; \
 86	std	%t0, [%dst + (offset) + 0x00]; \
 87	std	%t2, [%dst + (offset) + 0x08]; \
 88	std	%t4, [%dst + (offset) + 0x10]; \
 89	std	%t6, [%dst + (offset) + 0x18];
 90
/* Copy 16 bytes downward from src/dst; six insns (24 bytes of code)
 * per expansion -- the copy_user_table jump arithmetic relies on
 * exactly this size. */
 91#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
 92	ldd	[%src - (offset) - 0x10], %t0; \
 93	ldd	[%src - (offset) - 0x08], %t2; \
 94	st	%t0, [%dst - (offset) - 0x10]; \
 95	st	%t1, [%dst - (offset) - 0x0c]; \
 96	st	%t2, [%dst - (offset) - 0x08]; \
 97	st	%t3, [%dst - (offset) - 0x04];
 98
/* Copy 8 bytes as four halfwords (for 2-byte mutual alignment). */
 99#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
100	lduh	[%src + (offset) + 0x00], %t0; \
101	lduh	[%src + (offset) + 0x02], %t1; \
102	lduh	[%src + (offset) + 0x04], %t2; \
103	lduh	[%src + (offset) + 0x06], %t3; \
104	sth	%t0, [%dst + (offset) + 0x00]; \
105	sth	%t1, [%dst + (offset) + 0x02]; \
106	sth	%t2, [%dst + (offset) + 0x04]; \
107	sth	%t3, [%dst + (offset) + 0x06];
108
/* Copy 2 bytes, byte by byte; four insns (16 bytes of code) per
 * expansion -- the short_table jump arithmetic relies on this. */
109#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
110	ldub	[%src - (offset) - 0x02], %t0; \
111	ldub	[%src - (offset) - 0x01], %t1; \
112	stb	%t0, [%dst - (offset) - 0x02]; \
113	stb	%t1, [%dst - (offset) - 0x01];
114
/*
 * Copy routine proper.  SPARC note: the instruction after every
 * branch is a delay slot and executes together with the branch; the
 * single-space indent marks delay-slot instructions in this file.
 */
115	.text
116	.align	4
117
118	.globl  __copy_user_begin
119__copy_user_begin:
120
121	.globl	__copy_user

/*
 * dword_align: reached when src and dst agree in their low two
 * address bits but src (%o1) is not yet 4-byte aligned.  Copies the
 * 1-3 leading bytes, then branches back to label 3 in __copy_user.
 */
122dword_align:
123	andcc	%o1, 1, %g0
124	be	4f
125	 andcc	%o1, 2, %g0
126
127	EXO2(ldub [%o1], %g2)
128	add	%o1, 1, %o1
129	EXO2(stb %g2, [%o0])
130	sub	%o2, 1, %o2
131	bne	3f
132	 add	%o0, 1, %o0
133
134	EXO2(lduh [%o1], %g2)
135	add	%o1, 2, %o1
136	EXO2(sth %g2, [%o0])
137	sub	%o2, 2, %o2
138	b	3f
139	 add	%o0, 2, %o0
1404:
141	EXO2(lduh [%o1], %g2)
142	add	%o1, 2, %o1
143	EXO2(sth %g2, [%o0])
144	sub	%o2, 2, %o2
145	b	3f
146	 add	%o0, 2, %o0
147
/*
 * __copy_user: entry point.
 * In:  %o0 = dst, %o1 = src, %o2 = len.
 * Out: %o0 = 0 on success, else the number of bytes not copied.
 * %g1 holds the remaining length on the aligned path; %g7 holds the
 * byte count of the current bulk loop.  Faults are dispatched via
 * the EX/EXO2/EXT table entries to the .fixup handlers below.
 */
148__copy_user:	/* %o0=dst %o1=src %o2=len */
149	xor	%o0, %o1, %o4
1501:
151	andcc	%o4, 3, %o5
1522:
153	bne	cannot_optimize
154	 cmp	%o2, 15
155
156	bleu	short_aligned_end
157	 andcc	%o1, 3, %g0
158
159	bne	dword_align
1603:
161	 andcc	%o1, 4, %g0
162
163	be	2f
164	 mov	%o2, %g1
165
166	EXO2(ld [%o1], %o4)
167	sub	%g1, 4, %g1
168	EXO2(st %o4, [%o0])
169	add	%o1, 4, %o1
170	add	%o0, 4, %o0
1712:
172	andcc	%g1, 0xffffff80, %g7
173	be	3f
174	 andcc	%o0, 4, %g0
175
/* If dst is doubleword-aligned, take the ldd/std loop instead.  The
 * delay slot executes the first ldd of MOVE_BIGCHUNK, which is the
 * same insn as the first one of ldd_std -- hence "ldd_std + 4". */
176	be	ldd_std + 4
1775:
/* Word-store bulk loop: 128 bytes per iteration. */
178	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
179	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
180	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
181	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
18280:
183	EXT(5b, 80b, 50f)
184	subcc	%g7, 128, %g7
185	add	%o1, 128, %o1
186	bne	5b
187	 add	%o0, 128, %o0
1883:
/* Up to 0x70 bytes remain in 16-byte chunks: jump backwards into
 * copy_user_table.  Each 16 data bytes map to one 6-insn (24-byte)
 * MOVE_LASTCHUNK, i.e. 1.5 code bytes per data byte, which is the
 * %o4 = %g7 + %g7/2 computation below. */
189	andcc	%g1, 0x70, %g7
190	be	copy_user_table_end
191	 andcc	%g1, 8, %g0
192
193	sethi	%hi(copy_user_table_end), %o5
194	srl	%g7, 1, %o4
195	add	%g7, %o4, %o4
196	add	%o1, %g7, %o1
197	sub	%o5, %o4, %o5
198	jmpl	%o5 + %lo(copy_user_table_end), %g0
199	 add	%o0, %g7, %o0
200
/* Entered from behind via the jmpl above; do not add or remove
 * instructions here without fixing the offset arithmetic and the
 * .fixup handler 51. */
201copy_user_table:
202	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
203	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
204	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
205	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
206	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
207	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
208	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
209copy_user_table_end:
210	EXT(copy_user_table, copy_user_table_end, 51f)
211	be	copy_user_last7
212	 andcc	%g1, 4, %g0
213
/* Copy the remaining 8-byte chunk (condition set by "andcc %g1, 8"). */
214	EX(ldd	[%o1], %g2, and %g1, 0xf)
215	add	%o0, 8, %o0
216	add	%o1, 8, %o1
217	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
218	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/* Tail: copy the final 4, 2 and 1 bytes as flagged in %g1. */
219copy_user_last7:
220	be	1f
221	 andcc	%g1, 2, %g0
222
223	EX(ld	[%o1], %g2, and %g1, 7)
224	add	%o1, 4, %o1
225	EX(st	%g2, [%o0], and %g1, 7)
226	add	%o0, 4, %o0
2271:
228	be	1f
229	 andcc	%g1, 1, %g0
230
231	EX(lduh	[%o1], %g2, and %g1, 3)
232	add	%o1, 2, %o1
233	EX(sth	%g2, [%o0], and %g1, 3)
234	add	%o0, 2, %o0
2351:
236	be	1f
237	 nop
238
239	EX(ldub	[%o1], %g2, add %g0, 1)
240	EX(stb	%g2, [%o0], add %g0, 1)
2411:
242	retl
243 	 clr	%o0
244
/* Bulk loop for doubleword-aligned dst: ldd/std, 128 bytes per
 * iteration.  Entered at "+ 4" from the word loop above, with the
 * first ldd already executed in that branch's delay slot. */
245ldd_std:
246	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
247	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
248	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
249	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
25081:
251	EXT(ldd_std, 81b, 52f)
252	subcc	%g7, 128, %g7
253	add	%o1, 128, %o1
254	bne	ldd_std
255	 add	%o0, 128, %o0
256
257	andcc	%g1, 0x70, %g7
258	be	copy_user_table_end
259	 andcc	%g1, 8, %g0
260
261	sethi	%hi(copy_user_table_end), %o5
262	srl	%g7, 1, %o4
263	add	%g7, %o4, %o4
264	add	%o1, %g7, %o1
265	sub	%o5, %o4, %o5
266	jmpl	%o5 + %lo(copy_user_table_end), %g0
267	 add	%o0, %g7, %o0
268
/* src and dst differ in their low two address bits, so word copies
 * are impossible.  len <= 15 goes straight to short_end; %o5 == 2
 * means the pointers agree mod 2 (halfwords usable); otherwise we
 * must copy byte by byte. */
269cannot_optimize:
270	bleu	short_end
271	 cmp	%o5, 2
272
273	bne	byte_chunk
274	 and	%o2, 0xfffffff0, %o3
275	 
276	andcc	%o1, 1, %g0
277	be	10f
278	 nop
279
280	EXO2(ldub [%o1], %g2)
281	add	%o1, 1, %o1
282	EXO2(stb %g2, [%o0])
283	sub	%o2, 1, %o2
284	andcc	%o2, 0xfffffff0, %o3
285	be	short_end
286	 add	%o0, 1, %o0
/* Halfword bulk loop: 16 bytes per iteration. */
28710:
288	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
289	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
29082:
291	EXT(10b, 82b, 53f)
292	subcc	%o3, 0x10, %o3
293	add	%o1, 0x10, %o1
294	bne	10b
295	 add	%o0, 0x10, %o0
296	b	2f
297	 and	%o2, 0xe, %o3
298	
/* Byte bulk loop: 16 bytes per iteration. */
299byte_chunk:
300	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
301	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
302	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
303	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
304	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
305	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
306	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
307	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
30883:
309	EXT(byte_chunk, 83b, 54f)
310	subcc	%o3, 0x10, %o3
311	add	%o1, 0x10, %o1
312	bne	byte_chunk
313	 add	%o0, 0x10, %o0
314
/* Copy the remaining 0-15 bytes.  The jmpl enters the table of
 * MOVE_SHORTCHUNKs from behind: each remaining 2-byte chunk is one
 * 4-insn (16-byte) expansion, hence the sll-by-3 scaling of %o3
 * (%o3/2 chunks * 16 code bytes). */
315short_end:
316	and	%o2, 0xe, %o3
3172:
318	sethi	%hi(short_table_end), %o5
319	sll	%o3, 3, %o4
320	add	%o0, %o3, %o0
321	sub	%o5, %o4, %o5
322	add	%o1, %o3, %o1
323	jmpl	%o5 + %lo(short_table_end), %g0
324	 andcc	%o2, 1, %g0
32584:
326	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
327	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
328	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
329	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
330	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
331	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
332	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
333short_table_end:
334	EXT(84b, short_table_end, 55f)
335	be	1f
336	 nop
/* Final odd byte, if any. */
337	EX(ldub	[%o1], %g2, add %g0, 1)
338	EX(stb	%g2, [%o0], add %g0, 1)
3391:
340	retl
341 	 clr	%o0
342
/* len <= 15 with src and dst mutually word-aligned: optionally move
 * 8 bytes, then share the tail code of the aligned path. */
343short_aligned_end:
344	bne	short_end
345	 andcc	%o2, 8, %g0
346
347	be	1f
348	 andcc	%o2, 4, %g0
349
350	EXO2(ld	[%o1 + 0x00], %g2)
351	EXO2(ld	[%o1 + 0x04], %g3)
352	add	%o1, 8, %o1
353	EXO2(st	%g2, [%o0 + 0x00])
354	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
355	add	%o0, 8, %o0
3561:
357	b	copy_user_last7
358	 mov	%o2, %g1
359
/*
 * Fault handlers.  Each computes the number of not-yet-copied bytes
 * into %g3 and branches to fixupretl, which returns that count in
 * %o0.  If the destination is a kernel buffer (%o0 >= PAGE_OFFSET
 * while %o1 is a user address, i.e. a copy_from_user that faulted)
 * and the preemption count is zero, the uncopied tail is cleared
 * with __bzero first (%g6 presumably holds the current thread_info
 * pointer here -- TI_PREEMPT is its preempt-count offset).
 * The byte counts computed by handlers 50-55 encode the exact
 * instruction layout of the copy loops above (see the formula
 * comments); %g2 arrives as (faulting_insn - range_start) >> 2.
 * Do not touch the loops without revisiting this arithmetic.
 */
360	.section .fixup,#alloc,#execinstr
361	.align	4
36297:
363	mov	%o2, %g3
364fixupretl:
365	sethi   %hi(PAGE_OFFSET), %g1
366	cmp	%o0, %g1
367	blu	1f
368	 cmp	%o1, %g1
369	bgeu	1f
370	 ld	[%g6 + TI_PREEMPT], %g1
371	cmp	%g1, 0
372	bne	1f
373	 nop
374	save	%sp, -64, %sp
375	mov	%i0, %o0
376	call	__bzero
377	 mov	%g3, %o1
378	restore
3791:	retl
380	 mov	%g3, %o0
381
382/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
38350:
384/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
385 * happens. This is derived from the amount ldd reads, st stores, etc.
386 * x = g2 % 12;
387 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
388 * o0 += (g2 / 12) * 32;
389 */
390	cmp	%g2, 12
391	add	%o0, %g7, %o0
392	bcs	1f
393	 cmp	%g2, 24
394	bcs	2f
395	 cmp	%g2, 36
396	bcs	3f
397	 nop
398	sub	%g2, 12, %g2
399	sub	%g7, 32, %g7
4003:	sub	%g2, 12, %g2
401	sub	%g7, 32, %g7
4022:	sub	%g2, 12, %g2
403	sub	%g7, 32, %g7
4041:	cmp	%g2, 4
405	bcs,a	60f
406	 clr	%g2
407	sub	%g2, 4, %g2
408	sll	%g2, 2, %g2
40960:	and	%g1, 0x7f, %g3
410	sub	%o0, %g7, %o0
411	add	%g3, %g7, %g3
412	ba	fixupretl
413	 sub	%g3, %g2, %g3
41451:
415/* i = 41 - g2; j = i % 6;
416 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
417 * o0 -= (i / 6) * 16 + 16;
418 */
419	neg	%g2
420	and	%g1, 0xf, %g1
421	add	%g2, 41, %g2
422	add	%o0, %g1, %o0
4231:	cmp	%g2, 6
424	bcs,a	2f
425	 cmp	%g2, 4
426	add	%g1, 16, %g1
427	b	1b
428	 sub	%g2, 6, %g2
4292:	bcc,a	2f
430	 mov	16, %g2
431	inc	%g2
432	sll	%g2, 2, %g2
4332:	add	%g1, %g2, %g3
434	ba	fixupretl
435	 sub	%o0, %g3, %o0
43652:
437/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
438   o0 += (g2 / 8) * 32 */
439	andn	%g2, 7, %g4
440	add	%o0, %g7, %o0
441	andcc	%g2, 4, %g0
442	and	%g2, 3, %g2
443	sll	%g4, 2, %g4
444	sll	%g2, 3, %g2
445	bne	60b
446	 sub	%g7, %g4, %g7
447	ba	60b
448	 clr	%g2
44953:
450/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
451   o0 += (g2 & 8) */
452	and	%g2, 3, %g4
453	andcc	%g2, 4, %g0
454	and	%g2, 8, %g2
455	sll	%g4, 1, %g4
456	be	1f
457	 add	%o0, %g2, %o0
458	add	%g2, %g4, %g2
4591:	and	%o2, 0xf, %g3
460	add	%g3, %o3, %g3
461	ba	fixupretl
462	 sub	%g3, %g2, %g3
46354:
464/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
465   o0 += (g2 / 4) * 2 */
466	srl	%g2, 2, %o4
467	and	%g2, 1, %o5
468	srl	%g2, 1, %g2
469	add	%o4, %o4, %o4
470	and	%o5, %g2, %o5
471	and	%o2, 0xf, %o2
472	add	%o0, %o4, %o0
473	sub	%o3, %o5, %o3
474	sub	%o2, %o4, %o2
475	ba	fixupretl
476	 add	%o2, %o3, %g3
47755:
478/* i = 27 - g2;
479   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
480   o0 -= i / 4 * 2 + 1 */
481	neg	%g2
482	and	%o2, 1, %o2
483	add	%g2, 27, %g2
484	srl	%g2, 2, %o5
485	andcc	%g2, 3, %g0
486	mov	1, %g2
487	add	%o5, %o5, %o5
488	be,a	1f
489	 clr	%g2
4901:	add	%g2, %o5, %g3
491	sub	%o0, %g3, %o0
492	ba	fixupretl
493	 add	%g3, %o2, %g3
494
495	.globl  __copy_user_end
496__copy_user_end:
Version: v4.10.11 (same file as above; this version additionally
exports __copy_user to modules via EXPORT_SYMBOL / <asm/export.h>)
  1/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
  2 *
  3 *  Copyright(C) 1995 Linus Torvalds
  4 *  Copyright(C) 1996 David S. Miller
  5 *  Copyright(C) 1996 Eddie C. Dost
  6 *  Copyright(C) 1996,1998 Jakub Jelinek
  7 *
  8 * derived from:
  9 *	e-mail between David and Eddie.
 10 *
 11 * Returns 0 if successful, otherwise count of bytes not copied yet
 12 */
 13
 14#include <asm/ptrace.h>
 15#include <asm/asmmacro.h>
 16#include <asm/page.h>
 17#include <asm/thread_info.h>
 18#include <asm/export.h>
 19
/*
 * Exception-table helper macros (v4.10.11 copy of the file).  Each
 * EX-style macro emits one load/store at local label 98 and records
 * a fixup entry for it in __ex_table, so a fault inside the user
 * copy lands in the .fixup code at the end of this file.
 */
 20/* Work around cpp -rob */
 21#define ALLOC #alloc
 22#define EXECINSTR #execinstr
/* EX(insn, ops, a, b): execute "insn ops"; on a fault, run
 * "a, b, %g3" (leaves the bytes-not-copied count in %g3) and branch
 * to fixupretl. */
 23#define EX(x,y,a,b) 				\
 2498: 	x,y;					\
 25	.section .fixup,ALLOC,EXECINSTR;	\
 26	.align	4;				\
 2799:	ba fixupretl;				\
 28	 a, b, %g3;				\
 29	.section __ex_table,ALLOC;		\
 30	.align	4;				\
 31	.word	98b, 99b;			\
 32	.text;					\
 33	.align	4
 34
/* EX2: like EX, but runs one extra fixup instruction "c, d, e"
 * before computing %g3. */
 35#define EX2(x,y,c,d,e,a,b) 			\
 3698: 	x,y;					\
 37	.section .fixup,ALLOC,EXECINSTR;	\
 38	.align	4;				\
 3999:	c, d, e;				\
 40	ba fixupretl;				\
 41	 a, b, %g3;				\
 42	.section __ex_table,ALLOC;		\
 43	.align	4;				\
 44	.word	98b, 99b;			\
 45	.text;					\
 46	.align	4
 47
/* EXO2: faults are routed to local fixup label 97, which returns the
 * whole remaining count (%o2). */
 48#define EXO2(x,y) 				\
 4998: 	x, y;					\
 50	.section __ex_table,ALLOC;		\
 51	.align	4;				\
 52	.word	98b, 97f;			\
 53	.text;					\
 54	.align	4
 55
/* EXT(start, end, handler): range entry -- a fault anywhere in
 * [start, end) is dispatched to "handler", which receives
 * (faulting_insn - start) >> 2 in %g2 (see the comment above label
 * 50 below).  The zero word marks this as a range entry. */
 56#define EXT(start,end,handler)			\
 57	.section __ex_table,ALLOC;		\
 58	.align	4;				\
 59	.word	start, 0, end, handler;		\
 60	.text;					\
 61	.align	4
 62
 63/* Please do not change following macros unless you change logic used
 64 * in .fixup at the end of this file as well
 65 */
 66
 67/* Both these macros have to start with exactly the same insn */
/* Copy 32 bytes: four 8-byte ldd loads, eight 4-byte st stores. */
 68#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 69	ldd	[%src + (offset) + 0x00], %t0; \
 70	ldd	[%src + (offset) + 0x08], %t2; \
 71	ldd	[%src + (offset) + 0x10], %t4; \
 72	ldd	[%src + (offset) + 0x18], %t6; \
 73	st	%t0, [%dst + (offset) + 0x00]; \
 74	st	%t1, [%dst + (offset) + 0x04]; \
 75	st	%t2, [%dst + (offset) + 0x08]; \
 76	st	%t3, [%dst + (offset) + 0x0c]; \
 77	st	%t4, [%dst + (offset) + 0x10]; \
 78	st	%t5, [%dst + (offset) + 0x14]; \
 79	st	%t6, [%dst + (offset) + 0x18]; \
 80	st	%t7, [%dst + (offset) + 0x1c];
 81
/* Copy 32 bytes when dst is doubleword-aligned: ldd/std pairs only. */
 82#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 83	ldd	[%src + (offset) + 0x00], %t0; \
 84	ldd	[%src + (offset) + 0x08], %t2; \
 85	ldd	[%src + (offset) + 0x10], %t4; \
 86	ldd	[%src + (offset) + 0x18], %t6; \
 87	std	%t0, [%dst + (offset) + 0x00]; \
 88	std	%t2, [%dst + (offset) + 0x08]; \
 89	std	%t4, [%dst + (offset) + 0x10]; \
 90	std	%t6, [%dst + (offset) + 0x18];
 91
/* Copy 16 bytes downward from src/dst; six insns (24 bytes of code)
 * per expansion -- the copy_user_table jump arithmetic relies on
 * exactly this size. */
 92#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
 93	ldd	[%src - (offset) - 0x10], %t0; \
 94	ldd	[%src - (offset) - 0x08], %t2; \
 95	st	%t0, [%dst - (offset) - 0x10]; \
 96	st	%t1, [%dst - (offset) - 0x0c]; \
 97	st	%t2, [%dst - (offset) - 0x08]; \
 98	st	%t3, [%dst - (offset) - 0x04];
 99
/* Copy 8 bytes as four halfwords (for 2-byte mutual alignment). */
100#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
101	lduh	[%src + (offset) + 0x00], %t0; \
102	lduh	[%src + (offset) + 0x02], %t1; \
103	lduh	[%src + (offset) + 0x04], %t2; \
104	lduh	[%src + (offset) + 0x06], %t3; \
105	sth	%t0, [%dst + (offset) + 0x00]; \
106	sth	%t1, [%dst + (offset) + 0x02]; \
107	sth	%t2, [%dst + (offset) + 0x04]; \
108	sth	%t3, [%dst + (offset) + 0x06];
109
/* Copy 2 bytes, byte by byte; four insns (16 bytes of code) per
 * expansion -- the short_table jump arithmetic relies on this. */
110#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
111	ldub	[%src - (offset) - 0x02], %t0; \
112	ldub	[%src - (offset) - 0x01], %t1; \
113	stb	%t0, [%dst - (offset) - 0x02]; \
114	stb	%t1, [%dst - (offset) - 0x01];
115
/*
 * Copy routine proper (v4.10.11).  SPARC note: the instruction after
 * every branch is a delay slot and executes together with the
 * branch; the single-space indent marks delay-slot instructions.
 */
116	.text
117	.align	4
118
119	.globl  __copy_user_begin
120__copy_user_begin:
121
122	.globl	__copy_user
/* Make __copy_user available to loadable modules (asm/export.h). */
123	EXPORT_SYMBOL(__copy_user)
/*
 * dword_align: reached when src and dst agree in their low two
 * address bits but src (%o1) is not yet 4-byte aligned.  Copies the
 * 1-3 leading bytes, then branches back to label 3 in __copy_user.
 */
124dword_align:
125	andcc	%o1, 1, %g0
126	be	4f
127	 andcc	%o1, 2, %g0
128
129	EXO2(ldub [%o1], %g2)
130	add	%o1, 1, %o1
131	EXO2(stb %g2, [%o0])
132	sub	%o2, 1, %o2
133	bne	3f
134	 add	%o0, 1, %o0
135
136	EXO2(lduh [%o1], %g2)
137	add	%o1, 2, %o1
138	EXO2(sth %g2, [%o0])
139	sub	%o2, 2, %o2
140	b	3f
141	 add	%o0, 2, %o0
1424:
143	EXO2(lduh [%o1], %g2)
144	add	%o1, 2, %o1
145	EXO2(sth %g2, [%o0])
146	sub	%o2, 2, %o2
147	b	3f
148	 add	%o0, 2, %o0
149
/*
 * __copy_user: entry point.
 * In:  %o0 = dst, %o1 = src, %o2 = len.
 * Out: %o0 = 0 on success, else the number of bytes not copied.
 * %g1 holds the remaining length on the aligned path; %g7 holds the
 * byte count of the current bulk loop.  Faults are dispatched via
 * the EX/EXO2/EXT table entries to the .fixup handlers below.
 */
150__copy_user:	/* %o0=dst %o1=src %o2=len */
151	xor	%o0, %o1, %o4
1521:
153	andcc	%o4, 3, %o5
1542:
155	bne	cannot_optimize
156	 cmp	%o2, 15
157
158	bleu	short_aligned_end
159	 andcc	%o1, 3, %g0
160
161	bne	dword_align
1623:
163	 andcc	%o1, 4, %g0
164
165	be	2f
166	 mov	%o2, %g1
167
168	EXO2(ld [%o1], %o4)
169	sub	%g1, 4, %g1
170	EXO2(st %o4, [%o0])
171	add	%o1, 4, %o1
172	add	%o0, 4, %o0
1732:
174	andcc	%g1, 0xffffff80, %g7
175	be	3f
176	 andcc	%o0, 4, %g0
177
/* If dst is doubleword-aligned, take the ldd/std loop instead.  The
 * delay slot executes the first ldd of MOVE_BIGCHUNK, which is the
 * same insn as the first one of ldd_std -- hence "ldd_std + 4". */
178	be	ldd_std + 4
1795:
/* Word-store bulk loop: 128 bytes per iteration. */
180	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
181	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
182	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
183	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
18480:
185	EXT(5b, 80b, 50f)
186	subcc	%g7, 128, %g7
187	add	%o1, 128, %o1
188	bne	5b
189	 add	%o0, 128, %o0
1903:
/* Up to 0x70 bytes remain in 16-byte chunks: jump backwards into
 * copy_user_table.  Each 16 data bytes map to one 6-insn (24-byte)
 * MOVE_LASTCHUNK, i.e. 1.5 code bytes per data byte, which is the
 * %o4 = %g7 + %g7/2 computation below. */
191	andcc	%g1, 0x70, %g7
192	be	copy_user_table_end
193	 andcc	%g1, 8, %g0
194
195	sethi	%hi(copy_user_table_end), %o5
196	srl	%g7, 1, %o4
197	add	%g7, %o4, %o4
198	add	%o1, %g7, %o1
199	sub	%o5, %o4, %o5
200	jmpl	%o5 + %lo(copy_user_table_end), %g0
201	 add	%o0, %g7, %o0
202
/* Entered from behind via the jmpl above; do not add or remove
 * instructions here without fixing the offset arithmetic and the
 * .fixup handler 51. */
203copy_user_table:
204	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
205	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
206	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
207	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
208	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
209	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
210	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
211copy_user_table_end:
212	EXT(copy_user_table, copy_user_table_end, 51f)
213	be	copy_user_last7
214	 andcc	%g1, 4, %g0
215
/* Copy the remaining 8-byte chunk (condition set by "andcc %g1, 8"). */
216	EX(ldd	[%o1], %g2, and %g1, 0xf)
217	add	%o0, 8, %o0
218	add	%o1, 8, %o1
219	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
220	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/* Tail: copy the final 4, 2 and 1 bytes as flagged in %g1. */
221copy_user_last7:
222	be	1f
223	 andcc	%g1, 2, %g0
224
225	EX(ld	[%o1], %g2, and %g1, 7)
226	add	%o1, 4, %o1
227	EX(st	%g2, [%o0], and %g1, 7)
228	add	%o0, 4, %o0
2291:
230	be	1f
231	 andcc	%g1, 1, %g0
232
233	EX(lduh	[%o1], %g2, and %g1, 3)
234	add	%o1, 2, %o1
235	EX(sth	%g2, [%o0], and %g1, 3)
236	add	%o0, 2, %o0
2371:
238	be	1f
239	 nop
240
241	EX(ldub	[%o1], %g2, add %g0, 1)
242	EX(stb	%g2, [%o0], add %g0, 1)
2431:
244	retl
245 	 clr	%o0
246
/* Bulk loop for doubleword-aligned dst: ldd/std, 128 bytes per
 * iteration.  Entered at "+ 4" from the word loop above, with the
 * first ldd already executed in that branch's delay slot. */
247ldd_std:
248	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
249	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
250	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
251	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
25281:
253	EXT(ldd_std, 81b, 52f)
254	subcc	%g7, 128, %g7
255	add	%o1, 128, %o1
256	bne	ldd_std
257	 add	%o0, 128, %o0
258
259	andcc	%g1, 0x70, %g7
260	be	copy_user_table_end
261	 andcc	%g1, 8, %g0
262
263	sethi	%hi(copy_user_table_end), %o5
264	srl	%g7, 1, %o4
265	add	%g7, %o4, %o4
266	add	%o1, %g7, %o1
267	sub	%o5, %o4, %o5
268	jmpl	%o5 + %lo(copy_user_table_end), %g0
269	 add	%o0, %g7, %o0
270
/* src and dst differ in their low two address bits, so word copies
 * are impossible.  len <= 15 goes straight to short_end; %o5 == 2
 * means the pointers agree mod 2 (halfwords usable); otherwise we
 * must copy byte by byte. */
271cannot_optimize:
272	bleu	short_end
273	 cmp	%o5, 2
274
275	bne	byte_chunk
276	 and	%o2, 0xfffffff0, %o3
277	 
278	andcc	%o1, 1, %g0
279	be	10f
280	 nop
281
282	EXO2(ldub [%o1], %g2)
283	add	%o1, 1, %o1
284	EXO2(stb %g2, [%o0])
285	sub	%o2, 1, %o2
286	andcc	%o2, 0xfffffff0, %o3
287	be	short_end
288	 add	%o0, 1, %o0
/* Halfword bulk loop: 16 bytes per iteration. */
28910:
290	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
291	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
29282:
293	EXT(10b, 82b, 53f)
294	subcc	%o3, 0x10, %o3
295	add	%o1, 0x10, %o1
296	bne	10b
297	 add	%o0, 0x10, %o0
298	b	2f
299	 and	%o2, 0xe, %o3
300	
/* Byte bulk loop: 16 bytes per iteration. */
301byte_chunk:
302	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
303	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
304	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
305	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
306	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
307	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
308	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
309	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
31083:
311	EXT(byte_chunk, 83b, 54f)
312	subcc	%o3, 0x10, %o3
313	add	%o1, 0x10, %o1
314	bne	byte_chunk
315	 add	%o0, 0x10, %o0
316
/* Copy the remaining 0-15 bytes.  The jmpl enters the table of
 * MOVE_SHORTCHUNKs from behind: each remaining 2-byte chunk is one
 * 4-insn (16-byte) expansion, hence the sll-by-3 scaling of %o3
 * (%o3/2 chunks * 16 code bytes). */
317short_end:
318	and	%o2, 0xe, %o3
3192:
320	sethi	%hi(short_table_end), %o5
321	sll	%o3, 3, %o4
322	add	%o0, %o3, %o0
323	sub	%o5, %o4, %o5
324	add	%o1, %o3, %o1
325	jmpl	%o5 + %lo(short_table_end), %g0
326	 andcc	%o2, 1, %g0
32784:
328	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
329	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
330	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
331	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
332	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
333	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
334	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
335short_table_end:
336	EXT(84b, short_table_end, 55f)
337	be	1f
338	 nop
/* Final odd byte, if any. */
339	EX(ldub	[%o1], %g2, add %g0, 1)
340	EX(stb	%g2, [%o0], add %g0, 1)
3411:
342	retl
343 	 clr	%o0
344
/* len <= 15 with src and dst mutually word-aligned: optionally move
 * 8 bytes, then share the tail code of the aligned path. */
345short_aligned_end:
346	bne	short_end
347	 andcc	%o2, 8, %g0
348
349	be	1f
350	 andcc	%o2, 4, %g0
351
352	EXO2(ld	[%o1 + 0x00], %g2)
353	EXO2(ld	[%o1 + 0x04], %g3)
354	add	%o1, 8, %o1
355	EXO2(st	%g2, [%o0 + 0x00])
356	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
357	add	%o0, 8, %o0
3581:
359	b	copy_user_last7
360	 mov	%o2, %g1
361
/*
 * Fault handlers (v4.10.11).  Each computes the number of
 * not-yet-copied bytes into %g3 and branches to fixupretl, which
 * returns that count in %o0.  If the destination is a kernel buffer
 * (%o0 >= PAGE_OFFSET while %o1 is a user address, i.e. a
 * copy_from_user that faulted) and the preemption count is zero,
 * the uncopied tail is cleared with __bzero first (%g6 presumably
 * holds the current thread_info pointer -- TI_PREEMPT is its
 * preempt-count offset).  The byte counts computed by handlers
 * 50-55 encode the exact instruction layout of the copy loops above
 * (see the formula comments); %g2 arrives as
 * (faulting_insn - range_start) >> 2.  Do not touch the loops
 * without revisiting this arithmetic.
 */
362	.section .fixup,#alloc,#execinstr
363	.align	4
36497:
365	mov	%o2, %g3
366fixupretl:
367	sethi   %hi(PAGE_OFFSET), %g1
368	cmp	%o0, %g1
369	blu	1f
370	 cmp	%o1, %g1
371	bgeu	1f
372	 ld	[%g6 + TI_PREEMPT], %g1
373	cmp	%g1, 0
374	bne	1f
375	 nop
376	save	%sp, -64, %sp
377	mov	%i0, %o0
378	call	__bzero
379	 mov	%g3, %o1
380	restore
3811:	retl
382	 mov	%g3, %o0
383
384/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
38550:
386/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
387 * happens. This is derived from the amount ldd reads, st stores, etc.
388 * x = g2 % 12;
389 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
390 * o0 += (g2 / 12) * 32;
391 */
392	cmp	%g2, 12
393	add	%o0, %g7, %o0
394	bcs	1f
395	 cmp	%g2, 24
396	bcs	2f
397	 cmp	%g2, 36
398	bcs	3f
399	 nop
400	sub	%g2, 12, %g2
401	sub	%g7, 32, %g7
4023:	sub	%g2, 12, %g2
403	sub	%g7, 32, %g7
4042:	sub	%g2, 12, %g2
405	sub	%g7, 32, %g7
4061:	cmp	%g2, 4
407	bcs,a	60f
408	 clr	%g2
409	sub	%g2, 4, %g2
410	sll	%g2, 2, %g2
41160:	and	%g1, 0x7f, %g3
412	sub	%o0, %g7, %o0
413	add	%g3, %g7, %g3
414	ba	fixupretl
415	 sub	%g3, %g2, %g3
41651:
417/* i = 41 - g2; j = i % 6;
418 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
419 * o0 -= (i / 6) * 16 + 16;
420 */
421	neg	%g2
422	and	%g1, 0xf, %g1
423	add	%g2, 41, %g2
424	add	%o0, %g1, %o0
4251:	cmp	%g2, 6
426	bcs,a	2f
427	 cmp	%g2, 4
428	add	%g1, 16, %g1
429	b	1b
430	 sub	%g2, 6, %g2
4312:	bcc,a	2f
432	 mov	16, %g2
433	inc	%g2
434	sll	%g2, 2, %g2
4352:	add	%g1, %g2, %g3
436	ba	fixupretl
437	 sub	%o0, %g3, %o0
43852:
439/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
440   o0 += (g2 / 8) * 32 */
441	andn	%g2, 7, %g4
442	add	%o0, %g7, %o0
443	andcc	%g2, 4, %g0
444	and	%g2, 3, %g2
445	sll	%g4, 2, %g4
446	sll	%g2, 3, %g2
447	bne	60b
448	 sub	%g7, %g4, %g7
449	ba	60b
450	 clr	%g2
45153:
452/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
453   o0 += (g2 & 8) */
454	and	%g2, 3, %g4
455	andcc	%g2, 4, %g0
456	and	%g2, 8, %g2
457	sll	%g4, 1, %g4
458	be	1f
459	 add	%o0, %g2, %o0
460	add	%g2, %g4, %g2
4611:	and	%o2, 0xf, %g3
462	add	%g3, %o3, %g3
463	ba	fixupretl
464	 sub	%g3, %g2, %g3
46554:
466/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
467   o0 += (g2 / 4) * 2 */
468	srl	%g2, 2, %o4
469	and	%g2, 1, %o5
470	srl	%g2, 1, %g2
471	add	%o4, %o4, %o4
472	and	%o5, %g2, %o5
473	and	%o2, 0xf, %o2
474	add	%o0, %o4, %o0
475	sub	%o3, %o5, %o3
476	sub	%o2, %o4, %o2
477	ba	fixupretl
478	 add	%o2, %o3, %g3
47955:
480/* i = 27 - g2;
481   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
482   o0 -= i / 4 * 2 + 1 */
483	neg	%g2
484	and	%o2, 1, %o2
485	add	%g2, 27, %g2
486	srl	%g2, 2, %o5
487	andcc	%g2, 3, %g0
488	mov	1, %g2
489	add	%o5, %o5, %o5
490	be,a	1f
491	 clr	%g2
4921:	add	%g2, %o5, %g3
493	sub	%o0, %g3, %o0
494	ba	fixupretl
495	 add	%g3, %o2, %g3
496
497	.globl  __copy_user_end
498__copy_user_end: