Linux Audio

Check our new training course

Loading...
v3.5.6
/*
 * "memset" implementation for SH4
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (c) 2009  STMicroelectronics Limited
 * Author: Stuart Menefy <stuart.menefy:st.com>
 */

/*
 *            void *memset(void *s, int c, size_t n);
 */
 12
 13#include <linux/linkage.h>
 14
 /*
  * memset - fill n bytes starting at s with the low byte of c (SH-4).
  *
  * In:   r4 = s, r5 = c, r6 = n
  * Out:  r0 = s
  * Uses: r0-r3 as scratch; r4, r5 and r6 are modified.
  *
  * The buffer is filled from its end towards its start: r4 is first moved
  * to s + n and every store pre-decrements it, so once all n bytes have
  * been written r4 is back at s and is returned unchanged.  r6 tracks the
  * number of bytes still to be written.
  *
  * An instruction indented by one extra space sits in the delay slot of
  * the branch above it and executes whether or not the branch is taken.
  *
  * The size test at entry uses the signed cmp/gt, so an n with bit 31 set
  * takes the byte-at-a-time path at 40:.
  */
ENTRY(memset)
	mov	#12,r0
	add	r6,r4		! r4 = s + n, one past the last byte to fill
	cmp/gt	r6,r0		! T = (12 > n), signed
	bt/s	40f		! if it's too small, set a byte at once
	 mov	r4,r0		! (delay slot) r0 = end address
	and	#3,r0		! r0 = bytes above the last 4-byte boundary
	cmp/eq	#0,r0
	bt/s	2f		! It's aligned
	 sub	r0,r6		! (delay slot) take those bytes off the count
1:				! byte stores down to the 4-byte boundary
	dt	r0
	bf/s	1b
	 mov.b	r5,@-r4		! (delay slot) runs on every pass, r0 stores in all
2:				! make VVVV
	extu.b	r5,r5		! 000V
	swap.b	r5,r0		!   V0
	or	r0,r5		!   VV
	swap.w	r5,r0		! VV00
	or	r0,r5		! VVVV

	! Check if enough bytes need to be set to be worth the big loop
	mov	#0x40, r0	! (MT)
	cmp/gt	r6,r0		! (MT)  64 > len => slow loop

	bt/s	22f
	 mov	r6,r0		! (delay slot) 22: expects the count in r0

	! align the dst to the cache block size if necessary
	mov	r4, r3
	mov	#~(0x1f), r1

	and	r3, r1		! r1 = r4 rounded down to a 32-byte boundary
	cmp/eq	r3, r1

	bt/s	11f		! dst is already aligned
	 sub	r1, r3		! r3-r1 -> r3 (bytes above the boundary)
	shlr2	r3		! number of loops (one longword each)

10:	mov.l	r5,@-r4
	dt	r3
	bf/s	10b
	 add	#-4, r6		! (delay slot) account for the longword just stored

11:	! dst is 32byte aligned
	! r6 >= 36 here (>= 64 before at most 28 alignment bytes), so r2 >= 1
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2		! number of loops (negative count shifts right: r6 >> 5)

	add	#-32, r4	! r4 = base of the first 32-byte block
	mov	r5, r0		! movca.l takes its source operand from r0
12:
	movca.l	r0,@r4		! store that allocates the cache block (see SH-4 manual)
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-0x20, r6	! 32 bytes done
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	12b
	 add	#-32, r4	! (delay slot) step to the next lower block

	add	#32, r4		! undo the extra step taken by the final delay slot
	mov	#8, r0
	cmp/ge	r0, r6		! T = (r6 >= 8)
	bf	40f		! fewer than 8 bytes left: byte loop only

	mov	r6,r0
22:				! r0 = r6 = bytes left (>= 8), r4 is 4-byte aligned
	shlr2	r0
	shlr	r0		! r0 = r6 >> 3
3:
	dt	r0
	mov.l	r5,@-r4		! set 8-byte at once
	bf/s	3b
	 mov.l	r5,@-r4		! (delay slot) second longword of the pair
	!
	mov	#7,r0
	and	r0,r6		! r6 = leftover bytes (0..7)

	! fill bytes (length may be zero)
40:	tst	r6,r6
	bt	5f
4:
	dt	r6
	bf/s	4b
	 mov.b	r5,@-r4		! (delay slot) one byte per pass
5:
	rts
	 mov	r4,r0		! (delay slot) return value: r4 is back at s
v4.6
/*
 * "memset" implementation for SH4
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (c) 2009  STMicroelectronics Limited
 * Author: Stuart Menefy <stuart.menefy:st.com>
 */

/*
 *            void *memset(void *s, int c, size_t n);
 */
 12
 13#include <linux/linkage.h>
 14
 /*
  * memset - fill n bytes starting at s with the low byte of c (SH-4).
  *
  * In:   r4 = s, r5 = c, r6 = n
  * Out:  r0 = s
  * Uses: r0-r3 as scratch; r4, r5 and r6 are modified.
  *
  * The buffer is filled from its end towards its start: r4 is first moved
  * to s + n and every store pre-decrements it, so once all n bytes have
  * been written r4 is back at s and is returned unchanged.  r6 tracks the
  * number of bytes still to be written.
  *
  * An instruction indented by one extra space sits in the delay slot of
  * the branch above it and executes whether or not the branch is taken.
  *
  * The size test at entry uses the signed cmp/gt, so an n with bit 31 set
  * takes the byte-at-a-time path at 40:.
  */
ENTRY(memset)
	mov	#12,r0
	add	r6,r4		! r4 = s + n, one past the last byte to fill
	cmp/gt	r6,r0		! T = (12 > n), signed
	bt/s	40f		! if it's too small, set a byte at once
	 mov	r4,r0		! (delay slot) r0 = end address
	and	#3,r0		! r0 = bytes above the last 4-byte boundary
	cmp/eq	#0,r0
	bt/s	2f		! It's aligned
	 sub	r0,r6		! (delay slot) take those bytes off the count
1:				! byte stores down to the 4-byte boundary
	dt	r0
	bf/s	1b
	 mov.b	r5,@-r4		! (delay slot) runs on every pass, r0 stores in all
2:				! make VVVV
	extu.b	r5,r5		! 000V
	swap.b	r5,r0		!   V0
	or	r0,r5		!   VV
	swap.w	r5,r0		! VV00
	or	r0,r5		! VVVV

	! Check if enough bytes need to be set to be worth the big loop
	mov	#0x40, r0	! (MT)
	cmp/gt	r6,r0		! (MT)  64 > len => slow loop

	bt/s	22f
	 mov	r6,r0		! (delay slot) 22: expects the count in r0

	! align the dst to the cache block size if necessary
	mov	r4, r3
	mov	#~(0x1f), r1

	and	r3, r1		! r1 = r4 rounded down to a 32-byte boundary
	cmp/eq	r3, r1

	bt/s	11f		! dst is already aligned
	 sub	r1, r3		! r3-r1 -> r3 (bytes above the boundary)
	shlr2	r3		! number of loops (one longword each)

10:	mov.l	r5,@-r4
	dt	r3
	bf/s	10b
	 add	#-4, r6		! (delay slot) account for the longword just stored

11:	! dst is 32byte aligned
	! r6 >= 36 here (>= 64 before at most 28 alignment bytes), so r2 >= 1
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2		! number of loops (negative count shifts right: r6 >> 5)

	add	#-32, r4	! r4 = base of the first 32-byte block
	mov	r5, r0		! movca.l takes its source operand from r0
12:
	movca.l	r0,@r4		! store that allocates the cache block (see SH-4 manual)
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-0x20, r6	! 32 bytes done
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	12b
	 add	#-32, r4	! (delay slot) step to the next lower block

	add	#32, r4		! undo the extra step taken by the final delay slot
	mov	#8, r0
	cmp/ge	r0, r6		! T = (r6 >= 8)
	bf	40f		! fewer than 8 bytes left: byte loop only

	mov	r6,r0
22:				! r0 = r6 = bytes left (>= 8), r4 is 4-byte aligned
	shlr2	r0
	shlr	r0		! r0 = r6 >> 3
3:
	dt	r0
	mov.l	r5,@-r4		! set 8-byte at once
	bf/s	3b
	 mov.l	r5,@-r4		! (delay slot) second longword of the pair
	!
	mov	#7,r0
	and	r0,r6		! r6 = leftover bytes (0..7)

	! fill bytes (length may be zero)
40:	tst	r6,r6
	bt	5f
4:
	dt	r6
	bf/s	4b
	 mov.b	r5,@-r4		! (delay slot) one byte per pass
5:
	rts
	 mov	r4,r0		! (delay slot) return value: r4 is back at s