Linux Audio

Check our new training course

Loading...
  1/*
  2 * A fast checksum+copy routine using movem
  3 * Copyright (c) 1998, 2001 Axis Communications AB
  4 *
  5 * Authors:	Bjorn Wesen
  6 * 
  7 * csum_partial_copy_nocheck(const char *src, char *dst,
  8 *		             int len, unsigned int sum)
  9 */
 10
 11	.globl	csum_partial_copy_nocheck
 12csum_partial_copy_nocheck:	
 13	
	;; Copy r12 bytes from [r10] to [r11] while accumulating a checksum of
	;; the copied data into r13.  The accumulated checksum is handed back
	;; in r10 (see the "move.d $r13, $r10" that follows each ret).
	;;
	;; Three phases: a movem loop moving 40 bytes per pass (only entered
	;; for lengths of 80 or more), a 16-bit word loop for what is left,
	;; and finally a single trailing byte if the length was odd.
	;;
	;; Register usage on entry:
 14	;; r10 - src
 15	;; r11 - dst
 16	;; r12 - length
 17	;; r13 - checksum
 18
 19	;; check for breakeven length between movem and normal word looping versions
 20	;; we also do _NOT_ want to compute a checksum over more than the 
 21	;; actual length, so lengths below 80 bytes skip the movem loop
	;; (each movem pass moves 10*4 = 40 bytes)
 22	
 23	cmpu.w	80, $r12
 24	blo	_word_loop
 25	nop
 26
 27	;; need to save the registers we use below in the movem loop
 28	;; this overhead is why we have a check above for breakeven length
 29	;; only r0 - r8 have to be saved, the other ones are clobber-able
 30	;; according to the ABI
 31	
 32	subq	9*4, $sp	; make room for the nine saved registers
 33	movem	$r8, [$sp]	; save r0 - r8
 34	
 35	;; do a movem copy and checksum
 36
 37	subq	10*4, $r12	; update length for the first loop
				; (r12 is kept one 40-byte block "ahead" so the
				; bge at the loop bottom tests for a further block)
 38	
 39_mloop:	movem	[$r10+],$r9	; read 10 longwords
 401:	;; A failing userspace access will have this as PC.
 41	movem	$r9,[$r11+]	; write 10 longwords
 42
 43	;; perform dword checksumming on the 10 longwords
	;; each ax makes the add.d that follows it also add in the carry
	;; produced by the previous add.d, chaining the carry through the sum
 44	
 45	add.d	$r0,$r13
 46	ax
 47	add.d	$r1,$r13
 48	ax
 49	add.d	$r2,$r13
 50	ax
 51	add.d	$r3,$r13
 52	ax
 53	add.d	$r4,$r13
 54	ax
 55	add.d	$r5,$r13
 56	ax
 57	add.d	$r6,$r13
 58	ax
 59	add.d	$r7,$r13
 60	ax
 61	add.d	$r8,$r13
 62	ax
 63	add.d	$r9,$r13
 64
 65	;; fold the carry into the checksum, to avoid having to loop the carry
 66	;; back into the top
 67	
 68	ax
 69	addq	0,$r13
 70
 71	subq	10*4,$r12	; account for the block just copied
 72	bge	_mloop		; loop while at least one more full block remains
 73	nop
 74
 75	addq	10*4,$r12	; compensate for last loop underflowing length
 76
 77	movem	[$sp+],$r8	; restore regs
 78
 79_word_loop:
 80	;; only fold if there is anything to fold.
 81
 82	cmpq	0,$r13
 83	beq	_no_fold	; NOTE(review): no nop follows this branch, unlike
				; the others; presumably the move.d below fills the
				; delay slot, which is harmless because r9 is only
				; a temporary -- confirm
 84
 85	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
 86	;; r9 can be used as temporary.
 87	
 88	move.d	$r13,$r9
 89	lsrq	16,$r9		; r9 = checksum >> 16
 90	and.d	0xffff,$r13	; checksum = checksum & 0xffff
 91	add.d	$r9,$r13	; checksum += r9
 92	
 93_no_fold:
 94	cmpq	2,$r12		; fewer than 2 bytes left?
 95	blt	_no_words	; then there is no whole word to copy
 96	nop
 97	
 98	;; copy and checksum the rest of the words
 99	
100	subq	2,$r12		; pre-subtract one word, as done for the movem loop
101	
102_wloop:	move.w	[$r10+],$r9
1032:	;; A failing userspace access will have this as PC.
104	addu.w	$r9,$r13	; add the word just read into the checksum
105	subq	2,$r12
106	bge	_wloop
107	move.w	$r9,[$r11+]	; store placed after the branch: it has to run on
				; every pass (including the last) for the copy to
				; be complete, i.e. it occupies the delay slot
108	
109	addq	2,$r12		; compensate for the loop underflowing length
110		
111_no_words:
112	;; see if we have one odd byte more
113	cmpq	1,$r12
114	beq	_do_byte
115	nop
116	ret
117	move.d	$r13, $r10	; hand the checksum back in r10
118
119_do_byte:	
120	;; copy and checksum the last byte
121	move.b	[$r10],$r9
1223:	;; A failing userspace access will have this as PC.
123	addu.b	$r9,$r13	; add the final byte into the checksum
124	move.b	$r9,[$r11]
125	ret
126	move.d	$r13, $r10	; hand the checksum back in r10
  1/*
  2 * A fast checksum+copy routine using movem
  3 * Copyright (c) 1998, 2001 Axis Communications AB
  4 *
  5 * Authors:	Bjorn Wesen
  6 * 
  7 * csum_partial_copy_nocheck(const char *src, char *dst,
  8 *		             int len, unsigned int sum)
  9 */
 10
 11	.globl	csum_partial_copy_nocheck
 12csum_partial_copy_nocheck:	
 13	
	;; NOTE(review): this is a second definition of the same global label
	;; in this text, with the same instructions as the earlier one --
	;; presumably a duplicated listing; confirm which copy is meant to stay.
	;;
	;; Copy r12 bytes from [r10] to [r11] while accumulating a checksum of
	;; the copied data into r13.  The accumulated checksum is handed back
	;; in r10 (see the "move.d $r13, $r10" that follows each ret).
	;;
	;; Three phases: a movem loop moving 40 bytes per pass (only entered
	;; for lengths of 80 or more), a 16-bit word loop for what is left,
	;; and finally a single trailing byte if the length was odd.
	;;
	;; Register usage on entry:
 14	;; r10 - src
 15	;; r11 - dst
 16	;; r12 - length
 17	;; r13 - checksum
 18
 19	;; check for breakeven length between movem and normal word looping versions
 20	;; we also do _NOT_ want to compute a checksum over more than the 
 21	;; actual length, so lengths below 80 bytes skip the movem loop
	;; (each movem pass moves 10*4 = 40 bytes)
 22	
 23	cmpu.w	80, $r12
 24	blo	_word_loop
 25	nop
 26
 27	;; need to save the registers we use below in the movem loop
 28	;; this overhead is why we have a check above for breakeven length
 29	;; only r0 - r8 have to be saved, the other ones are clobber-able
 30	;; according to the ABI
 31	
 32	subq	9*4, $sp	; make room for the nine saved registers
 33	movem	$r8, [$sp]	; save r0 - r8
 34	
 35	;; do a movem copy and checksum
 36
 37	subq	10*4, $r12	; update length for the first loop
				; (r12 is kept one 40-byte block "ahead" so the
				; bge at the loop bottom tests for a further block)
 38	
 39_mloop:	movem	[$r10+],$r9	; read 10 longwords
 401:	;; A failing userspace access will have this as PC.
 41	movem	$r9,[$r11+]	; write 10 longwords
 42
 43	;; perform dword checksumming on the 10 longwords
	;; each ax makes the add.d that follows it also add in the carry
	;; produced by the previous add.d, chaining the carry through the sum
 44	
 45	add.d	$r0,$r13
 46	ax
 47	add.d	$r1,$r13
 48	ax
 49	add.d	$r2,$r13
 50	ax
 51	add.d	$r3,$r13
 52	ax
 53	add.d	$r4,$r13
 54	ax
 55	add.d	$r5,$r13
 56	ax
 57	add.d	$r6,$r13
 58	ax
 59	add.d	$r7,$r13
 60	ax
 61	add.d	$r8,$r13
 62	ax
 63	add.d	$r9,$r13
 64
 65	;; fold the carry into the checksum, to avoid having to loop the carry
 66	;; back into the top
 67	
 68	ax
 69	addq	0,$r13
 70
 71	subq	10*4,$r12	; account for the block just copied
 72	bge	_mloop		; loop while at least one more full block remains
 73	nop
 74
 75	addq	10*4,$r12	; compensate for last loop underflowing length
 76
 77	movem	[$sp+],$r8	; restore regs
 78
 79_word_loop:
 80	;; only fold if there is anything to fold.
 81
 82	cmpq	0,$r13
 83	beq	_no_fold	; NOTE(review): no nop follows this branch, unlike
				; the others; presumably the move.d below fills the
				; delay slot, which is harmless because r9 is only
				; a temporary -- confirm
 84
 85	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
 86	;; r9 can be used as temporary.
 87	
 88	move.d	$r13,$r9
 89	lsrq	16,$r9		; r9 = checksum >> 16
 90	and.d	0xffff,$r13	; checksum = checksum & 0xffff
 91	add.d	$r9,$r13	; checksum += r9
 92	
 93_no_fold:
 94	cmpq	2,$r12		; fewer than 2 bytes left?
 95	blt	_no_words	; then there is no whole word to copy
 96	nop
 97	
 98	;; copy and checksum the rest of the words
 99	
100	subq	2,$r12		; pre-subtract one word, as done for the movem loop
101	
102_wloop:	move.w	[$r10+],$r9
1032:	;; A failing userspace access will have this as PC.
104	addu.w	$r9,$r13	; add the word just read into the checksum
105	subq	2,$r12
106	bge	_wloop
107	move.w	$r9,[$r11+]	; store placed after the branch: it has to run on
				; every pass (including the last) for the copy to
				; be complete, i.e. it occupies the delay slot
108	
109	addq	2,$r12		; compensate for the loop underflowing length
110		
111_no_words:
112	;; see if we have one odd byte more
113	cmpq	1,$r12
114	beq	_do_byte
115	nop
116	ret
117	move.d	$r13, $r10	; hand the checksum back in r10
118
119_do_byte:	
120	;; copy and checksum the last byte
121	move.b	[$r10],$r9
1223:	;; A failing userspace access will have this as PC.
123	addu.b	$r9,$r13	; add the final byte into the checksum
124	move.b	$r9,[$r11]
125	ret
126	move.d	$r13, $r10	; hand the checksum back in r10