/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respectively one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early. */

#include <linux/linkage.h>

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:    r0 = s1, r1 = s2
 * Out:   r0 = 0 if the strings are equal; otherwise non-zero, with bit 31
 *        set when s1 < s2.  The word path returns 1 / 0x80000001; the
 *        byte path returns the plain byte difference.
 * Uses:  r2-r5, r12; returns through blink, filling the branch delay slot.
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; low two bits of either pointer set?
	brne	r2,0,.Lcharloop		; not both word-aligned -> byte loop
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]		; next word of s1, post-increment
	ld.ab	r3,[r1,4]		; next word of s2, post-increment
	nop_s
	sub	r4,r2,r12		; zero-byte detect:
	bic	r4,r4,r2		;   (x - 0x01010101) & ~x
	and	r4,r4,r5		;   & 0x80808080
	brne	r4,0,.Lfound0		; r2 may contain a NUL byte
	breq	r2,r3,.Lwordloop	; equal and no NUL -> next word
	/* Words differ and contain no NUL: isolate the first differing
	   byte so trailing bytes cannot influence the result.  */
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3		; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1			; assume s1 > s2
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	/* r2 may hold a NUL: compare only up to the first NUL or first
	   difference, whichever comes first.  */
	xor	r0,r2,r3		; mask for difference
	or	r0,r0,r4		; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3		; r0 = 0 when equal up to the NUL
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0		; get low estimate for r2 and get ...
	bic_s	r0,r0,r1		; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0		; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2			; ... be independent of trailing garbage
	or_s	r2,r2,r0		; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0			; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	/* Byte-at-a-time fallback for unaligned pointers.  */
	ldb.ab	r2,[r0,1]		; next byte of s1, post-increment
	ldb.ab	r3,[r1,1]		; next byte of s2, post-increment
	nop_s
	breq	r2,0,.Lcmpend		; end of s1 reached
	breq	r2,r3,.Lcharloop	; bytes equal -> keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3		; delay slot: return byte difference
END_CFI(strcmp)
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respectively one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early. */

#include <linux/linkage.h>

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:    r0 = s1, r1 = s2
 * Out:   r0 = 0 if the strings are equal; otherwise non-zero, with bit 31
 *        set when s1 < s2.  The word path returns 1 / 0x80000001; the
 *        byte path returns the plain byte difference.
 * Uses:  r2-r5, r12; returns through blink, filling the branch delay slot.
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; low two bits of either pointer set?
	brne	r2,0,.Lcharloop		; not both word-aligned -> byte loop
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]		; next word of s1, post-increment
	ld.ab	r3,[r1,4]		; next word of s2, post-increment
	nop_s
	sub	r4,r2,r12		; zero-byte detect:
	bic	r4,r4,r2		;   (x - 0x01010101) & ~x
	and	r4,r4,r5		;   & 0x80808080
	brne	r4,0,.Lfound0		; r2 may contain a NUL byte
	breq	r2,r3,.Lwordloop	; equal and no NUL -> next word
	/* Words differ and contain no NUL: isolate the first differing
	   byte so trailing bytes cannot influence the result.  */
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3		; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1			; assume s1 > s2
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	/* r2 may hold a NUL: compare only up to the first NUL or first
	   difference, whichever comes first.  */
	xor	r0,r2,r3		; mask for difference
	or	r0,r0,r4		; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3		; r0 = 0 when equal up to the NUL
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0		; get low estimate for r2 and get ...
	bic_s	r0,r0,r1		; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0		; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2			; ... be independent of trailing garbage
	or_s	r2,r2,r0		; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0			; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	/* Byte-at-a-time fallback for unaligned pointers.  */
	ldb.ab	r2,[r0,1]		; next byte of s1, post-increment
	ldb.ab	r3,[r1,1]		; next byte of s2, post-increment
	nop_s
	breq	r2,0,.Lcmpend		; end of s1 reached
	breq	r2,r3,.Lcharloop	; bytes equal -> keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3		; delay slot: return byte difference
END_CFI(strcmp)