/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respectively one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early. */

#include <linux/linkage.h>

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:    r0 = s1, r1 = s2
 * Out:   r0 = 0 if the strings are equal; otherwise non-zero, with bit 31
 *        set when s1 < s2.  The word path returns 1 / 0x80000001; the
 *        byte path returns the plain byte difference.
 * Uses:  r2-r5, r12; returns through blink, filling the branch delay slot.
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; low two bits of either pointer set?
	brne	r2,0,.Lcharloop		; not both word-aligned -> byte loop
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]		; next word of s1, post-increment
	ld.ab	r3,[r1,4]		; next word of s2, post-increment
	nop_s
	sub	r4,r2,r12		; zero-byte detect:
	bic	r4,r4,r2		;   (x - 0x01010101) & ~x
	and	r4,r4,r5		;   & 0x80808080
	brne	r4,0,.Lfound0		; r2 may contain a NUL byte
	breq	r2,r3,.Lwordloop	; equal and no NUL -> next word
	/* Words differ and contain no NUL: isolate the first differing
	   byte so trailing bytes cannot influence the result.  */
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3		; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1			; assume s1 > s2
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	/* r2 may hold a NUL: compare only up to the first NUL or first
	   difference, whichever comes first.  */
	xor	r0,r2,r3		; mask for difference
	or	r0,r0,r4		; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3		; r0 = 0 when equal up to the NUL
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0		; get low estimate for r2 and get ...
	bic_s	r0,r0,r1		; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0		; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2			; ... be independent of trailing garbage
	or_s	r2,r2,r0		; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0			; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	/* Byte-at-a-time fallback for unaligned pointers.  */
	ldb.ab	r2,[r0,1]		; next byte of s1, post-increment
	ldb.ab	r3,[r1,1]		; next byte of s2, post-increment
	nop_s
	breq	r2,0,.Lcmpend		; end of s1 reached
	breq	r2,r3,.Lcharloop	; bytes equal -> keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3		; delay slot: return byte difference
END_CFI(strcmp)
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respectively one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early. */

#include <linux/linkage.h>

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:    r0 = s1, r1 = s2
 * Out:   r0 = 0 if the strings are equal; otherwise non-zero, with bit 31
 *        set when s1 < s2.  The word path returns 1 / 0x80000001; the
 *        byte path returns the plain byte difference.
 * Uses:  r2-r5, r12; returns through blink, filling the branch delay slot.
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; low two bits of either pointer set?
	brne	r2,0,.Lcharloop		; not both word-aligned -> byte loop
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]		; next word of s1, post-increment
	ld.ab	r3,[r1,4]		; next word of s2, post-increment
	nop_s
	sub	r4,r2,r12		; zero-byte detect:
	bic	r4,r4,r2		;   (x - 0x01010101) & ~x
	and	r4,r4,r5		;   & 0x80808080
	brne	r4,0,.Lfound0		; r2 may contain a NUL byte
	breq	r2,r3,.Lwordloop	; equal and no NUL -> next word
	/* Words differ and contain no NUL: isolate the first differing
	   byte so trailing bytes cannot influence the result.  */
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3		; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1			; assume s1 > s2
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	/* r2 may hold a NUL: compare only up to the first NUL or first
	   difference, whichever comes first.  */
	xor	r0,r2,r3		; mask for difference
	or	r0,r0,r4		; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3		; r0 = 0 when equal up to the NUL
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0		; get low estimate for r2 and get ...
	bic_s	r0,r0,r1		; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0		; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2			; ... be independent of trailing garbage
	or_s	r2,r2,r0		; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0			; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31		; delay slot: s1 < s2 -> set sign bit
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	/* Byte-at-a-time fallback for unaligned pointers.  */
	ldb.ab	r2,[r0,1]		; next byte of s1, post-increment
	ldb.ab	r3,[r1,1]		; next byte of s2, post-increment
	nop_s
	breq	r2,0,.Lcmpend		; end of s1 reached
	breq	r2,r3,.Lcharloop	; bytes equal -> keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3		; delay slot: return byte difference
END_CFI(strcmp)