/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6). If you want an optimized memset for one of the
 * other possible L1 data cache line lengths (32B or 128B), rewrite the
 * code carefully, checking that no prefetchw/prealloc instruction is
 * issued for an L1 cache line that does not belong to the memset area.
 */
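
/*
 * For orientation, the flow below corresponds roughly to the following C
 * sketch. It is illustrative only: it ignores the cache-hint instructions
 * and collapses the 64-byte and 32-byte store loops into one 32-byte loop.
 *
 *	void *memset(void *s, int c, size_t n)
 *	{
 *		unsigned char *b = s;
 *		unsigned int v = (unsigned char)c * 0x01010101u; // byte x 4
 *
 *		if (n > 8) {
 *			while ((unsigned long)b & 3) {	// word-align head
 *				*b++ = c;
 *				n--;
 *			}
 *			for (; n >= 32; n -= 32, b += 32) // bulk word stores
 *				for (int i = 0; i < 8; i++)
 *					((unsigned int *)b)[i] = v;
 *		}
 *		while (n--)				// 0-31 byte tail
 *			*b++ = c;
 *		return s;
 *	}
 */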

#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else
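
; For other L1 line lengths the hint macros expand to nothing: issued with
; a 64B stride, prealloc/prefetchw could touch a cache line that is only
; partly inside the memset area, and prealloc in particular allocates the
; line without fetching its previous contents.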

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif

ENTRY_CFI(memset)
	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; don't clobber ret val

;;; if length <= 8, store it byte by byte
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2

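;;; Store up to 3 bytes to word-align the destination; the Z flag from
;;; the and.f below lets lpnz skip this loop when r0 is already aligned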
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP START
	stb.ab	r1, [r3, 1]
	sub	r2, r2, 1
.Laligndestination:

;;; Destination is aligned
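;;; Replicate the fill byte across a 32-bit word:
;;; r1 = 0x000000cc -> r4 = 0x0000cccc -> r5 = r4 = 0xcccccccc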
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5

	sub3	lp_count, r2, 8
	cmp	r2, 64
	bmsk.hi	r2, r2, 5
	mov.ls	lp_count, 0
	add3.hi	r2, r2, 8
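;;; If len > 64: the 64-byte loop below runs (len - 64) / 64 times and r2
;;; is left holding 64 + (len & 63) bytes for the 32B and byte loops.
;;; If len <= 64: lp_count is zeroed so the 64-byte loop is skipped.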

;;; Convert len to Dwords, unfold x8
	lsr.f	lp_count, lp_count, 6

	lpnz	@.Lset64bytes
	;; LOOP START
	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching
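	;; prealloc claims the line one iteration ahead in L1 without
	;; reading it from memory; the stores that follow are expected
	;; to overwrite that line in full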

#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset64bytes:

	lsr.f	lp_count, r2, 5	; last remaining chunks, max 127 bytes
	lpnz	@.Lset32bytes
	;; LOOP START
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset32bytes:

	and.f	lp_count, r2, 0x1F	; last remaining up to 31 bytes
.Lsmallchunk:
	lpnz	@.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]
.Lcopy3bytes:

	j	[blink]

END_CFI(memset)

ENTRY_CFI(memzero)
	; adjust bzero-style args (buf, len) to memset args (buf, 0, len)
	mov	r2, r1
	b.d	memset		; tail call, so no need to tinker with blink
	mov	r1, 0
END_CFI(memzero)