/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>

/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
 * These values are used with the sync instruction to perform memory barriers.
 * Types of ordering guarantees available through the SYNC instruction:
 * - Completion Barriers
 * - Ordering Barriers
 * As compared to the completion barrier, the ordering barrier is a
 * lighter-weight operation as it does not require the specified instructions
 * before the SYNC to be already completed. Instead it only requires that those
 * specified instructions which are subsequent to the SYNC in the instruction
 * stream are never re-ordered for processing ahead of the specified
 * instructions which are before the SYNC in the instruction stream.
 * This potentially reduces how many cycles the barrier instruction must stall
 * before it completes.
 * Implementations that do not use any of the non-zero values of stype to define
 * different barriers, such as ordering barriers, must make those stype values
 * act the same as stype zero.
 */

/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must be
 *   already globally performed before any synchronizable specified memory
 *   instructions that occur after the SYNC are allowed to be performed, with
 *   respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the most
 *   complete set of synchronization operations that are defined. This means
 *   stype zero always does a completion barrier that affects both loads and
 *   stores preceding the SYNC instruction and both loads and stores that are
 *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
 *   by the architecture or specific implementations to perform synchronization
 *   behaviors that are less complete than that of stype zero. If an
 *   implementation does not use one of these non-zero values to define a
 *   different synchronization behavior, then that non-zero value of stype must
 *   act the same as the stype zero completion barrier. This allows software
 *   written for an implementation with a lighter-weight barrier to work on
 *   another implementation which only implements the stype zero completion
 *   barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP (in
 *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
 *   to guarantee that memory reference results are visible across operating
 *   mode changes. For example, a completion barrier is required on some
 *   implementations on entry to and exit from Debug Mode to guarantee that
 *   memory effects are handled correctly.
 */

/*
 * stype 0 - A completion barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: Loads, Stores
 */
#define STYPE_SYNC 0x0
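/*
 * Illustrative sketch only: a stype 0 completion barrier could be emitted
 * directly, in the same style as sync_ginv() near the end of this file. The
 * helper name below is hypothetical and not part of this header; kernel code
 * should normally use the mb()/smp_mb() style macros defined further down.
 *
 *	static inline void example_sync_completion(void)
 *	{
 *		asm volatile("sync\t%0" : : "i"(STYPE_SYNC) : "memory");
 *	}
 */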

/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must
 *   reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order
 *   generates a memory request to the external memory and any memory
 *   instruction after the SYNC instruction in program order also generates a
 *   memory request to external memory, the memory request belonging to the
 *   older instruction must be globally performed before the time the memory
 *   request belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */

/*
 * stype 0x10 - An ordering barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: N/A
 */
#define STYPE_SYNC_MB 0x10
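/*
 * Illustrative sketch only: an ordering barrier using STYPE_SYNC_MB could be
 * emitted the same way, again mirroring the sync_ginv() style. The helper
 * name is hypothetical and not provided by this header; it simply shows how
 * the stype value reaches the immediate field of the SYNC instruction.
 *
 *	static inline void example_sync_ordering(void)
 *	{
 *		asm volatile("sync\t%0" : : "i"(STYPE_SYNC_MB) : "memory");
 *	}
 */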

/*
 * stype 0x14 - A completion barrier specific to global invalidations
 *
 * When a sync instruction of this type completes, any preceding GINVI or GINVT
 * operation has been globalized & completed on all coherent CPUs. Anything
 * that the GINV* instruction should invalidate will have been invalidated on
 * all coherent CPUs when this instruction completes. It is implementation
 * specific whether the GINV* instructions themselves will ensure completion,
 * or this sync type will.
 *
 * In systems implementing global invalidates (i.e. with Config5.GI == 2 or 3)
 * this sync type also requires that previous SYNCI operations have completed.
 */
#define STYPE_GINV 0x14
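/*
 * Illustrative sketch only: code that issues a global invalidate might wait
 * for it to complete with a sync of this type, e.g. via the sync_ginv()
 * helper defined near the end of this file. The ginvt_full() call below is
 * assumed for illustration and is not defined in this header.
 *
 *	ginvt_full();	(issue a global TLB invalidate; assumed helper)
 *	sync_ginv();	(wait for it to be globalized on all coherent CPUs)
 */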

#ifdef CONFIG_CPU_HAS_SYNC
#define __sync()				\
	__asm__ __volatile__(			\
		".set push\n\t"			\
		".set noreorder\n\t"		\
		".set mips2\n\t"		\
		"sync\n\t"			\
		".set pop"			\
		: /* no output */		\
		: /* no input */		\
		: "memory")
#else
#define __sync()	do { } while(0)
#endif

#define __fast_iob()				\
	__asm__ __volatile__(			\
		".set push\n\t"			\
		".set noreorder\n\t"		\
		"lw $0,%0\n\t"			\
		"nop\n\t"			\
		".set pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1)		\
		: "memory")
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")

# define fast_wmb()	__syncw()
# define fast_rmb()	barrier()
# define fast_mb()	__sync()
# define fast_iob()	do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
# define fast_wmb()	__sync()
# define fast_rmb()	__sync()
# define fast_mb()	__sync()
# ifdef CONFIG_SGI_IP28
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set push\n\t"			\
		".set noreorder\n\t"		\
		"lw $0,%0\n\t"			\
		"sync\n\t"			\
		"lw $0,%0\n\t"			\
		".set pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")
# else
#  define fast_iob()				\
	do {					\
		__sync();			\
		__fast_iob();			\
	} while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()		wbflush()
#define iob()		wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()		fast_mb()
#define iob()		fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */

#define wmb()		fast_wmb()
#define rmb()		fast_rmb()
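/*
 * Illustrative sketch only: wmb() and iob() are typically used around MMIO,
 * e.g. making a DMA descriptor visible before ringing a device doorbell. The
 * device, descriptor and doorbell register below are hypothetical.
 *
 *	desc->addr = buf_dma;
 *	desc->len  = buf_len;
 *	wmb();				(descriptor writes ordered before the kick)
 *	writel(1, dev->doorbell);
 *	iob();				(flush the CPU write buffer to the device)
 */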

#if defined(CONFIG_WEAK_ORDERING)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
#  define __smp_mb()	__sync()
#  define __smp_rmb()	barrier()
#  define __smp_wmb()	__syncw()
# else
#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
# endif
#else
#define __smp_mb()	barrier()
#define __smp_rmb()	barrier()
#define __smp_wmb()	barrier()
#endif
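/*
 * Illustrative sketch only: the __smp_*() definitions above back the generic
 * smp_mb()/smp_rmb()/smp_wmb() wrappers pulled in from <asm-generic/barrier.h>
 * at the end of this file. A typical producer/consumer pairing (initially
 * data = 0 and flag = 0; both variables are hypothetical):
 *
 *	CPU 0				CPU 1
 *
 *	data = 42;			while (!READ_ONCE(flag))
 *	smp_wmb();				;
 *	WRITE_ONCE(flag, 1);		smp_rmb();
 *					r = data;	(r is guaranteed to be 42)
 */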

/*
 * When LL/SC does imply order, it must also be a compiler barrier to prevent
 * the compiler from reordering where the CPU will not. When it does not imply
 * order, the compiler is also free to reorder across the LL/SC loop and
 * ordering will be done by smp_llsc_mb() and friends; see the illustrative
 * sequence below the definitions.
 */
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB		" sync \n"
#define smp_llsc_mb()		__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
#define __LLSC_CLOBBER
#else
#define __WEAK_LLSC_MB		" \n"
#define smp_llsc_mb()		do { } while (0)
#define __LLSC_CLOBBER		"memory"
#endif
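/*
 * Illustrative sketch only, loosely following the pattern of the atomic_*()
 * implementations elsewhere in arch/mips: an LL/SC read-modify-write loop
 * that uses __LLSC_CLOBBER for its clobber list and smp_llsc_mb() to order
 * the result against later accesses when LL/SC itself does not imply order.
 * The function name and operands are hypothetical.
 *
 *	static inline int example_fetch_add(int i, int *p)
 *	{
 *		int old, tmp;
 *
 *		__asm__ __volatile__(
 *		"1:	ll	%0, %2		\n"
 *		"	addu	%1, %0, %3	\n"
 *		"	sc	%1, %2		\n"
 *		"	beqz	%1, 1b		\n"
 *		: "=&r" (old), "=&r" (tmp), "+m" (*p)
 *		: "Ir" (i)
 *		: __LLSC_CLOBBER);
 *		smp_llsc_mb();
 *		return old;
 *	}
 */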

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif

#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
#define __smp_mb__after_atomic()	smp_llsc_mb()
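/*
 * Illustrative sketch only: these back the generic smp_mb__before_atomic()
 * and smp_mb__after_atomic() wrappers, which order accesses around atomic
 * operations that do not themselves imply a barrier. The obj/ref_count
 * fields below are hypothetical.
 *
 *	obj->dead = 1;
 *	smp_mb__before_atomic();	(order the store above before the dec)
 *	atomic_dec(&obj->ref_count);
 */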

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (i.e. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, i.e. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (i.e. a SYNC instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The cases described above cause an error in the cache coherence protocol,
 * such that the Invalidate of a competing LL-SC goes 'missing' and the SC
 * erroneously observes that its core still has Exclusive state and lets the
 * SC proceed.
 *
 * Therefore the error only occurs on SMP systems. An illustrative placement
 * of loongson_llsc_mb() follows the definitions below.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
#else
#define loongson_llsc_mb()	do { } while (0)
#endif
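/*
 * Illustrative sketch only: case 1) above asks for loongson_llsc_mb() to be
 * placed immediately before an LL, between it and any earlier memory access,
 * while case 2) asks for a barrier at any branch target outside the LL/SC
 * loop (the "sync" at label 2 below; real code emits it only when the
 * workaround is enabled). This is not the actual cmpxchg() implementation;
 * p, expected and new are hypothetical operands.
 *
 *	int old, tmp;
 *
 *	loongson_llsc_mb();
 *	__asm__ __volatile__(
 *	"1:	ll	%0, %2			\n"
 *	"	bne	%0, %3, 2f		\n"
 *	"	move	%1, %4			\n"
 *	"	sc	%1, %2			\n"
 *	"	beqz	%1, 1b			\n"
 *	"2:	sync				\n"
 *	: "=&r" (old), "=&r" (tmp), "+m" (*p)
 *	: "r" (expected), "r" (new)
 *	: __LLSC_CLOBBER);
 */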

static inline void sync_ginv(void)
{
	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
}

#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */