v5.4
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>

/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5).
 * These values are used with the sync instruction to perform memory barriers.
 * Two kinds of ordering guarantee are available through the SYNC instruction:
 * - Completion Barriers
 * - Ordering Barriers
 * Compared to a completion barrier, an ordering barrier is a lighter-weight
 * operation: it does not require the specified instructions before the SYNC
 * to have already completed. Instead, it only requires that the specified
 * instructions which are subsequent to the SYNC in the instruction stream are
 * never re-ordered for processing ahead of the specified instructions which
 * are before the SYNC in the instruction stream. This potentially reduces how
 * many cycles the barrier instruction must stall before it completes.
 * Implementations that do not use any of the non-zero values of stype to
 * define different barriers, such as ordering barriers, must make those stype
 * values act the same as stype zero.
 */
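
/*
 * Illustration (not part of the original file): a specific stype is encoded
 * as an immediate operand of the SYNC instruction, exactly as sync_ginv()
 * does at the bottom of this file. A minimal sketch; sync_stype() is a
 * hypothetical helper, not a kernel API:
 */
#define sync_stype(stype)						\
	__asm__ __volatile__("sync\t%0" : : "i" (stype) : "memory")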

/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must be
 *   already globally performed before any synchronizable specified memory
 *   instructions that occur after the SYNC are allowed to be performed, with
 *   respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the most
 *   complete set of synchronization operations that are defined. This means
 *   stype zero always does a completion barrier that affects both loads and
 *   stores preceding the SYNC instruction and both loads and stores that are
 *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
 *   by the architecture or specific implementations to perform synchronization
 *   behaviors that are less complete than that of stype zero. If an
 *   implementation does not use one of these non-zero values to define a
 *   different synchronization behavior, then that non-zero value of stype must
 *   act the same as the stype zero completion barrier. This allows software
 *   written for an implementation with a lighter-weight barrier to work on
 *   another implementation which only implements the stype zero completion
 *   barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP (in
 *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
 *   to guarantee that memory reference results are visible across operating
 *   mode changes. For example, a completion barrier is required on some
 *   implementations on entry to and exit from Debug Mode to guarantee that
 *   memory effects are handled correctly.
 */

/*
 * stype 0 - A completion barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: Loads, Stores
 */
#define STYPE_SYNC 0x0

/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must
 *   reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order
 *   generates a memory request to external memory and any memory instruction
 *   after the SYNC instruction in program order also generates a memory
 *   request to external memory, the memory request belonging to the older
 *   instruction must be globally performed before the memory request
 *   belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */

/*
 * stype 0x10 - An ordering barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: N/A
 */
#define STYPE_SYNC_MB 0x10
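
/*
 * Illustration (hypothetical, building on the sync_stype() sketch above):
 * an stype 0x10 ordering barrier is enough to order a data store before a
 * flag store; unlike stype 0, it need not wait for the data store to be
 * globally performed.
 */
static inline void example_publish(volatile int *data, volatile int *flag)
{
	*data = 42;
	sync_stype(STYPE_SYNC_MB);	/* store-store ordering suffices */
	*flag = 1;
}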

/*
 * stype 0x14 - A completion barrier specific to global invalidations
 *
 * When a sync instruction of this type completes, any preceding GINVI or GINVT
 * operation has been globalized & completed on all coherent CPUs. Anything
 * that the GINV* instruction should invalidate will have been invalidated on
 * all coherent CPUs when this instruction completes. It is implementation
 * specific whether the GINV* instructions themselves will ensure completion,
 * or this sync type will.
 *
 * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
 * this sync type also requires that previous SYNCI operations have completed.
 */
#define STYPE_GINV	0x14

#ifdef CONFIG_CPU_HAS_SYNC
#define __sync()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		".set	mips2\n\t"		\
		"sync\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: /* no input */		\
		: "memory")
#else
#define __sync()	do { } while (0)
#endif

#define __fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"nop\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1)		\
		: "memory")
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")

# define fast_wmb()	__syncw()
# define fast_rmb()	barrier()
# define fast_mb()	__sync()
# define fast_iob()	do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
# define fast_wmb()	__sync()
# define fast_rmb()	__sync()
# define fast_mb()	__sync()
# ifdef CONFIG_SGI_IP28
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"sync\n\t"			\
		"lw	$0,%0\n\t"		\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")
# else
#  define fast_iob()				\
	do {					\
		__sync();			\
		__fast_iob();			\
	} while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()		wbflush()
#define iob()		wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()		fast_mb()
#define iob()		fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */

#define wmb()		fast_wmb()
#define rmb()		fast_rmb()

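/*
 * Illustration (hypothetical device address): iob() is used to push a
 * write-buffered MMIO store out to the device before continuing. CKSEG1ADDR
 * gives the uncached segment, as used elsewhere in this file.
 */
#define EXAMPLE_DEV_REG	((volatile unsigned int *)CKSEG1ADDR(0x1f000000))

static inline void example_dev_kick(void)
{
	*EXAMPLE_DEV_REG = 1;	/* uncached store to the device register */
	iob();			/* flush the write buffer so the device sees it */
}
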
#if defined(CONFIG_WEAK_ORDERING)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
#  define __smp_mb()	__sync()
#  define __smp_rmb()	barrier()
#  define __smp_wmb()	__syncw()
# else
#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
# endif
#else
#define __smp_mb()	barrier()
#define __smp_rmb()	barrier()
#define __smp_wmb()	barrier()
#endif
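
/*
 * Illustration (hypothetical; READ_ONCE/WRITE_ONCE come from
 * <linux/compiler.h>): the classic message-passing pattern these SMP
 * barriers order. smp_wmb()/smp_rmb() are derived from the __smp_* forms
 * by <asm-generic/barrier.h>, included at the bottom of this file.
 */
static int example_data;
static int example_flag;

static void example_producer(void)
{
	WRITE_ONCE(example_data, 42);
	smp_wmb();		/* order the data store before the flag store */
	WRITE_ONCE(example_flag, 1);
}

static int example_consumer(void)
{
	if (!READ_ONCE(example_flag))
		return -1;	/* not published yet */
	smp_rmb();		/* order the flag load before the data load */
	return READ_ONCE(example_data);
}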

/*
 * When LL/SC does imply order, it must also act as a compiler barrier to
 * prevent the compiler from reordering where the CPU will not. When it does
 * not imply order, the compiler is also free to reorder across the LL/SC loop
 * and ordering will be done by smp_llsc_mb() and friends.
 */
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB		"	sync	\n"
#define smp_llsc_mb()		__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
#define __LLSC_CLOBBER
#else
#define __WEAK_LLSC_MB		"		\n"
#define smp_llsc_mb()		do { } while (0)
#define __LLSC_CLOBBER		"memory"
#endif
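
/*
 * Illustration (hypothetical, simplified): roughly how __LLSC_CLOBBER is
 * consumed by an LL/SC loop. The real loops in <asm/atomic.h> also handle
 * the R10000 and Loongson workarounds and use tighter memory constraints.
 */
static inline void example_atomic_add(int i, int *v)
{
	int temp;

	do {
		__asm__ __volatile__(
		"	.set	push			\n"
		"	.set	mips2			\n"
		"	ll	%0, %1			\n"
		"	addu	%0, %2			\n"
		"	sc	%0, %1			\n"
		"	.set	pop			\n"
		: "=&r" (temp), "+m" (*v)
		: "r" (i)
		: __LLSC_CLOBBER);
	} while (!temp);	/* retry if the SC failed */
}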

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif
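
/*
 * Illustration (hypothetical, using example_atomic_add() above): full-barrier
 * atomics are composed by bracketing a relaxed LL/SC loop with these helpers,
 * mirroring the structure of the kernel's value-returning atomics.
 */
static inline void example_ordered_add(int i, int *v)
{
	smp_mb__before_llsc();	/* order earlier accesses before the LL */
	example_atomic_add(i, v);
	smp_llsc_mb();		/* order the SC before later accesses */
}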

#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
#define __smp_mb__after_atomic()	smp_llsc_mb()

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The cases described above cause an error in the cache coherence protocol,
 * such that the Invalidate of a competing LL-SC goes 'missing' and the SC
 * erroneously observes that its core still holds the line in the Exclusive
 * state, allowing the SC to proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
#else
#define loongson_llsc_mb()	do { } while (0)
#endif
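
/*
 * Illustration (hypothetical, simplified): case 1 above puts the workaround
 * barrier between any earlier memory access and the LL. Writing the whole
 * loop in asm keeps the retry branch target inside the sequence, as the
 * kernel's real implementations do.
 */
static inline void example_loongson_safe_add(int i, int *v)
{
	int temp;

	loongson_llsc_mb();	/* case 1: no earlier access may pass the LL */
	__asm__ __volatile__(
	"	.set	push			\n"
	"	.set	mips2			\n"
	"1:	ll	%0, %1			\n"
	"	addu	%0, %2			\n"
	"	sc	%0, %1			\n"
	"	beqz	%0, 1b			\n"
	"	.set	pop			\n"
	: "=&r" (temp), "+m" (*v)
	: "r" (i)
	: __LLSC_CLOBBER);
}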

static inline void sync_ginv(void)
{
	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
}

#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */
v6.2
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>
#include <asm/sync.h>

static inline void __sync(void)
{
	asm volatile(__SYNC(full, always) ::: "memory");
}

static inline void rmb(void)
{
	asm volatile(__SYNC(rmb, always) ::: "memory");
}
#define rmb rmb

static inline void wmb(void)
{
	asm volatile(__SYNC(wmb, always) ::: "memory");
}
#define wmb wmb

#define fast_mb()	__sync()

#define __fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"nop\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1)		\
		: "memory")
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define fast_iob()	do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
# ifdef CONFIG_SGI_IP28
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"sync\n\t"			\
		"lw	$0,%0\n\t"		\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")
# else
#  define fast_iob()				\
	do {					\
		__sync();			\
		__fast_iob();			\
	} while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()		wbflush()
#define iob()		wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()		fast_mb()
#define iob()		fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */

#if defined(CONFIG_WEAK_ORDERING)
# define __smp_mb()	__sync()
# define __smp_rmb()	rmb()
# define __smp_wmb()	wmb()
#else
# define __smp_mb()	barrier()
# define __smp_rmb()	barrier()
# define __smp_wmb()	barrier()
#endif

/*
 * When LL/SC does imply order, it must also act as a compiler barrier to
 * prevent the compiler from reordering where the CPU will not. When it does
 * not imply order, the compiler is also free to reorder across the LL/SC loop
 * and ordering will be done by smp_llsc_mb() and friends.
 */
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
# define __WEAK_LLSC_MB		sync
# define smp_llsc_mb() \
	__asm__ __volatile__(__stringify(__WEAK_LLSC_MB) : : :"memory")
# define __LLSC_CLOBBER
#else
# define __WEAK_LLSC_MB
# define smp_llsc_mb()		do { } while (0)
# define __LLSC_CLOBBER		"memory"
#endif
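
/*
 * Illustration (hypothetical, simplified): __WEAK_LLSC_MB is now a bare
 * token rather than a string, so larger asm templates paste it in via
 * __stringify() from <linux/stringify.h>, as smp_llsc_mb() does above.
 */
static inline int example_xchg_acquire(int *v, int val)
{
	int old, tmp;

	do {
		__asm__ __volatile__(
		"	.set	push			\n"
		"	.set	mips2			\n"
		"	ll	%0, %2			\n"
		"	move	%1, %3			\n"
		"	sc	%1, %2			\n"
		"	.set	pop			\n"
		"	" __stringify(__WEAK_LLSC_MB) "	\n"
		: "=&r" (old), "=&r" (tmp), "+m" (*v)
		: "r" (val)
		: __LLSC_CLOBBER);
	} while (!tmp);		/* retry if the SC failed */

	return old;
}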

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif

/*
 * In the Loongson3 LL/SC workaround case, all of our LL/SC loops already have
 * a completion barrier immediately preceding the LL instruction. Therefore we
 * can skip emitting a barrier from __smp_mb__before_atomic().
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __smp_mb__before_atomic()
#else
# define __smp_mb__before_atomic()	__smp_mb__before_llsc()
#endif

#define __smp_mb__after_atomic()	smp_llsc_mb()

static inline void sync_ginv(void)
{
	asm volatile(__SYNC(ginv, always));
}

#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */