Note: File does not exist in v4.17.
/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	/*
	 * SMP kgdb runs into a dead loop without a NOP here, when one core
	 * single-steps over get_core_lock_noflush and the other executes
	 * get_core_lock as a slave node.
	 */
	nop;
	CSYNC(r2);
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
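
/*
 * A rough C-level sketch of how the two helpers above bracket every atomic
 * primitive in this file (illustrative only; the names in the sketch are
 * hypothetical, not real kernel symbols):
 *
 *	u32 atomic_rmw_sketch(volatile u32 *ptr, u32 (*op)(u32))
 *	{
 *		u32 flags = get_core_lock(ptr);
 *		u32 old = *ptr;
 *		*ptr = op(old);
 *		put_core_lock(flags, ptr);
 *		return old;
 *	}
 *
 * Here get_core_lock stands for cli + testset(_corelock) + flushinv of the
 * data's cache line, and put_core_lock for clearing _corelock + sti.
 * testset provides the inter-core mutual exclusion, cli/sti exclude local
 * interrupts, and flushinv keeps the non-coherent D-caches consistent for
 * the data being operated on.
 */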

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
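
/*
 * Overview of the _barrier_mask protocol implemented by the two routines
 * above: ___raw_smp_mark_barrier_asm sets the bits of all other cores in
 * _barrier_mask while leaving the calling core's own bit untouched;
 * ___raw_smp_check_barrier_asm clears the calling core's bit if it was set
 * and, only in that case, invalidates the local D-cache via
 * _resync_core_dcache. A core issuing a barrier thus "marks" its peers, and
 * each peer resyncs its D-cache the first time it checks the barrier after
 * being marked.
 */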

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
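
/*
 * Note on the "CPU fingerprint" scheme used by _start_lock_coherent and
 * _end_lock_coherent above: when __ARCH_SYNC_CORE_DCACHE is enabled, bits
 * 31:28 of the lock word record which cores have released the lock since it
 * was last acquired. Releasing sets bit (28 + cpuid); acquiring reads the
 * high nibble, clears it, and resyncs the local D-cache if any other core's
 * bit was set, since the local cached copy of the protected data may then be
 * stale.
 */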

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that the flush
	   will behave like a nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)
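
/*
 * Spinlock word layout as used above: bit 0 is the lock bit, and (only when
 * __ARCH_SYNC_CORE_DCACHE is defined) bits 31:28 carry the CPU fingerprint
 * nibble managed by _start_lock_coherent/_end_lock_coherent. In rough terms:
 *
 *	is_locked: (word & 1) != 0
 *	lock:      set bit 0 (preserving the fingerprint nibble when present)
 *	unlock:    clear bit 0, then record ownership in the fingerprint nibble
 */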

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
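
/*
 * rwlock counter convention used by these routines (RW_LOCK_BIAS presumably
 * comes from the <asm/rwlock.h> included above): an unlocked rwlock holds
 * RW_LOCK_BIAS. Each reader decrements the count by 1, and a writer claims
 * the lock by subtracting the whole bias, succeeding only if the result is
 * zero (no readers, no writer). The unlock paths add the same amount back.
 * In the __ARCH_SYNC_CORE_DCACHE case the write paths keep the high
 * fingerprint nibble out of this arithmetic, masking it for comparisons or
 * preserving it on store.
 */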

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: processing performed under a reader lock should have no cache
 * side effects visible to the other core, so we just release the core
 * lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * ADD a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_add_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_add_asm)
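
/*
 * Note: ___raw_atomic_add_asm above returns the *new* value, while the
 * xadd/and/or/xor variants below return the value that was in memory before
 * the operation. In rough C terms (illustrative only):
 *
 *	add:  return *ptr += value;
 *	xadd: old = *ptr; *ptr = old + value; return old;
 */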

/*
 * r0 = ptr
 * r1 = value
 *
 * ADD a signed value to a 32bit word and return the old value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xadd_asm)
	p1 = r0;
	r3 = r1;		/* only r3 survives _get_core_lock (clobbers r2:0) */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];		/* r2 = old value */
	r3 = r3 + r2;		/* r3 = new value */
	[p1] = r3;
	r3 = r2;		/* return the old value */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xadd_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * AND the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_and_asm)
	p1 = r0;
	r3 = r1;		/* only r3 survives _get_core_lock (clobbers r2:0) */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];		/* r2 = old value */
	r3 = r2 & r3;		/* r3 = new value */
	[p1] = r3;
	r3 = r2;		/* return the old value */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_and_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * OR the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_or_asm)
	p1 = r0;
	r3 = r1;		/* only r3 survives _get_core_lock (clobbers r2:0) */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];		/* r2 = old value */
	r3 = r2 | r3;		/* r3 = new value */
	[p1] = r3;
	r3 = r2;		/* return the old value */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_or_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;		/* only r3 survives _get_core_lock (clobbers r2:0) */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];		/* r2 = old value */
	r3 = r2 ^ r3;		/* r3 = new value */
	[p1] = r3;
	r3 = r2;		/* return the old value */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define	__do_xchg(src, dst) 		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
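
/*
 * A rough C-level sketch of what __do_xchg expands to for the 32bit case
 * (illustrative only; the function and helper names are hypothetical):
 *
 *	u32 xchg_sketch(volatile u32 *ptr, u32 value)
 *	{
 *		u32 flags = get_core_lock(ptr);
 *		u32 old = *ptr;
 *		*ptr = value;
 *		put_core_lock(flags, ptr);
 *		return old;
 *	}
 *
 * The 8/16bit variants differ only in the zero-extending load and the
 * store width passed in as src/dst.
 */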

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define	__do_cmpxchg(src, dst) 		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
     1: r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
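
/*
 * A rough C-level sketch of __do_cmpxchg for the 32bit case (illustrative
 * only; the function and helper names are hypothetical):
 *
 *	u32 cmpxchg_sketch(volatile u32 *ptr, u32 new, u32 old)
 *	{
 *		u32 flags = get_core_lock(ptr);
 *		u32 prev = *ptr;
 *		if (prev == old)
 *			*ptr = new;
 *		put_core_lock(flags, ptr);
 *		return prev;
 *	}
 *
 * Note that r4 is saved and restored by hand here, since the routine needs
 * extra scratch beyond the r3:0 registers it is documented to clobber.
 */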

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_or_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = 1;
	r2 <<= r1;
	r1 = ~r2;
	jump ___raw_atomic_and_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)
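
/*
 * The three bit primitives above simply convert bitnr into a mask
 * (1 << bitnr, or its complement for the clear case) and tail-jump into the
 * corresponding atomic mask operation, so they inherit its locking and its
 * return-the-old-value semantics. The test_and_* variants below reuse them
 * and then extract the requested bit from the returned old word.
 */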

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)