diff options
| author | Rich Felker <dalias@aerifal.cx> | 2016-01-21 19:28:15 +0000 | 
|---|---|---|
| committer | Rich Felker <dalias@aerifal.cx> | 2016-01-21 19:43:04 +0000 | 
| commit | 61b1e75f7d8004461f2e18f171c26c2f545eed32 (patch) | |
| tree | 7a88f8d51ee3f049745b147b7f36fc8ee22e0e62 | |
| parent | 1315596b510189b5159e742110b504177bdd4932 (diff) | |
| download | musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.tar.gz | |
overhaul sh atomics for new atomics framework, add j-core cas.l backend
sh needs runtime-selected atomic backends since there are a number of
supported models that use non-forwards-compatible (non-smp-compatible)
atomic mechanisms. previously, the code paths for this were highly
inefficient since they involved C function calls with multiple
branches in the callee and heavy spills in the caller. the new code
calls the runtime-selected asm fragment from inline asm with
extremely minimal clobbers, rather than using a function call.
for the sh4a case where the atomic mechanism is known and there is no
forward-compatibility issue, the movli.l and movco.l instructions are
provided as a_ll and a_sc, allowing the new shared atomic.h to
generate efficient inline versions of all the basic atomic operations
without needing a cas loop.
| -rw-r--r-- | arch/sh/atomic_arch.h | 110 | ||||
| -rw-r--r-- | arch/sh/src/atomic.c | 158 | ||||
| -rw-r--r-- | arch/sh/src/sh_atomic.h | 15 | ||||
| -rw-r--r-- | src/thread/sh/__set_thread_area.c (renamed from arch/sh/src/__set_thread_area.c) | 24 | ||||
| -rw-r--r-- | src/thread/sh/__set_thread_area.s | 0 | ||||
| -rw-r--r-- | src/thread/sh/atomics.s | 65 | 
6 files changed, 110 insertions, 262 deletions
| diff --git a/arch/sh/atomic_arch.h b/arch/sh/atomic_arch.h index 2ac77246..74444d5d 100644 --- a/arch/sh/atomic_arch.h +++ b/arch/sh/atomic_arch.h @@ -1,96 +1,46 @@ -#define LLSC_CLOBBERS "r0", "t", "memory" -#define LLSC_START(mem) "synco\n"  \ -	"0:	movli.l @" mem ", r0\n" -#define LLSC_END(mem)              \ -	"1:	movco.l r0, @" mem "\n"    \ -	"	bf 0b\n"                   \ -	"	synco\n" +#if defined(__SH4A__) -static inline int __sh_cas_llsc(volatile int *p, int t, int s) +#define a_ll a_ll +static inline int a_ll(volatile int *p)  { -	int old; -	__asm__ __volatile__( -		LLSC_START("%1") -		"	mov r0, %0\n" -		"	cmp/eq %0, %2\n" -		"	bf 1f\n" -		"	mov %3, r0\n" -		LLSC_END("%1") -		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS); -	return old; +	int v; +	__asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p)); +	return v;  } -static inline int __sh_swap_llsc(volatile int *x, int v) +#define a_sc a_sc +static inline int a_sc(volatile int *p, int v)  { -	int old; -	__asm__ __volatile__( -		LLSC_START("%1") -		"	mov r0, %0\n" -		"	mov %2, r0\n" -		LLSC_END("%1") -		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS); -	return old; +	int r; +	__asm__ __volatile__ ( +		"movco.l %2, @%3 ; movt %0" +		: "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc"); +	return r;  } -static inline int __sh_fetch_add_llsc(volatile int *x, int v) +#define a_barrier a_barrier +static inline void a_barrier()  { -	int old; -	__asm__ __volatile__( -		LLSC_START("%1") -		"	mov r0, %0\n" -		"	add %2, r0\n" -		LLSC_END("%1") -		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS); -	return old; +	__asm__ __volatile__ ("synco" : : "memory");  } -static inline void __sh_store_llsc(volatile int *p, int x) -{ -	__asm__ __volatile__( -		"	synco\n" -		"	mov.l %1, @%0\n" -		"	synco\n" -		: : "r"(p), "r"(x) : "memory"); -} +#define a_pre_llsc a_barrier +#define a_post_llsc a_barrier -static inline void __sh_and_llsc(volatile int *x, int v) -{ -	__asm__ __volatile__( -		LLSC_START("%0") 
-		"	and %1, r0\n" -		LLSC_END("%0") -		: : "r"(x), "r"(v) : LLSC_CLOBBERS); -} +#else -static inline void __sh_or_llsc(volatile int *x, int v) +#define a_cas a_cas +__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr; +static inline int a_cas(volatile int *p, int t, int s)  { -	__asm__ __volatile__( -		LLSC_START("%0") -		"	or %1, r0\n" -		LLSC_END("%0") -		: : "r"(x), "r"(v) : LLSC_CLOBBERS); +	register int r1 __asm__("r1"); +	register int r2 __asm__("r2") = t; +	register int r3 __asm__("r3") = s; +	__asm__ __volatile__ ( +		"jsr @%4 ; nop" +		: "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr) +		: "memory", "pr", "cc"); +	return r3;  } -#ifdef __SH4A__ -#define a_cas(p,t,s)     __sh_cas_llsc(p,t,s) -#define a_swap(x,v)      __sh_swap_llsc(x,v) -#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v) -#define a_store(x,v)     __sh_store_llsc(x, v) -#define a_and(x,v)       __sh_and_llsc(x, v) -#define a_or(x,v)        __sh_or_llsc(x, v) -#else - -int  __sh_cas(volatile int *, int, int); -int  __sh_swap(volatile int *, int); -int  __sh_fetch_add(volatile int *, int); -void __sh_store(volatile int *, int); -void __sh_and(volatile int *, int); -void __sh_or(volatile int *, int); - -#define a_cas(p,t,s)     __sh_cas(p,t,s) -#define a_swap(x,v)      __sh_swap(x,v) -#define a_fetch_add(x,v) __sh_fetch_add(x, v) -#define a_store(x,v)     __sh_store(x, v) -#define a_and(x,v)       __sh_and(x, v) -#define a_or(x,v)        __sh_or(x, v)  #endif diff --git a/arch/sh/src/atomic.c b/arch/sh/src/atomic.c deleted file mode 100644 index 7fd73074..00000000 --- a/arch/sh/src/atomic.c +++ /dev/null @@ -1,158 +0,0 @@ -#ifndef __SH4A__ - -#include "sh_atomic.h" -#include "atomic.h" -#include "libc.h" - -static inline unsigned mask() -{ -	unsigned sr; -	__asm__ __volatile__ ( "\n" -	"	stc sr,r0 \n" -	"	mov r0,%0 \n" -	"	or #0xf0,r0 \n" -	"	ldc r0,sr \n" -	: "=&r"(sr) : : "memory", "r0" ); -	return sr; -} - -static inline void unmask(unsigned sr) -{ -	__asm__ 
__volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" ); -} - -/* gusa is a hack in the kernel which lets you create a sequence of instructions - * which will be restarted if the process is preempted in the middle of the - * sequence. It will do for implementing atomics on non-smp systems. ABI is: - * r0  = address of first instruction after the atomic sequence - * r1  = original stack pointer - * r15 = -1 * length of atomic sequence in bytes - */ -#define GUSA_CLOBBERS   "r0", "r1", "memory" -#define GUSA_START(mem,old,nop)    \ -	"	.align 2\n"                \ -	"	mova 1f, r0\n"             \ -	nop                            \ -	"	mov r15, r1\n"             \ -	"	mov #(0f-1f), r15\n"       \ -	"0:	mov.l @" mem ", " old "\n" -/* the target of mova must be 4 byte aligned, so we may need a nop */ -#define GUSA_START_ODD(mem,old)  GUSA_START(mem,old,"") -#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n") -#define GUSA_END(mem,new)          \ -	"	mov.l " new ", @" mem "\n" \ -	"1:	mov r1, r15\n" - -int __sh_cas(volatile int *p, int t, int s) -{ -	if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s); - -	if (__sh_atomic_model == SH_A_IMASK) { -		unsigned sr = mask(); -		int old = *p; -		if (old==t) *p = s; -		unmask(sr); -		return old; -	} - -	int old; -	__asm__ __volatile__( -		GUSA_START_EVEN("%1", "%0") -		"	cmp/eq %0, %2\n" -		"	bf 1f\n" -		GUSA_END("%1", "%3") -		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t"); -	return old; -} - -int __sh_swap(volatile int *x, int v) -{ -	if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v); - -	if (__sh_atomic_model == SH_A_IMASK) { -		unsigned sr = mask(); -		int old = *x; -		*x = v; -		unmask(sr); -		return old; -	} - -	int old; -	__asm__ __volatile__( -		GUSA_START_EVEN("%1", "%0") -		GUSA_END("%1", "%2") -		: "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS); -	return old; -} - -int __sh_fetch_add(volatile int *x, int v) -{ -	if (__sh_atomic_model == SH_A_LLSC) return 
__sh_fetch_add_llsc(x, v); - -	if (__sh_atomic_model == SH_A_IMASK) { -		unsigned sr = mask(); -		int old = *x; -		*x = old + v; -		unmask(sr); -		return old; -	} - -	int old, dummy; -	__asm__ __volatile__( -		GUSA_START_EVEN("%2", "%0") -		"	mov %0, %1\n" -		"	add %3, %1\n" -		GUSA_END("%2", "%1") -		: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); -	return old; -} - -void __sh_store(volatile int *p, int x) -{ -	if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x); -	__asm__ __volatile__( -		"	mov.l %1, @%0\n" -		: : "r"(p), "r"(x) : "memory"); -} - -void __sh_and(volatile int *x, int v) -{ -	if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v); - -	if (__sh_atomic_model == SH_A_IMASK) { -		unsigned sr = mask(); -		int old = *x; -		*x = old & v; -		unmask(sr); -		return; -	} - -	int dummy; -	__asm__ __volatile__( -		GUSA_START_ODD("%1", "%0") -		"	and %2, %0\n" -		GUSA_END("%1", "%0") -		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); -} - -void __sh_or(volatile int *x, int v) -{ -	if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v); - -	if (__sh_atomic_model == SH_A_IMASK) { -		unsigned sr = mask(); -		int old = *x; -		*x = old | v; -		unmask(sr); -		return; -	} - -	int dummy; -	__asm__ __volatile__( -		GUSA_START_ODD("%1", "%0") -		"	or %2, %0\n" -		GUSA_END("%1", "%0") -		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); -} - -#endif diff --git a/arch/sh/src/sh_atomic.h b/arch/sh/src/sh_atomic.h deleted file mode 100644 index 054c2a32..00000000 --- a/arch/sh/src/sh_atomic.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _SH_ATOMIC_H -#define _SH_ATOMIC_H - -#define SH_A_GUSA 0 -#define SH_A_LLSC 1 -#define SH_A_CAS 2 -#if !defined(__SH3__) && !defined(__SH4__) -#define SH_A_IMASK 3 -#else -#define SH_A_IMASK -1LL /* unmatchable by unsigned int */ -#endif - -extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model; - -#endif diff --git a/arch/sh/src/__set_thread_area.c b/src/thread/sh/__set_thread_area.c 
index 1d3e0225..9c47f78d 100644 --- a/arch/sh/src/__set_thread_area.c +++ b/src/thread/sh/__set_thread_area.c @@ -1,34 +1,40 @@  #include "pthread_impl.h"  #include "libc.h" -#include "sh_atomic.h"  #include <elf.h>  /* Also perform sh-specific init */  #define CPU_HAS_LLSC 0x0040 +#define CPU_HAS_CAS_L 0x0400 -__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu; +__attribute__((__visibility__("hidden"))) +extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[]; + +__attribute__((__visibility__("hidden"))) +const void *__sh_cas_ptr; + +__attribute__((__visibility__("hidden"))) +unsigned __sh_nommu;  int __set_thread_area(void *p)  {  	size_t *aux;  	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );  #ifndef __SH4A__ -	if (__hwcap & CPU_HAS_LLSC) { -		__sh_atomic_model = SH_A_LLSC; -		return 0; -	} +	__sh_cas_ptr = __sh_cas_gusa;  #if !defined(__SH3__) && !defined(__SH4__)  	for (aux=libc.auxv; *aux; aux+=2) {  		if (*aux != AT_PLATFORM) continue;  		const char *s = (void *)aux[1];  		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; -		__sh_atomic_model = SH_A_IMASK; +		__sh_cas_ptr = __sh_cas_imask;  		__sh_nommu = 1; -		return 0;  	}  #endif -	/* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */ +	if (__hwcap & CPU_HAS_CAS_L) +		__sh_cas_ptr = __sh_cas_cas_l; +	else if (__hwcap & CPU_HAS_LLSC) +		__sh_cas_ptr = __sh_cas_llsc;  #endif  	return 0;  } diff --git a/src/thread/sh/__set_thread_area.s b/src/thread/sh/__set_thread_area.s deleted file mode 100644 index e69de29b..00000000 --- a/src/thread/sh/__set_thread_area.s +++ /dev/null diff --git a/src/thread/sh/atomics.s b/src/thread/sh/atomics.s new file mode 100644 index 00000000..3b58cccc --- /dev/null +++ b/src/thread/sh/atomics.s @@ -0,0 +1,65 @@ +/* Contract for all versions is same as cas.l r2,r3,@r0 + * pr and r1 are also clobbered (by jsr & r1 as temp). + * r0,r2,r4-r15 must be preserved. 
+ * r3 contains result (==r2 iff cas succeeded). */ + +	.align 2 +.global __sh_cas_gusa +.hidden __sh_cas_gusa +__sh_cas_gusa: +	mov.l r5,@-r15 +	mov.l r4,@-r15 +	mov r0,r4 +	mova 1f,r0 +	mov r15,r1 +	mov #(0f-1f),r15 +0:	mov.l @r4,r5 +	cmp/eq r5,r2 +	bf 1f +	mov.l r3,@r4 +1:	mov r1,r15 +	mov r5,r3 +	mov r4,r0 +	mov.l @r15+,r4 +	rts +	 mov.l @r15+,r5 + +.global __sh_cas_llsc +.hidden __sh_cas_llsc +__sh_cas_llsc: +	mov r0,r1 +	synco +0:	movli.l @r1,r0 +	cmp/eq r0,r2 +	bf 1f +	mov r3,r0 +	movco.l r0,@r1 +	bf 0b +	mov r2,r0 +1:	synco +	mov r0,r3 +	rts +	 mov r1,r0 + +.global __sh_cas_imask +.hidden __sh_cas_imask +__sh_cas_imask: +	mov r0,r1 +	stc sr,r0 +	mov.l r0,@-r15 +	or #0xf0,r0 +	ldc r0,sr +	mov.l @r1,r0 +	cmp/eq r0,r2 +	bf 1f +	mov.l r3,@r1 +1:	ldc.l @r15+,sr +	mov r0,r3 +	rts +	 mov r1,r0 + +.global __sh_cas_cas_l +.hidden __sh_cas_cas_l +__sh_cas_cas_l: +	rts +	 .word 0x2323 /* cas.l r2,r3,@r0 */ | 
