diff options
| -rw-r--r-- | ldso/dynlink.c | 121 | ||||
| -rw-r--r-- | src/internal/pthread_impl.h | 1 | ||||
| -rw-r--r-- | src/ldso/aarch64/tlsdesc.s | 59 | ||||
| -rw-r--r-- | src/ldso/arm/tlsdesc.S | 19 | ||||
| -rw-r--r-- | src/ldso/i386/tlsdesc.s | 8 | ||||
| -rw-r--r-- | src/ldso/x86_64/tlsdesc.s | 21 | ||||
| -rw-r--r-- | src/thread/__tls_get_addr.c | 7 | ||||
| -rw-r--r-- | src/thread/i386/tls.s | 8 | ||||
| -rw-r--r-- | src/thread/pthread_create.c | 2 | 
9 files changed, 86 insertions, 160 deletions
| diff --git a/ldso/dynlink.c b/ldso/dynlink.c index ec921dfd..9e2adb21 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -17,6 +17,7 @@  #include <pthread.h>  #include <ctype.h>  #include <dlfcn.h> +#include <semaphore.h>  #include "pthread_impl.h"  #include "libc.h"  #include "dynlink.h" @@ -1338,48 +1339,6 @@ void __init_tls(size_t *auxv)  {  } -hidden void *__tls_get_new(tls_mod_off_t *v) -{ -	pthread_t self = __pthread_self(); - -	/* Block signals to make accessing new TLS async-signal-safe */ -	sigset_t set; -	__block_all_sigs(&set); -	if (v[0] <= self->dtv[0]) { -		__restore_sigs(&set); -		return (void *)(self->dtv[v[0]] + v[1]); -	} - -	/* This is safe without any locks held because, if the caller -	 * is able to request the Nth entry of the DTV, the DSO list -	 * must be valid at least that far out and it was synchronized -	 * at program startup or by an already-completed call to dlopen. */ -	struct dso *p; -	for (p=head; p->tls_id != v[0]; p=p->next); - -	/* Get new DTV space from new DSO */ -	uintptr_t *newdtv = p->new_dtv + -		(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1); -	memcpy(newdtv, self->dtv, (self->dtv[0]+1) * sizeof(uintptr_t)); -	newdtv[0] = v[0]; -	self->dtv = self->dtv_copy = newdtv; - -	/* Get new TLS memory from all new DSOs up to the requested one */ -	unsigned char *mem; -	for (p=head; ; p=p->next) { -		if (!p->tls_id || self->dtv[p->tls_id]) continue; -		mem = p->new_tls + (p->tls.size + p->tls.align) -			* a_fetch_add(&p->new_tls_idx,1); -		mem += ((uintptr_t)p->tls.image - (uintptr_t)mem) -			& (p->tls.align-1); -		self->dtv[p->tls_id] = (uintptr_t)mem + DTP_OFFSET; -		memcpy(mem, p->tls.image, p->tls.len); -		if (p->tls_id == v[0]) break; -	} -	__restore_sigs(&set); -	return mem + v[1] + DTP_OFFSET; -} -  static void update_tls_size()  {  	libc.tls_cnt = tls_cnt; @@ -1392,6 +1351,82 @@ static void update_tls_size()  	tls_align);  } +void __dl_prepare_for_threads(void) +{ +	/* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED */ +	__syscall(SYS_membarrier, 1<<4, 0); +} + +static sem_t barrier_sem; +static void bcast_barrier(int s) +{ +	sem_post(&barrier_sem); +} + +static void install_new_tls(void) +{ +	sigset_t set; +	pthread_t self = __pthread_self(), td; +	uintptr_t (*newdtv)[tls_cnt+1] = (void *)tail->new_dtv; +	struct dso *p; +	size_t i, j; +	size_t old_cnt = self->dtv[0]; + +	__block_app_sigs(&set); +	__tl_lock(); +	/* Copy existing dtv contents from all existing threads. */ +	for (i=0, td=self; !i || td!=self; i++, td=td->next) { +		memcpy(newdtv+i, td->dtv, +			(old_cnt+1)*sizeof(uintptr_t)); +		newdtv[i][0] = tls_cnt; +	} +	/* Install new dtls into the enlarged, uninstalled dtv copies. */ +	for (p=head; ; p=p->next) { +		if (!p->tls_id || self->dtv[p->tls_id]) continue; +		unsigned char *mem = p->new_tls; +		for (j=0; j<i; j++) { +			unsigned char *new = mem; +			new += ((uintptr_t)p->tls.image - (uintptr_t)mem) +				& (p->tls.align-1); +			memcpy(new, p->tls.image, p->tls.len); +			newdtv[j][p->tls_id] = +				(uintptr_t)new + DTP_OFFSET; +			mem += p->tls.size + p->tls.align; +		} +		if (p->tls_id == tls_cnt) break; +	} + +	/* Broadcast barrier to ensure contents of new dtv is visible +	 * if the new dtv pointer is. Use SYS_membarrier if it works, +	 * otherwise emulate with a signal. */ + +	/* MEMBARRIER_CMD_PRIVATE_EXPEDITED */ +	if (__syscall(SYS_membarrier, 1<<3, 0)) { +		sem_init(&barrier_sem, 0, 0); +		struct sigaction sa = { +			.sa_flags = SA_RESTART, +			.sa_handler = bcast_barrier +		}; +		memset(&sa.sa_mask, -1, sizeof sa.sa_mask); +		__libc_sigaction(SIGSYNCCALL, &sa, 0);	 +		for (td=self->next; td!=self; td=td->next) +			if (j) __syscall(SYS_tkill, td->tid, SIGSYNCCALL); +		for (td=self->next; td!=self; td=td->next) +			sem_wait(&barrier_sem); +		sa.sa_handler = SIG_IGN; +		__libc_sigaction(SIGSYNCCALL, &sa, 0); +		sem_destroy(&barrier_sem); +	} + +	/* Install new dtv for each thread. */ +	for (j=0, td=self; !j || td!=self; j++, td=td->next) { +		td->dtv = td->dtv_copy = newdtv[j]; +	} + +	__tl_unlock(); +	__restore_sigs(&set); +} +  /* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the   * following stage 2 and stage 3 functions via primitive symbolic lookup   * since it does not have access to their addresses to begin with. */ @@ -1864,6 +1899,8 @@ void *dlopen(const char *file, int mode)  	redo_lazy_relocs();  	update_tls_size(); +	if (tls_cnt != orig_tls_cnt) +		install_new_tls();  	_dl_debug_state();  	orig_tail = tail;  end: diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index d5d969ec..de089967 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -130,6 +130,7 @@ hidden int __init_tp(void *);  hidden void *__copy_tls(unsigned char *);  hidden void __reset_tls(); +hidden void __dl_prepare_for_threads(void);  hidden void __dl_thread_cleanup(void);  hidden void __testcancel();  hidden void __do_cleanup_push(struct __ptcb *); diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s index 8e4004d7..c91baa45 100644 --- a/src/ldso/aarch64/tlsdesc.s +++ b/src/ldso/aarch64/tlsdesc.s @@ -29,67 +29,10 @@ __tlsdesc_dynamic:  	ldr x0,[x0,#8]        // p  	ldr x2,[x0]           // p->modidx  	ldr x3,[x1,#-8]       // dtv -	ldr x4,[x3]           // dtv[0] -	cmp x2,x4 -	b.hi 1f  	ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]  	ldr x0,[x0,#8]        // p->off  	add x0,x0,x2 -2:	sub x0,x0,x1 +	sub x0,x0,x1  	ldp x3,x4,[sp,#16]  	ldp x1,x2,[sp],#32  	ret - -	// save all registers __tls_get_new may clobber -	// update sp in two steps because offset must be in [-512,509] -1:	stp x29,x30,[sp,#-160]! -	stp x5,x6,[sp,#16] -	stp x7,x8,[sp,#32] -	stp x9,x10,[sp,#48] -	stp x11,x12,[sp,#64] -	stp x13,x14,[sp,#80] -	stp x15,x16,[sp,#96] -	stp x17,x18,[sp,#112] -	stp q0,q1,[sp,#128] -	stp q2,q3,[sp,#-480]! -	stp q4,q5,[sp,#32] -	stp q6,q7,[sp,#64] -	stp q8,q9,[sp,#96] -	stp q10,q11,[sp,#128] -	stp q12,q13,[sp,#160] -	stp q14,q15,[sp,#192] -	stp q16,q17,[sp,#224] -	stp q18,q19,[sp,#256] -	stp q20,q21,[sp,#288] -	stp q22,q23,[sp,#320] -	stp q24,q25,[sp,#352] -	stp q26,q27,[sp,#384] -	stp q28,q29,[sp,#416] -	stp q30,q31,[sp,#448] -	bl __tls_get_new -	mrs x1,tpidr_el0 -	ldp q4,q5,[sp,#32] -	ldp q6,q7,[sp,#64] -	ldp q8,q9,[sp,#96] -	ldp q10,q11,[sp,#128] -	ldp q12,q13,[sp,#160] -	ldp q14,q15,[sp,#192] -	ldp q16,q17,[sp,#224] -	ldp q18,q19,[sp,#256] -	ldp q20,q21,[sp,#288] -	ldp q22,q23,[sp,#320] -	ldp q24,q25,[sp,#352] -	ldp q26,q27,[sp,#384] -	ldp q28,q29,[sp,#416] -	ldp q30,q31,[sp,#448] -	ldp q2,q3,[sp],#480 -	ldp x5,x6,[sp,#16] -	ldp x7,x8,[sp,#32] -	ldp x9,x10,[sp,#48] -	ldp x11,x12,[sp,#64] -	ldp x13,x14,[sp,#80] -	ldp x15,x16,[sp,#96] -	ldp x17,x18,[sp,#112] -	ldp q0,q1,[sp,#128] -	ldp x29,x30,[sp],#160 -	b 2b diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S index 4e67c3e2..455eac1d 100644 --- a/src/ldso/arm/tlsdesc.S +++ b/src/ldso/arm/tlsdesc.S @@ -35,13 +35,9 @@ __tlsdesc_dynamic:  #endif  #endif  	ldr r3,[r0,#-4] // r3 = dtv -	ldr ip,[r3]     // ip = dtv slot count -	cmp r1,ip -	bhi 3f  	ldr ip,[r3,r1,LSL #2]  	sub r0,ip,r0  	add r0,r0,r2    // r0 = r3[r1]-r0+r2 -4:  #if __ARM_ARCH >= 5  	pop {r2,r3,ip,pc}  #else @@ -49,21 +45,6 @@ __tlsdesc_dynamic:  	bx lr  #endif -3: -#if __ARM_PCS_VFP || !__SOFTFP__ -	.fpu vfp -	vpush {d0-d7} -#endif -	push {r0-r3} -	add r0,sp,#4 -	bl __tls_get_new -	pop {r1-r3,ip} -#if __ARM_PCS_VFP || !__SOFTFP__ -	vpop {d0-d7} -#endif -	sub r0,r0,r1    // r0 = retval-tp -	b 4b -  #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \   || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7  #else diff --git a/src/ldso/i386/tlsdesc.s b/src/ldso/i386/tlsdesc.s index 4a553bce..a5c0100c 100644 --- a/src/ldso/i386/tlsdesc.s +++ b/src/ldso/i386/tlsdesc.s @@ -17,15 +17,9 @@ __tlsdesc_dynamic:  	mov %gs:4,%edx  	push %ecx  	mov (%eax),%ecx -	cmp %ecx,(%edx) -	jc 1f  	mov 4(%eax),%eax  	add (%edx,%ecx,4),%eax -2:	pop %ecx +	pop %ecx  	sub %gs:0,%eax  	pop %edx  	ret -1:	push %eax -	call __tls_get_new -	pop %ecx -	jmp 2b diff --git a/src/ldso/x86_64/tlsdesc.s b/src/ldso/x86_64/tlsdesc.s index 8238c3eb..0151d15c 100644 --- a/src/ldso/x86_64/tlsdesc.s +++ b/src/ldso/x86_64/tlsdesc.s @@ -17,28 +17,9 @@ __tlsdesc_dynamic:  	mov %fs:8,%rdx  	push %rcx  	mov (%rax),%rcx -	cmp %rcx,(%rdx) -	jc 1f  	mov 8(%rax),%rax  	add (%rdx,%rcx,8),%rax -2:	pop %rcx +	pop %rcx  	sub %fs:0,%rax  	pop %rdx  	ret -1:	push %rdi -	push %rdi -	push %rsi -	push %r8 -	push %r9 -	push %r10 -	push %r11 -	mov %rax,%rdi -	call __tls_get_new -	pop %r11 -	pop %r10 -	pop %r9 -	pop %r8 -	pop %rsi -	pop %rdi -	pop %rdi -	jmp 2b diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c index d7afdabd..19524fe0 100644 --- a/src/thread/__tls_get_addr.c +++ b/src/thread/__tls_get_addr.c @@ -1,12 +1,7 @@ -#include <stddef.h>  #include "pthread_impl.h"  void *__tls_get_addr(tls_mod_off_t *v)  {  	pthread_t self = __pthread_self(); -	if (v[0] <= self->dtv[0]) -		return (void *)(self->dtv[v[0]] + v[1]); -	return __tls_get_new(v); +	return (void *)(self->dtv[v[0]] + v[1]);  } - -weak_alias(__tls_get_addr, __tls_get_new); diff --git a/src/thread/i386/tls.s b/src/thread/i386/tls.s index 76d5d462..6e4c4cb9 100644 --- a/src/thread/i386/tls.s +++ b/src/thread/i386/tls.s @@ -4,14 +4,6 @@  ___tls_get_addr:  	mov %gs:4,%edx  	mov (%eax),%ecx -	cmp %ecx,(%edx) -	jc 1f  	mov 4(%eax),%eax  	add (%edx,%ecx,4),%eax  	ret -1:	push %eax -.weak __tls_get_new -.hidden __tls_get_new -	call __tls_get_new -	pop %edx -	ret diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index cec82157..0142b347 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -15,6 +15,7 @@ weak_alias(dummy_0, __release_ptc);  weak_alias(dummy_0, __pthread_tsd_run_dtors);  weak_alias(dummy_0, __do_orphaned_stdio_locks);  weak_alias(dummy_0, __dl_thread_cleanup); +weak_alias(dummy_0, __dl_prepare_for_threads);  void __tl_lock(void)  { @@ -235,6 +236,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att  		init_file_lock(__stderr_used);  		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);  		self->tsd = (void **)__pthread_tsd_main; +		__dl_prepare_for_threads();  		libc.threaded = 1;  	}  	if (attrp && !c11) attr = *attrp; | 
