summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--VERSION2
-rw-r--r--WHATSNEW93
-rw-r--r--arch/aarch64/bits/hwcap.h4
-rw-r--r--arch/aarch64/bits/syscall.h.in2
-rw-r--r--arch/aarch64/fp_arch.h25
-rw-r--r--arch/arm/bits/syscall.h.in1
-rw-r--r--arch/generic/bits/ioctl.h100
-rw-r--r--arch/generic/fp_arch.h (renamed from src/internal/syscall.c)0
-rw-r--r--arch/i386/syscall_arch.h44
-rw-r--r--arch/microblaze/syscall_arch.h11
-rw-r--r--arch/mips/bits/ioctl.h98
-rw-r--r--arch/mips/syscall_arch.h64
-rw-r--r--arch/mips64/bits/ioctl.h98
-rw-r--r--arch/mips64/syscall_arch.h93
-rw-r--r--arch/mipsn32/bits/ioctl.h98
-rw-r--r--arch/mipsn32/syscall_arch.h31
-rw-r--r--arch/or1k/bits/syscall.h.in2
-rw-r--r--arch/or1k/syscall_arch.h9
-rw-r--r--arch/powerpc/bits/ioctl.h97
-rw-r--r--arch/powerpc/bits/ptrace.h2
-rw-r--r--arch/powerpc/syscall_arch.h86
-rw-r--r--arch/powerpc64/bits/ioctl.h97
-rw-r--r--arch/powerpc64/bits/ptrace.h2
-rw-r--r--arch/powerpc64/bits/signal.h17
-rw-r--r--arch/s390x/bits/fcntl.h3
-rw-r--r--arch/s390x/bits/socket.h1
-rw-r--r--arch/sh/bits/ioctl.h96
-rwxr-xr-xconfigure18
-rw-r--r--include/alltypes.h.in1
-rw-r--r--include/elf.h62
-rw-r--r--include/fcntl.h7
-rw-r--r--include/math.h12
-rw-r--r--include/netinet/in.h1
-rw-r--r--include/netinet/tcp.h9
-rw-r--r--include/netinet/udp.h2
-rw-r--r--include/netpacket/packet.h1
-rw-r--r--include/stdio.h4
-rw-r--r--include/sys/inotify.h1
-rw-r--r--include/sys/ioctl.h110
-rw-r--r--include/sys/membarrier.h17
-rw-r--r--include/sys/mman.h2
-rw-r--r--include/sys/prctl.h8
-rw-r--r--include/sys/shm.h2
-rw-r--r--include/sys/socket.h2
-rw-r--r--include/tar.h4
-rw-r--r--include/ucontext.h2
-rw-r--r--include/wchar.h4
-rw-r--r--ldso/dynlink.c406
-rw-r--r--src/aio/aio.c6
-rw-r--r--src/complex/__cexp.c2
-rw-r--r--src/complex/__cexpf.c2
-rw-r--r--src/complex/cabs.c2
-rw-r--r--src/complex/cabsf.c2
-rw-r--r--src/complex/cabsl.c2
-rw-r--r--src/complex/cacos.c2
-rw-r--r--src/complex/cacosf.c2
-rw-r--r--src/complex/cacosh.c2
-rw-r--r--src/complex/cacoshf.c2
-rw-r--r--src/complex/cacoshl.c2
-rw-r--r--src/complex/cacosl.c2
-rw-r--r--src/complex/carg.c2
-rw-r--r--src/complex/cargf.c2
-rw-r--r--src/complex/cargl.c2
-rw-r--r--src/complex/casin.c2
-rw-r--r--src/complex/casinf.c2
-rw-r--r--src/complex/casinh.c2
-rw-r--r--src/complex/casinhf.c2
-rw-r--r--src/complex/casinhl.c2
-rw-r--r--src/complex/casinl.c2
-rw-r--r--src/complex/catan.c2
-rw-r--r--src/complex/catanf.c2
-rw-r--r--src/complex/catanh.c2
-rw-r--r--src/complex/catanhf.c2
-rw-r--r--src/complex/catanhl.c2
-rw-r--r--src/complex/catanl.c2
-rw-r--r--src/complex/ccos.c2
-rw-r--r--src/complex/ccosf.c2
-rw-r--r--src/complex/ccosh.c2
-rw-r--r--src/complex/ccoshf.c2
-rw-r--r--src/complex/ccoshl.c2
-rw-r--r--src/complex/ccosl.c2
-rw-r--r--src/complex/cexp.c2
-rw-r--r--src/complex/cexpf.c2
-rw-r--r--src/complex/cexpl.c2
-rw-r--r--src/complex/cimag.c2
-rw-r--r--src/complex/cimagf.c2
-rw-r--r--src/complex/cimagl.c2
-rw-r--r--src/complex/clog.c2
-rw-r--r--src/complex/clogf.c2
-rw-r--r--src/complex/clogl.c2
-rw-r--r--src/complex/conj.c2
-rw-r--r--src/complex/conjf.c2
-rw-r--r--src/complex/conjl.c2
-rw-r--r--src/complex/cpow.c2
-rw-r--r--src/complex/cpowf.c2
-rw-r--r--src/complex/cpowl.c2
-rw-r--r--src/complex/cproj.c2
-rw-r--r--src/complex/cprojf.c2
-rw-r--r--src/complex/cprojl.c2
-rw-r--r--src/complex/csin.c2
-rw-r--r--src/complex/csinf.c2
-rw-r--r--src/complex/csinh.c2
-rw-r--r--src/complex/csinhf.c2
-rw-r--r--src/complex/csinhl.c2
-rw-r--r--src/complex/csinl.c2
-rw-r--r--src/complex/csqrt.c2
-rw-r--r--src/complex/csqrtf.c2
-rw-r--r--src/complex/csqrtl.c2
-rw-r--r--src/complex/ctan.c2
-rw-r--r--src/complex/ctanf.c2
-rw-r--r--src/complex/ctanh.c2
-rw-r--r--src/complex/ctanhf.c2
-rw-r--r--src/complex/ctanhl.c2
-rw-r--r--src/complex/ctanl.c2
-rw-r--r--src/dirent/fdopendir.c4
-rw-r--r--src/env/__init_tls.c10
-rw-r--r--src/env/__libc_start_main.c2
-rw-r--r--src/include/pthread.h7
-rw-r--r--src/include/stdio.h2
-rw-r--r--src/include/sys/membarrier.h9
-rw-r--r--src/include/wchar.h9
-rw-r--r--src/internal/aarch64/syscall.s14
-rw-r--r--src/internal/arm/syscall.s15
-rw-r--r--src/internal/complex_impl.h22
-rw-r--r--src/internal/defsysinfo.c3
-rw-r--r--src/internal/i386/defsysinfo.s9
-rw-r--r--src/internal/i386/syscall.s78
-rw-r--r--src/internal/libc.c1
-rw-r--r--src/internal/libm.h226
-rw-r--r--src/internal/m68k/syscall.s9
-rw-r--r--src/internal/microblaze/syscall.s14
-rw-r--r--src/internal/mips/syscall.s26
-rw-r--r--src/internal/mips64/syscall.s19
-rw-r--r--src/internal/mipsn32/syscall.s19
-rw-r--r--src/internal/or1k/syscall.s14
-rw-r--r--src/internal/powerpc/syscall.s19
-rw-r--r--src/internal/powerpc64/syscall.s17
-rw-r--r--src/internal/pthread_impl.h26
-rw-r--r--src/internal/s390x/syscall.s15
-rw-r--r--src/internal/sh/syscall.s23
-rw-r--r--src/internal/shgetc.c3
-rw-r--r--src/internal/shgetc.h2
-rw-r--r--src/internal/syscall.h14
-rw-r--r--src/internal/x32/syscall.s13
-rw-r--r--src/internal/x86_64/syscall.s13
-rw-r--r--src/ldso/aarch64/tlsdesc.s74
-rw-r--r--src/ldso/arm/tlsdesc.S19
-rw-r--r--src/ldso/dlerror.c22
-rw-r--r--src/ldso/i386/tlsdesc.s8
-rw-r--r--src/ldso/x86_64/tlsdesc.s21
-rw-r--r--src/linux/membarrier.c77
-rw-r--r--src/locale/dcngettext.c3
-rw-r--r--src/math/__math_divzero.c6
-rw-r--r--src/math/__math_divzerof.c6
-rw-r--r--src/math/__math_invalid.c6
-rw-r--r--src/math/__math_invalidf.c6
-rw-r--r--src/math/__math_oflow.c6
-rw-r--r--src/math/__math_oflowf.c6
-rw-r--r--src/math/__math_uflow.c6
-rw-r--r--src/math/__math_uflowf.c6
-rw-r--r--src/math/__math_xflow.c6
-rw-r--r--src/math/__math_xflowf.c6
-rw-r--r--src/math/atanl.c6
-rw-r--r--src/math/exp.c240
-rw-r--r--src/math/exp2.c466
-rw-r--r--src/math/exp2f.c165
-rw-r--r--src/math/exp2f_data.c35
-rw-r--r--src/math/exp2f_data.h23
-rw-r--r--src/math/exp_data.c182
-rw-r--r--src/math/exp_data.h26
-rw-r--r--src/math/expf.c133
-rw-r--r--src/math/log.c202
-rw-r--r--src/math/log2.c212
-rw-r--r--src/math/log2_data.c201
-rw-r--r--src/math/log2_data.h28
-rw-r--r--src/math/log2f.c114
-rw-r--r--src/math/log2f_data.c33
-rw-r--r--src/math/log2f_data.h19
-rw-r--r--src/math/log_data.c328
-rw-r--r--src/math/log_data.h28
-rw-r--r--src/math/logf.c110
-rw-r--r--src/math/logf_data.c33
-rw-r--r--src/math/logf_data.h20
-rw-r--r--src/math/pow.c621
-rw-r--r--src/math/pow_data.c180
-rw-r--r--src/math/pow_data.h22
-rw-r--r--src/math/powf.c406
-rw-r--r--src/math/powf_data.c34
-rw-r--r--src/math/powf_data.h26
-rw-r--r--src/mman/mlock.c4
-rw-r--r--src/network/dn_skipname.c7
-rw-r--r--src/network/getaddrinfo.c7
-rw-r--r--src/passwd/getspnam.c3
-rw-r--r--src/passwd/getspnam_r.c11
-rw-r--r--src/passwd/putgrent.c2
-rw-r--r--src/passwd/putpwent.c2
-rw-r--r--src/process/execvp.c3
-rw-r--r--src/process/fork.c1
-rw-r--r--src/signal/sigaction.c6
-rw-r--r--src/signal/sigaltstack.c2
-rw-r--r--src/stdio/fgetwc.c10
-rw-r--r--src/stdio/gets.c11
-rw-r--r--src/stdio/rename.c6
-rw-r--r--src/stdio/setvbuf.c4
-rw-r--r--src/thread/__syscall_cp.c2
-rw-r--r--src/thread/__timedwait.c8
-rw-r--r--src/thread/__tls_get_addr.c7
-rw-r--r--src/thread/__unmapself.c5
-rw-r--r--src/thread/i386/tls.s8
-rw-r--r--src/thread/pthread_attr_setinheritsched.c19
-rw-r--r--src/thread/pthread_create.c201
-rw-r--r--src/thread/pthread_detach.c2
-rw-r--r--src/thread/pthread_join.c9
-rw-r--r--src/thread/pthread_key_create.c96
-rw-r--r--src/thread/pthread_key_delete.c14
-rw-r--r--src/thread/pthread_mutex_consistent.c10
-rw-r--r--src/thread/pthread_mutex_timedlock.c45
-rw-r--r--src/thread/pthread_mutex_trylock.c38
-rw-r--r--src/thread/pthread_mutex_unlock.c19
-rw-r--r--src/thread/pthread_mutexattr_setprotocol.c26
-rw-r--r--src/thread/pthread_rwlock_rdlock.c6
-rw-r--r--src/thread/pthread_rwlock_timedrdlock.c6
-rw-r--r--src/thread/pthread_rwlock_timedwrlock.c6
-rw-r--r--src/thread/pthread_rwlock_tryrdlock.c4
-rw-r--r--src/thread/pthread_rwlock_trywrlock.c4
-rw-r--r--src/thread/pthread_rwlock_unlock.c4
-rw-r--r--src/thread/pthread_rwlock_wrlock.c6
-rw-r--r--src/thread/pthread_sigmask.c2
-rw-r--r--src/thread/sem_timedwait.c2
-rw-r--r--src/thread/synccall.c178
-rw-r--r--src/time/timer_create.c38
-rw-r--r--src/time/timer_delete.c2
-rw-r--r--src/unistd/renameat.c4
233 files changed, 4277 insertions, 3394 deletions
diff --git a/VERSION b/VERSION
index be5b4c7b..c442f5e7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.1.20
+1.1.22
diff --git a/WHATSNEW b/WHATSNEW
index dad17d6e..147229e4 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -1984,3 +1984,96 @@ arch-specfic bugs fixed:
- on mips, return from start function passed to clone crashed (runaway exec)
- printf %a precision specifier malfunctioned except on ld80 archs
- async thread cancellation crashed on powerpc64 and sh-fdpic
+
+
+1.1.21 release notes
+
+new features:
+- setting default thread stack size via PT_GNU_STACK program header
+- arm vfork implementation
+- arm tlsdesc/gnu2 tls dialect support
+- name_to_handle_at and open_by_handle_at syscall wrappers
+- header-level support for new linux features through 4.18
+
+optimizations:
+- glob rewrite with much better performance and stack usage properties
+- single-threaded and already-locked fast paths for getc/putc variants
+- single-instruction fma implementations for arm, s390x, powerpc, & x86_64
+- single-instruction fabs and sqrt implementations for powerpc
+- size and performance from making all internal-only functions/data hidden
+- made &errno and pthread_self results cachable again (attribute((const)))
+- significant speedup in strtod with short inputs
+- new tsearch AVL tree implementation, smaller and faster
+- special-cased nop calls to wmemmove
+- fixed erroneously suboptimal skip conditions in strstr and memmem
+
+hardening:
+- default thread stack guard size increased from 4k to 8k
+
+compatibility:
+- default thread stack size increased from 80k to 128k
+- building for arm as thumb2 with clang internal assembler now works
+- aio threads could overflow stack on kernels that break MINSIGSTKSZ ABI
+- aio threads no longer call malloc (problematic with malloc replacement)
+- pthread_sigmask/sigprocmask now ignore an invalid how when not changing mask
+
+bugs fixed:
+- soft deadlock regression in stdio FILE locks with >2 threads contending
+- deadlock and buffered data loss race in fclose
+- race condition leading to possible crash in dcngettext plural forms
+- glob failed to see past searchable-but-unreadable path components
+- getdelim wrongly realloc'd buffer that was already exactly right size
+- getdelim failed to set stream orientation on early error
+- ttyname[_r] reported wrong error when given bad fd
+- pthread_key_delete left old tsd values exposed if slot was reused
+- freeaddrinfo failed to support freeing sublists
+- access to optopt was broken by copy relocations
+- memccpy returned wrong result if first byte past buffer end matched
+- wordexp read past end of input string ending in backslash
+- sem_wait and sem_timedwait were wrongly not interruptible by signals
+- getspnam[_r] wrongly treated not-found as an error
+
+arch-specific bugs fixed:
+- soft deadlocks (missing futex wake) on powerpc locking
+- dlsym returned wrong address for thread-local symbols on ppc/mips/m68k
+
+
+1.1.22 release notes
+
+new features:
+- priority-inheritance mutexes
+- membarrier syscall, pre-registration to use it, fallback emulation
+- header-level support for new linux features in 4.19, 4.20, 5.0
+
+major internal changes:
+- complete, async-safe view of all existent threads as global list
+- robust __synccall based on new thread list
+- new dynamic TLS is installed synchronously at dlopen
+- TLSDESC resolver functions no longer make bad ABI assumptions to call C
+- resolved shared library dependencies are now recorded
+
+compatibility & conformance:
+- dependency-order shared library constructor execution
+- sigaltstack no longer rejects SS_AUTODISARM, future flags
+- FILE is now a complete (dummy) type in pre-C11 feature profiles
+- setvbuf reports failure on invalid arguments
+- TSVTX is exposed unconditionally in tar.h
+- multithreaded set*id() no longer depends on /proc
+- key slot reuse after pthread_key_delete no longer depends on /proc
+
+bugs fixed:
+- failures in multithreaded set*id() with concurrent thread creation/exit
+- interposed free was called from invalid/inconsistent contexts
+- freeaddrinfo performed invalid free of some partial results lists
+- dlsym dependency order search had false negatives and false positives
+- dn_skipname gave wrong results for labels with 8-bit content
+- dcngettext clobbered errno, often breaking printing of error messages
+- sscanf read past end of buffer under certain conditions (1.1.21 regression)
+- pthread_key_create spuriously failed under race condition (1.1.21 regression)
+- fdopendir wrongly succeeded with O_PATH file descriptors
+- gets behaved incorrectly in presence of null bytes
+- namespace violations in c11 tsd and mutex function dependencies
+- incorrect prototype for makecontext (unimplemented)
+
+arch-specific bugs fixed:
+- s390x had wrong values for POSIX_FADV_DONTNEED/_NOREUSE
diff --git a/arch/aarch64/bits/hwcap.h b/arch/aarch64/bits/hwcap.h
index 8541e329..ad670914 100644
--- a/arch/aarch64/bits/hwcap.h
+++ b/arch/aarch64/bits/hwcap.h
@@ -26,3 +26,7 @@
#define HWCAP_USCAT (1 << 25)
#define HWCAP_ILRCPC (1 << 26)
#define HWCAP_FLAGM (1 << 27)
+#define HWCAP_SSBS (1 << 28)
+#define HWCAP_SB (1 << 29)
+#define HWCAP_PACA (1 << 30)
+#define HWCAP_PACG (1UL << 31)
diff --git a/arch/aarch64/bits/syscall.h.in b/arch/aarch64/bits/syscall.h.in
index 47a969bc..4db85162 100644
--- a/arch/aarch64/bits/syscall.h.in
+++ b/arch/aarch64/bits/syscall.h.in
@@ -275,4 +275,6 @@
#define __NR_pkey_free 290
#define __NR_statx 291
#define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
diff --git a/arch/aarch64/fp_arch.h b/arch/aarch64/fp_arch.h
new file mode 100644
index 00000000..f3d445b9
--- /dev/null
+++ b/arch/aarch64/fp_arch.h
@@ -0,0 +1,25 @@
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+ __asm__ __volatile__ ("" : "+w"(x));
+ return x;
+}
+
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+ __asm__ __volatile__ ("" : "+w"(x));
+ return x;
+}
+
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+ __asm__ __volatile__ ("" : "+w"(x));
+}
+
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+ __asm__ __volatile__ ("" : "+w"(x));
+}
diff --git a/arch/arm/bits/syscall.h.in b/arch/arm/bits/syscall.h.in
index 13a3b66c..1787099d 100644
--- a/arch/arm/bits/syscall.h.in
+++ b/arch/arm/bits/syscall.h.in
@@ -354,6 +354,7 @@
#define __NR_pkey_free 396
#define __NR_statx 397
#define __NR_rseq 398
+#define __NR_io_pgetevents 399
#define __ARM_NR_breakpoint 0x0f0001
#define __ARM_NR_cacheflush 0x0f0002
diff --git a/arch/generic/bits/ioctl.h b/arch/generic/bits/ioctl.h
index 42a8f1a2..d1a6c035 100644
--- a/arch/generic/bits/ioctl.h
+++ b/arch/generic/bits/ioctl.h
@@ -64,6 +64,8 @@
#define TIOCGPTLCK 0x80045439
#define TIOCGEXCL 0x80045440
#define TIOCGPTPEER 0x5441
+#define TIOCGISO7816 0x80285442
+#define TIOCSISO7816 0xc0285443
#define FIONCLEX 0x5450
#define FIOCLEX 0x5451
@@ -82,24 +84,6 @@
#define TIOCGICOUNT 0x545D
#define FIOQSIZE 0x5460
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
-
-#define TIOCSER_TEMT 0x01
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
#define TIOCM_LE 0x001
#define TIOCM_DTR 0x002
#define TIOCM_RTS 0x004
@@ -115,23 +99,6 @@ struct winsize {
#define TIOCM_OUT2 0x4000
#define TIOCM_LOOP 0x8000
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOSETOWN 0x8901
#define SIOCSPGRP 0x8902
#define FIOGETOWN 0x8903
@@ -140,67 +107,4 @@ struct winsize {
#define SIOCGSTAMP 0x8906
#define SIOCGSTAMPNS 0x8907
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
-
#include <bits/ioctl_fix.h>
diff --git a/src/internal/syscall.c b/arch/generic/fp_arch.h
index e69de29b..e69de29b 100644
--- a/src/internal/syscall.c
+++ b/arch/generic/fp_arch.h
diff --git a/arch/i386/syscall_arch.h b/arch/i386/syscall_arch.h
index 4c9d874a..22b0b28b 100644
--- a/arch/i386/syscall_arch.h
+++ b/arch/i386/syscall_arch.h
@@ -3,52 +3,82 @@
((union { long long ll; long l[2]; }){ .ll = x }).l[1]
#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
+#if SYSCALL_NO_TLS
+#define SYSCALL_INSNS "int $128"
+#else
+#define SYSCALL_INSNS "call *%%gs:16"
+#endif
+
+#define SYSCALL_INSNS_12 "xchg %%ebx,%%edx ; " SYSCALL_INSNS " ; xchg %%ebx,%%edx"
+#define SYSCALL_INSNS_34 "xchg %%ebx,%%edi ; " SYSCALL_INSNS " ; xchg %%ebx,%%edi"
+
static inline long __syscall0(long n)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n) : "memory");
return __ret;
}
static inline long __syscall1(long n, long a1)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS_12 : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
return __ret;
}
static inline long __syscall2(long n, long a1, long a2)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS_12 : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
return __ret;
}
static inline long __syscall3(long n, long a1, long a2, long a3)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+ __asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3) : "memory");
+#else
+ __asm__ __volatile__ (SYSCALL_INSNS_34 : "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3) : "memory");
+#endif
return __ret;
}
static inline long __syscall4(long n, long a1, long a2, long a3, long a4)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+ __asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
+#else
+ __asm__ __volatile__ (SYSCALL_INSNS_34 : "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
+#endif
return __ret;
}
static inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
{
unsigned long __ret;
- __asm__ __volatile__ ("push %6 ; .hidden __vsyscall ; call __vsyscall ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(a5) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+ __asm__ __volatile__ (SYSCALL_INSNS
+ : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+#else
+ __asm__ __volatile__ ("pushl %2 ; push %%ebx ; mov 4(%%esp),%%ebx ; " SYSCALL_INSNS " ; pop %%ebx ; add $4,%%esp"
+ : "=a"(__ret) : "a"(n), "g"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+#endif
return __ret;
}
static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
{
unsigned long __ret;
- __asm__ __volatile__ ("push %6 ; .hidden __vsyscall6 ; call __vsyscall6 ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(0+(long[]){a5, a6}) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+ __asm__ __volatile__ ("pushl %7 ; push %%ebp ; mov 4(%%esp),%%ebp ; " SYSCALL_INSNS " ; pop %%ebp ; add $4,%%esp"
+ : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5), "g"(a6) : "memory");
+#else
+ unsigned long a1a6[2] = { a1, a6 };
+ __asm__ __volatile__ ("pushl %1 ; push %%ebx ; push %%ebp ; mov 8(%%esp),%%ebx ; mov 4(%%ebx),%%ebp ; mov (%%ebx),%%ebx ; " SYSCALL_INSNS " ; pop %%ebp ; pop %%ebx ; add $4,%%esp"
+ : "=a"(__ret) : "g"(&a1a6), "a"(n), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+#endif
return __ret;
}
diff --git a/arch/microblaze/syscall_arch.h b/arch/microblaze/syscall_arch.h
index 6cf631ad..169013f8 100644
--- a/arch/microblaze/syscall_arch.h
+++ b/arch/microblaze/syscall_arch.h
@@ -1,9 +1,7 @@
#define __SYSCALL_LL_E(x) \
((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
((union { long long ll; long l[2]; }){ .ll = x }).l[1]
-#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
-
-#ifndef __clang__
+#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
static __inline long __syscall0(long n)
{
@@ -96,11 +94,4 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
return r3;
}
-#else
-
-#undef SYSCALL_NO_INLINE
-#define SYSCALL_NO_INLINE
-
-#endif
-
#define SYSCALL_IPC_BROKEN_MODE
diff --git a/arch/mips/bits/ioctl.h b/arch/mips/bits/ioctl.h
index b8f77cb5..e277c3f0 100644
--- a/arch/mips/bits/ioctl.h
+++ b/arch/mips/bits/ioctl.h
@@ -90,24 +90,6 @@
#define TIOCMIWAIT 0x5491
#define TIOCGICOUNT 0x5492
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
-
-#define TIOCSER_TEMT 0x01
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
#define TIOCM_LE 0x001
#define TIOCM_DTR 0x002
#define TIOCM_RTS 0x004
@@ -123,23 +105,6 @@ struct winsize {
#define TIOCM_OUT2 0x4000
#define TIOCM_LOOP 0x8000
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOGETOWN _IOR('f', 123, int)
#define FIOSETOWN _IOW('f', 124, int)
#define SIOCATMARK _IOR('s', 7, int)
@@ -147,66 +112,3 @@ struct winsize {
#define SIOCGPGRP _IOR('s', 9, pid_t)
#define SIOCGSTAMP 0x8906
#define SIOCGSTAMPNS 0x8907
-
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
index 01de67b8..43bcdee7 100644
--- a/arch/mips/syscall_arch.h
+++ b/arch/mips/syscall_arch.h
@@ -3,8 +3,6 @@
((union { long long ll; long l[2]; }){ .ll = x }).l[1]
#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
-hidden long (__syscall)(long, ...);
-
#define SYSCALL_RLIM_INFINITY (-1UL/2)
#if _MIPSEL || __MIPSEL || __MIPSEL__
@@ -104,8 +102,22 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
static inline long __syscall5(long n, long a, long b, long c, long d, long e)
{
- long r2 = (__syscall)(n, a, b, c, d, e);
- if (r2 > -4096UL) return r2;
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r2 __asm__("$2");
+ __asm__ __volatile__ (
+ "subu $sp,$sp,32 ; sw $8,16($sp) ; "
+ "addu $2,$0,%3 ; syscall ;"
+ "addu $sp,$sp,32"
+ : "=&r"(r2), "=r"(r7), "+r"(r8)
+ : "ir"(n), "0"(r2), "1"(r7), "r"(r4), "r"(r5), "r"(r6)
+ : "$1", "$3", "$9", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+ if (r7) return -r2;
+ long ret = r2;
if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
if (n == SYS_fstatat64) __stat_fix(c);
return r2;
@@ -113,8 +125,48 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e)
static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
{
- long r2 = (__syscall)(n, a, b, c, d, e, f);
- if (r2 > -4096UL) return r2;
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r9 __asm__("$9") = f;
+ register long r2 __asm__("$2");
+ __asm__ __volatile__ (
+ "subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; "
+ "addu $2,$0,%4 ; syscall ;"
+ "addu $sp,$sp,32"
+ : "=&r"(r2), "=r"(r7), "+r"(r8), "+r"(r9)
+ : "ir"(n), "0"(r2), "1"(r7), "r"(r4), "r"(r5), "r"(r6)
+ : "$1", "$3", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+ if (r7) return -r2;
+ long ret = r2;
+ if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
+ if (n == SYS_fstatat64) __stat_fix(c);
+ return r2;
+}
+
+static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g)
+{
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r9 __asm__("$9") = f;
+ register long r10 __asm__("$10") = g;
+ register long r2 __asm__("$2");
+ __asm__ __volatile__ (
+ "subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; sw $10,24($sp) ; "
+ "addu $2,$0,%5 ; syscall ;"
+ "addu $sp,$sp,32"
+ : "=&r"(r2), "=r"(r7), "+r"(r8), "+r"(r9), "+r"(r10)
+ : "ir"(n), "0"(r2), "1"(r7), "r"(r4), "r"(r5), "r"(r6)
+ : "$1", "$3", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+ if (r7) return -r2;
+ long ret = r2;
if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
if (n == SYS_fstatat64) __stat_fix(c);
return r2;
diff --git a/arch/mips64/bits/ioctl.h b/arch/mips64/bits/ioctl.h
index b8f77cb5..e277c3f0 100644
--- a/arch/mips64/bits/ioctl.h
+++ b/arch/mips64/bits/ioctl.h
@@ -90,24 +90,6 @@
#define TIOCMIWAIT 0x5491
#define TIOCGICOUNT 0x5492
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
-
-#define TIOCSER_TEMT 0x01
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
#define TIOCM_LE 0x001
#define TIOCM_DTR 0x002
#define TIOCM_RTS 0x004
@@ -123,23 +105,6 @@ struct winsize {
#define TIOCM_OUT2 0x4000
#define TIOCM_LOOP 0x8000
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOGETOWN _IOR('f', 123, int)
#define FIOSETOWN _IOW('f', 124, int)
#define SIOCATMARK _IOR('s', 7, int)
@@ -147,66 +112,3 @@ struct winsize {
#define SIOCGPGRP _IOR('s', 9, pid_t)
#define SIOCGSTAMP 0x8906
#define SIOCGSTAMPNS 0x8907
-
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
diff --git a/arch/mips64/syscall_arch.h b/arch/mips64/syscall_arch.h
index 5eabdf46..99eebc32 100644
--- a/arch/mips64/syscall_arch.h
+++ b/arch/mips64/syscall_arch.h
@@ -1,9 +1,6 @@
#define __SYSCALL_LL_E(x) (x)
#define __SYSCALL_LL_O(x) (x)
-__attribute__((visibility("hidden")))
-long (__syscall)(long, ...);
-
#define SYSCALL_RLIM_INFINITY (-1UL/2)
#include <sys/stat.h>
@@ -79,16 +76,14 @@ static inline long __syscall2(long n, long a, long b)
{
struct kernel_stat kst;
long ret;
- register long r4 __asm__("$4");
- register long r5 __asm__("$5");
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
register long r7 __asm__("$7");
register long r2 __asm__("$2");
- r5 = b;
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
r5 = (long) &kst;
- r4 = a;
__asm__ __volatile__ (
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
@@ -109,18 +104,15 @@ static inline long __syscall3(long n, long a, long b, long c)
{
struct kernel_stat kst;
long ret;
- register long r4 __asm__("$4");
- register long r5 __asm__("$5");
- register long r6 __asm__("$6");
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
register long r7 __asm__("$7");
register long r2 __asm__("$2");
- r5 = b;
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
r5 = (long) &kst;
- r4 = a;
- r6 = c;
__asm__ __volatile__ (
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
@@ -141,16 +133,12 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
{
struct kernel_stat kst;
long ret;
- register long r4 __asm__("$4");
- register long r5 __asm__("$5");
- register long r6 __asm__("$6");
- register long r7 __asm__("$7");
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
register long r2 __asm__("$2");
- r4 = a;
- r5 = b;
- r6 = c;
- r7 = d;
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
r5 = (long) &kst;
if (n == SYS_newfstatat)
@@ -176,48 +164,71 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
static inline long __syscall5(long n, long a, long b, long c, long d, long e)
{
- long r2;
- long old_b = b;
- long old_c = c;
struct kernel_stat kst;
+ long ret;
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r2 __asm__("$2");
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
- b = (long) &kst;
+ r5 = (long) &kst;
if (n == SYS_newfstatat)
- c = (long) &kst;
+ r6 = (long) &kst;
- r2 = (__syscall)(n, a, b, c, d, e);
- if (r2 > -4096UL) return r2;
+ __asm__ __volatile__ (
+ "daddu $2,$0,%2 ; syscall"
+ : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
+ "r"(r4), "r"(r5), "r"(r6), "r"(r8)
+ : "$1", "$3", "$9", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+ if (r7) return -r2;
+ ret = r2;
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
- __stat_fix(&kst, (struct stat *)old_b);
+ __stat_fix(&kst, (struct stat *)b);
if (n == SYS_newfstatat)
- __stat_fix(&kst, (struct stat *)old_c);
+ __stat_fix(&kst, (struct stat *)c);
- return r2;
+ return ret;
}
static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
{
- long r2;
- long old_b = b;
- long old_c = c;
struct kernel_stat kst;
+ long ret;
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r9 __asm__("$9") = f;
+ register long r2 __asm__("$2");
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
- b = (long) &kst;
+ r5 = (long) &kst;
if (n == SYS_newfstatat)
- c = (long) &kst;
+ r6 = (long) &kst;
- r2 = (__syscall)(n, a, b, c, d, e, f);
- if (r2 > -4096UL) return r2;
+ __asm__ __volatile__ (
+ "daddu $2,$0,%2 ; syscall"
+ : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
+ "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9)
+ : "$1", "$3", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
+
+ if (r7) return -r2;
+ ret = r2;
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
- __stat_fix(&kst, (struct stat *)old_b);
+ __stat_fix(&kst, (struct stat *)b);
if (n == SYS_newfstatat)
- __stat_fix(&kst, (struct stat *)old_c);
+ __stat_fix(&kst, (struct stat *)c);
- return r2;
+ return ret;
}
#define VDSO_USEFUL
diff --git a/arch/mipsn32/bits/ioctl.h b/arch/mipsn32/bits/ioctl.h
index b8f77cb5..e277c3f0 100644
--- a/arch/mipsn32/bits/ioctl.h
+++ b/arch/mipsn32/bits/ioctl.h
@@ -90,24 +90,6 @@
#define TIOCMIWAIT 0x5491
#define TIOCGICOUNT 0x5492
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
-
-#define TIOCSER_TEMT 0x01
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
#define TIOCM_LE 0x001
#define TIOCM_DTR 0x002
#define TIOCM_RTS 0x004
@@ -123,23 +105,6 @@ struct winsize {
#define TIOCM_OUT2 0x4000
#define TIOCM_LOOP 0x8000
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOGETOWN _IOR('f', 123, int)
#define FIOSETOWN _IOW('f', 124, int)
#define SIOCATMARK _IOR('s', 7, int)
@@ -147,66 +112,3 @@ struct winsize {
#define SIOCGPGRP _IOR('s', 9, pid_t)
#define SIOCGSTAMP 0x8906
#define SIOCGSTAMPNS 0x8907
-
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
diff --git a/arch/mipsn32/syscall_arch.h b/arch/mipsn32/syscall_arch.h
index f6a1fbae..7b11740f 100644
--- a/arch/mipsn32/syscall_arch.h
+++ b/arch/mipsn32/syscall_arch.h
@@ -1,8 +1,6 @@
#define __SYSCALL_LL_E(x) (x)
#define __SYSCALL_LL_O(x) (x)
-hidden long (__syscall)(long, ...);
-
#define SYSCALL_RLIM_INFINITY (-1UL/2)
#if _MIPSEL || __MIPSEL || __MIPSEL__
@@ -102,8 +100,18 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
static inline long __syscall5(long n, long a, long b, long c, long d, long e)
{
- long r2 = (__syscall)(n, a, b, c, d, e);
- if (r2 > -4096UL) return r2;
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r2 __asm__("$2");
+ __asm__ __volatile__ (
+ "addu $2,$0,%2 ; syscall"
+ : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
+ "r"(r4), "r"(r5), "r"(r6), "r"(r8)
+ : "$1", "$3", "$9", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
if (n == SYS_newfstatat) __stat_fix(c);
return r2;
@@ -111,8 +119,19 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e)
static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
{
- long r2 = (__syscall)(n, a, b, c, d, e, f);
- if (r2 > -4096UL) return r2;
+ register long r4 __asm__("$4") = a;
+ register long r5 __asm__("$5") = b;
+ register long r6 __asm__("$6") = c;
+ register long r7 __asm__("$7") = d;
+ register long r8 __asm__("$8") = e;
+ register long r9 __asm__("$9") = f;
+ register long r2 __asm__("$2");
+ __asm__ __volatile__ (
+ "addu $2,$0,%2 ; syscall"
+ : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
+ "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9)
+ : "$1", "$3", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
if (n == SYS_newfstatat) __stat_fix(c);
return r2;
diff --git a/arch/or1k/bits/syscall.h.in b/arch/or1k/bits/syscall.h.in
index 76ba2c6e..4c3df0a3 100644
--- a/arch/or1k/bits/syscall.h.in
+++ b/arch/or1k/bits/syscall.h.in
@@ -277,4 +277,6 @@
#define __NR_pkey_free 290
#define __NR_statx 291
#define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
diff --git a/arch/or1k/syscall_arch.h b/arch/or1k/syscall_arch.h
index caff7ece..5a9b074a 100644
--- a/arch/or1k/syscall_arch.h
+++ b/arch/or1k/syscall_arch.h
@@ -5,8 +5,6 @@
#define SYSCALL_MMAP2_UNIT 8192ULL
-#ifndef __clang__
-
static __inline long __syscall0(long n)
{
register unsigned long r11 __asm__("r11") = n;
@@ -113,10 +111,3 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
"r23", "r25", "r27", "r29", "r31");
return r11;
}
-
-#else
-
-#undef SYSCALL_NO_INLINE
-#define SYSCALL_NO_INLINE
-
-#endif
diff --git a/arch/powerpc/bits/ioctl.h b/arch/powerpc/bits/ioctl.h
index 47586234..b6cbb18f 100644
--- a/arch/powerpc/bits/ioctl.h
+++ b/arch/powerpc/bits/ioctl.h
@@ -78,14 +78,6 @@
#define TIOCGSERIAL 0x541E
#define TIOCSSERIAL 0x541F
#define TIOCPKT 0x5420
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
#define TIOCNOTTY 0x5422
#define TIOCSETD 0x5423
@@ -113,38 +105,12 @@
#define TIOCSLCKTRMIOS 0x5457
#define TIOCSERGSTRUCT 0x5458
#define TIOCSERGETLSR 0x5459
-#define TIOCSER_TEMT 0x01
#define TIOCSERGETMULTI 0x545A
#define TIOCSERSETMULTI 0x545B
#define TIOCMIWAIT 0x545C
#define TIOCGICOUNT 0x545D
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOSETOWN 0x8901
#define SIOCSPGRP 0x8902
#define FIOGETOWN 0x8903
@@ -152,66 +118,3 @@ struct winsize {
#define SIOCATMARK 0x8905
#define SIOCGSTAMP 0x8906
#define SIOCGSTAMPNS 0x8907
-
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
diff --git a/arch/powerpc/bits/ptrace.h b/arch/powerpc/bits/ptrace.h
index 75086ca0..303a0735 100644
--- a/arch/powerpc/bits/ptrace.h
+++ b/arch/powerpc/bits/ptrace.h
@@ -8,6 +8,8 @@
#define PTRACE_SET_DEBUGREG 0x1a
#define PTRACE_GETVSRREGS 0x1b
#define PTRACE_SETVSRREGS 0x1c
+#define PTRACE_SYSEMU 0x1d
+#define PTRACE_SYSEMU_SINGLESTEP 0x1e
#define PTRACE_SINGLEBLOCK 0x100
#define PT_GETVRREGS PTRACE_GETVRREGS
diff --git a/arch/powerpc/syscall_arch.h b/arch/powerpc/syscall_arch.h
index 004060e6..e26a3c34 100644
--- a/arch/powerpc/syscall_arch.h
+++ b/arch/powerpc/syscall_arch.h
@@ -3,7 +3,89 @@
((union { long long ll; long l[2]; }){ .ll = x }).l[1]
#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
-#undef SYSCALL_NO_INLINE
-#define SYSCALL_NO_INLINE
+static inline long __syscall0(long n)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3");
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "=r"(r3)
+ :: "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+ return r3;
+}
+
+static inline long __syscall1(long n, long a)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3") = a;
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "+r"(r3)
+ :: "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+ return r3;
+}
+
+static inline long __syscall2(long n, long a, long b)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3") = a;
+ register long r4 __asm__("r4") = b;
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "+r"(r3), "+r"(r4)
+ :: "memory", "cr0", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+ return r3;
+}
+
+static inline long __syscall3(long n, long a, long b, long c)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3") = a;
+ register long r4 __asm__("r4") = b;
+ register long r5 __asm__("r5") = c;
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5)
+ :: "memory", "cr0", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+ return r3;
+}
+
+static inline long __syscall4(long n, long a, long b, long c, long d)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3") = a;
+ register long r4 __asm__("r4") = b;
+ register long r5 __asm__("r5") = c;
+ register long r6 __asm__("r6") = d;
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6)
+ :: "memory", "cr0", "r7", "r8", "r9", "r10", "r11", "r12");
+ return r3;
+}
+
+static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3") = a;
+ register long r4 __asm__("r4") = b;
+ register long r5 __asm__("r5") = c;
+ register long r6 __asm__("r6") = d;
+ register long r7 __asm__("r7") = e;
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6), "+r"(r7)
+ :: "memory", "cr0", "r8", "r9", "r10", "r11", "r12");
+ return r3;
+}
+
+static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+{
+ register long r0 __asm__("r0") = n;
+ register long r3 __asm__("r3") = a;
+ register long r4 __asm__("r4") = b;
+ register long r5 __asm__("r5") = c;
+ register long r6 __asm__("r6") = d;
+ register long r7 __asm__("r7") = e;
+ register long r8 __asm__("r8") = f;
+ __asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+ : "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6), "+r"(r7), "+r"(r8)
+ :: "memory", "cr0", "r9", "r10", "r11", "r12");
+ return r3;
+}
#define SYSCALL_FADVISE_6_ARG
diff --git a/arch/powerpc64/bits/ioctl.h b/arch/powerpc64/bits/ioctl.h
index 47586234..b6cbb18f 100644
--- a/arch/powerpc64/bits/ioctl.h
+++ b/arch/powerpc64/bits/ioctl.h
@@ -78,14 +78,6 @@
#define TIOCGSERIAL 0x541E
#define TIOCSSERIAL 0x541F
#define TIOCPKT 0x5420
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
#define TIOCNOTTY 0x5422
#define TIOCSETD 0x5423
@@ -113,38 +105,12 @@
#define TIOCSLCKTRMIOS 0x5457
#define TIOCSERGSTRUCT 0x5458
#define TIOCSERGETLSR 0x5459
-#define TIOCSER_TEMT 0x01
#define TIOCSERGETMULTI 0x545A
#define TIOCSERSETMULTI 0x545B
#define TIOCMIWAIT 0x545C
#define TIOCGICOUNT 0x545D
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOSETOWN 0x8901
#define SIOCSPGRP 0x8902
#define FIOGETOWN 0x8903
@@ -152,66 +118,3 @@ struct winsize {
#define SIOCATMARK 0x8905
#define SIOCGSTAMP 0x8906
#define SIOCGSTAMPNS 0x8907
-
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
diff --git a/arch/powerpc64/bits/ptrace.h b/arch/powerpc64/bits/ptrace.h
index 75086ca0..303a0735 100644
--- a/arch/powerpc64/bits/ptrace.h
+++ b/arch/powerpc64/bits/ptrace.h
@@ -8,6 +8,8 @@
#define PTRACE_SET_DEBUGREG 0x1a
#define PTRACE_GETVSRREGS 0x1b
#define PTRACE_SETVSRREGS 0x1c
+#define PTRACE_SYSEMU 0x1d
+#define PTRACE_SYSEMU_SINGLESTEP 0x1e
#define PTRACE_SINGLEBLOCK 0x100
#define PT_GETVRREGS PTRACE_GETVRREGS
diff --git a/arch/powerpc64/bits/signal.h b/arch/powerpc64/bits/signal.h
index 34693a68..2cc0604c 100644
--- a/arch/powerpc64/bits/signal.h
+++ b/arch/powerpc64/bits/signal.h
@@ -16,11 +16,18 @@ typedef struct {
} fpregset_t;
typedef struct {
- unsigned __int128 vrregs[32];
- unsigned _pad[3];
- unsigned vrsave;
- unsigned vscr;
- unsigned _pad2[3];
+#ifdef __GNUC__
+ __attribute__((__aligned__(16)))
+#endif
+ unsigned vrregs[32][4];
+ struct {
+#if __BIG_ENDIAN__
+ unsigned _pad[3], vscr_word;
+#else
+ unsigned vscr_word, _pad[3];
+#endif
+ } vscr;
+ unsigned vrsave, _pad[3];
} vrregset_t;
typedef struct sigcontext {
diff --git a/arch/s390x/bits/fcntl.h b/arch/s390x/bits/fcntl.h
index 1eca6ba5..a231efb4 100644
--- a/arch/s390x/bits/fcntl.h
+++ b/arch/s390x/bits/fcntl.h
@@ -38,3 +38,6 @@
#define F_GETOWN_EX 16
#define F_GETOWNER_UIDS 17
+
+#define POSIX_FADV_DONTNEED 6
+#define POSIX_FADV_NOREUSE 7
diff --git a/arch/s390x/bits/socket.h b/arch/s390x/bits/socket.h
index 2b81bfec..f2983ee1 100644
--- a/arch/s390x/bits/socket.h
+++ b/arch/s390x/bits/socket.h
@@ -38,6 +38,7 @@ struct cmsghdr {
#define SO_RCVTIMEO 20
#define SO_SNDTIMEO 21
#define SO_ACCEPTCONN 30
+#define SO_PEERSEC 31
#define SO_SNDBUFFORCE 32
#define SO_RCVBUFFORCE 33
#define SO_PROTOCOL 38
diff --git a/arch/sh/bits/ioctl.h b/arch/sh/bits/ioctl.h
index 3c7ab4bb..c4305655 100644
--- a/arch/sh/bits/ioctl.h
+++ b/arch/sh/bits/ioctl.h
@@ -65,14 +65,6 @@
#define TIOCGSERIAL _IOR('T', 30, char[60])
#define TIOCSSERIAL _IOW('T', 31, char[60])
#define TIOCPKT _IOW('T', 32, int)
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-#define TIOCPKT_IOCTL 64
#define TIOCNOTTY _IO('T', 34)
#define TIOCSETD _IOW('T', 35, int)
@@ -104,37 +96,12 @@
#define TIOCSLCKTRMIOS _IO('T', 87)
#define TIOCSERGSTRUCT _IOR('T', 88, char[216])
#define TIOCSERGETLSR _IOR('T', 89, unsigned int)
-#define TIOCSER_TEMT 0x01
#define TIOCSERGETMULTI _IOR('T', 90, char[168])
#define TIOCSERSETMULTI _IOW('T', 91, char[168])
#define TIOCMIWAIT _IO('T', 92)
#define TIOCGICOUNT _IO('T', 93)
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define N_TTY 0
-#define N_SLIP 1
-#define N_MOUSE 2
-#define N_PPP 3
-#define N_STRIP 4
-#define N_AX25 5
-#define N_X25 6
-#define N_6PACK 7
-#define N_MASC 8
-#define N_R3964 9
-#define N_PROFIBUS_FDL 10
-#define N_IRDA 11
-#define N_SMSBLOCK 12
-#define N_HDLC 13
-#define N_SYNC_PPP 14
-#define N_HCI 15
-
#define FIOGETOWN _IOR('f', 123, int)
#define FIOSETOWN _IOW('f', 124, int)
@@ -143,66 +110,3 @@ struct winsize {
#define SIOCGPGRP _IOW('s', 9, int)
#define SIOCGSTAMP _IOR('s', 100, char[8])
#define SIOCGSTAMPNS _IOR('s', 101, char[8])
-
-#define SIOCADDRT 0x890B
-#define SIOCDELRT 0x890C
-#define SIOCRTMSG 0x890D
-
-#define SIOCGIFNAME 0x8910
-#define SIOCSIFLINK 0x8911
-#define SIOCGIFCONF 0x8912
-#define SIOCGIFFLAGS 0x8913
-#define SIOCSIFFLAGS 0x8914
-#define SIOCGIFADDR 0x8915
-#define SIOCSIFADDR 0x8916
-#define SIOCGIFDSTADDR 0x8917
-#define SIOCSIFDSTADDR 0x8918
-#define SIOCGIFBRDADDR 0x8919
-#define SIOCSIFBRDADDR 0x891a
-#define SIOCGIFNETMASK 0x891b
-#define SIOCSIFNETMASK 0x891c
-#define SIOCGIFMETRIC 0x891d
-#define SIOCSIFMETRIC 0x891e
-#define SIOCGIFMEM 0x891f
-#define SIOCSIFMEM 0x8920
-#define SIOCGIFMTU 0x8921
-#define SIOCSIFMTU 0x8922
-#define SIOCSIFNAME 0x8923
-#define SIOCSIFHWADDR 0x8924
-#define SIOCGIFENCAP 0x8925
-#define SIOCSIFENCAP 0x8926
-#define SIOCGIFHWADDR 0x8927
-#define SIOCGIFSLAVE 0x8929
-#define SIOCSIFSLAVE 0x8930
-#define SIOCADDMULTI 0x8931
-#define SIOCDELMULTI 0x8932
-#define SIOCGIFINDEX 0x8933
-#define SIOGIFINDEX SIOCGIFINDEX
-#define SIOCSIFPFLAGS 0x8934
-#define SIOCGIFPFLAGS 0x8935
-#define SIOCDIFADDR 0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT 0x8938
-
-#define SIOCGIFBR 0x8940
-#define SIOCSIFBR 0x8941
-
-#define SIOCGIFTXQLEN 0x8942
-#define SIOCSIFTXQLEN 0x8943
-
-#define SIOCDARP 0x8953
-#define SIOCGARP 0x8954
-#define SIOCSARP 0x8955
-
-#define SIOCDRARP 0x8960
-#define SIOCGRARP 0x8961
-#define SIOCSRARP 0x8962
-
-#define SIOCGIFMAP 0x8970
-#define SIOCSIFMAP 0x8971
-
-#define SIOCADDDLCI 0x8980
-#define SIOCDELDLCI 0x8981
-
-#define SIOCDEVPRIVATE 0x89F0
-#define SIOCPROTOPRIVATE 0x89E0
diff --git a/configure b/configure
index 997e6652..2123ddce 100755
--- a/configure
+++ b/configure
@@ -320,8 +320,8 @@ mips64*|mipsisa64*) ARCH=mips64 ;;
mips*) ARCH=mips ;;
microblaze*) ARCH=microblaze ;;
or1k*) ARCH=or1k ;;
-powerpc64*) ARCH=powerpc64 ;;
-powerpc*) ARCH=powerpc ;;
+powerpc64*|ppc64*) ARCH=powerpc64 ;;
+powerpc*|ppc*) ARCH=powerpc ;;
sh[1-9bel-]*|sh|superh*) ARCH=sh ;;
s390x*) ARCH=s390x ;;
unknown) fail "$0: unable to detect target arch; try $0 --target=..." ;;
@@ -573,6 +573,20 @@ printf "using compiler runtime libraries: %s\n" "$LIBCC"
SUBARCH=
t="$CFLAGS_C99FSE $CPPFLAGS $CFLAGS"
+if test "$ARCH" = "i386" ; then
+printf "checking whether compiler can use ebx in PIC asm constraints... "
+cat > "$tmpc" <<EOF
+int foo(int x) { __asm__ ( "" : "+b"(x) ); return x; }
+EOF
+if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -fPIC \
+ -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+printf "yes\n"
+else
+printf "no\n"
+CFLAGS_AUTO="$CFLAGS_AUTO -DBROKEN_EBX_ASM"
+fi
+fi
+
if test "$ARCH" = "x86_64" ; then
trycppif __ILP32__ "$t" && ARCH=x32
fi
diff --git a/include/alltypes.h.in b/include/alltypes.h.in
index 622ca01d..4cc879b1 100644
--- a/include/alltypes.h.in
+++ b/include/alltypes.h.in
@@ -57,6 +57,7 @@ TYPEDEF struct { unsigned __attr; } pthread_condattr_t;
TYPEDEF struct { unsigned __attr; } pthread_barrierattr_t;
TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t;
+STRUCT _IO_FILE { char __x; };
TYPEDEF struct _IO_FILE FILE;
TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t;
diff --git a/include/elf.h b/include/elf.h
index 54f41a10..aad522e4 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -314,7 +314,8 @@ typedef struct {
#define EM_AMDGPU 224
#define EM_RISCV 243
#define EM_BPF 247
-#define EM_NUM 248
+#define EM_CSKY 252
+#define EM_NUM 253
#define EM_ALPHA 0x9026
@@ -681,11 +682,15 @@ typedef struct {
#define NT_ARM_HW_WATCH 0x403
#define NT_ARM_SYSTEM_CALL 0x404
#define NT_ARM_SVE 0x405
+#define NT_ARM_PAC_MASK 0x406
#define NT_METAG_CBUF 0x500
#define NT_METAG_RPIPE 0x501
#define NT_METAG_TLS 0x502
#define NT_ARC_V2 0x600
#define NT_VMCOREDD 0x700
+#define NT_MIPS_DSP 0x800
+#define NT_MIPS_FP_MODE 0x801
+#define NT_MIPS_MSA 0x802
#define NT_VERSION 1
@@ -2619,6 +2624,61 @@ enum
#define R_ARM_NUM 256
+#define R_CKCORE_NONE 0
+#define R_CKCORE_ADDR32 1
+#define R_CKCORE_PCRELIMM8BY4 2
+#define R_CKCORE_PCRELIMM11BY2 3
+#define R_CKCORE_PCREL32 5
+#define R_CKCORE_PCRELJSR_IMM11BY2 6
+#define R_CKCORE_RELATIVE 9
+#define R_CKCORE_COPY 10
+#define R_CKCORE_GLOB_DAT 11
+#define R_CKCORE_JUMP_SLOT 12
+#define R_CKCORE_GOTOFF 13
+#define R_CKCORE_GOTPC 14
+#define R_CKCORE_GOT32 15
+#define R_CKCORE_PLT32 16
+#define R_CKCORE_ADDRGOT 17
+#define R_CKCORE_ADDRPLT 18
+#define R_CKCORE_PCREL_IMM26BY2 19
+#define R_CKCORE_PCREL_IMM16BY2 20
+#define R_CKCORE_PCREL_IMM16BY4 21
+#define R_CKCORE_PCREL_IMM10BY2 22
+#define R_CKCORE_PCREL_IMM10BY4 23
+#define R_CKCORE_ADDR_HI16 24
+#define R_CKCORE_ADDR_LO16 25
+#define R_CKCORE_GOTPC_HI16 26
+#define R_CKCORE_GOTPC_LO16 27
+#define R_CKCORE_GOTOFF_HI16 28
+#define R_CKCORE_GOTOFF_LO16 29
+#define R_CKCORE_GOT12 30
+#define R_CKCORE_GOT_HI16 31
+#define R_CKCORE_GOT_LO16 32
+#define R_CKCORE_PLT12 33
+#define R_CKCORE_PLT_HI16 34
+#define R_CKCORE_PLT_LO16 35
+#define R_CKCORE_ADDRGOT_HI16 36
+#define R_CKCORE_ADDRGOT_LO16 37
+#define R_CKCORE_ADDRPLT_HI16 38
+#define R_CKCORE_ADDRPLT_LO16 39
+#define R_CKCORE_PCREL_JSR_IMM26BY2 40
+#define R_CKCORE_TOFFSET_LO16 41
+#define R_CKCORE_DOFFSET_LO16 42
+#define R_CKCORE_PCREL_IMM18BY2 43
+#define R_CKCORE_DOFFSET_IMM18 44
+#define R_CKCORE_DOFFSET_IMM18BY2 45
+#define R_CKCORE_DOFFSET_IMM18BY4 46
+#define R_CKCORE_GOT_IMM18BY4 48
+#define R_CKCORE_PLT_IMM18BY4 49
+#define R_CKCORE_PCREL_IMM7BY4 50
+#define R_CKCORE_TLS_LE32 51
+#define R_CKCORE_TLS_IE32 52
+#define R_CKCORE_TLS_GD32 53
+#define R_CKCORE_TLS_LDM32 54
+#define R_CKCORE_TLS_LDO32 55
+#define R_CKCORE_TLS_DTPMOD32 56
+#define R_CKCORE_TLS_DTPOFF32 57
+#define R_CKCORE_TLS_TPOFF32 58
#define EF_IA_64_MASKOS 0x0000000f
diff --git a/include/fcntl.h b/include/fcntl.h
index 4d91338b..1fba777d 100644
--- a/include/fcntl.h
+++ b/include/fcntl.h
@@ -36,8 +36,9 @@ int openat(int, const char *, int, ...);
int posix_fadvise(int, off_t, off_t, int);
int posix_fallocate(int, off_t, off_t);
-#define O_SEARCH O_PATH
-#define O_EXEC O_PATH
+#define O_SEARCH O_PATH
+#define O_EXEC O_PATH
+#define O_TTY_INIT 0
#define O_ACCMODE (03|O_SEARCH)
#define O_RDONLY 00
@@ -66,8 +67,10 @@ int posix_fallocate(int, off_t, off_t);
#define POSIX_FADV_RANDOM 1
#define POSIX_FADV_SEQUENTIAL 2
#define POSIX_FADV_WILLNEED 3
+#ifndef POSIX_FADV_DONTNEED
#define POSIX_FADV_DONTNEED 4
#define POSIX_FADV_NOREUSE 5
+#endif
#undef SEEK_SET
#undef SEEK_CUR
diff --git a/include/math.h b/include/math.h
index fea34686..14f28ec8 100644
--- a/include/math.h
+++ b/include/math.h
@@ -36,6 +36,18 @@ extern "C" {
#define FP_SUBNORMAL 3
#define FP_NORMAL 4
+#ifdef __FP_FAST_FMA
+#define FP_FAST_FMA 1
+#endif
+
+#ifdef __FP_FAST_FMAF
+#define FP_FAST_FMAF 1
+#endif
+
+#ifdef __FP_FAST_FMAL
+#define FP_FAST_FMAL 1
+#endif
+
int __fpclassify(double);
int __fpclassifyf(float);
int __fpclassifyl(long double);
diff --git a/include/netinet/in.h b/include/netinet/in.h
index 192679a6..3ba7546c 100644
--- a/include/netinet/in.h
+++ b/include/netinet/in.h
@@ -335,6 +335,7 @@ struct ip6_mtuinfo {
#define IPV6_V6ONLY 26
#define IPV6_JOIN_ANYCAST 27
#define IPV6_LEAVE_ANYCAST 28
+#define IPV6_MULTICAST_ALL 29
#define IPV6_IPSEC_POLICY 34
#define IPV6_XFRM_POLICY 35
#define IPV6_HDRINCL 36
diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h
index 584af2f5..c7a86480 100644
--- a/include/netinet/tcp.h
+++ b/include/netinet/tcp.h
@@ -72,6 +72,11 @@ enum {
TCP_NLA_SND_SSTHRESH,
TCP_NLA_DELIVERED,
TCP_NLA_DELIVERED_CE,
+ TCP_NLA_BYTES_SENT,
+ TCP_NLA_BYTES_RETRANS,
+ TCP_NLA_DSACK_DUPS,
+ TCP_NLA_REORD_SEEN,
+ TCP_NLA_SRTT,
};
#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
@@ -225,6 +230,10 @@ struct tcp_info {
uint64_t tcpi_sndbuf_limited;
uint32_t tcpi_delivered;
uint32_t tcpi_delivered_ce;
+ uint64_t tcpi_bytes_sent;
+ uint64_t tcpi_bytes_retrans;
+ uint32_t tcpi_dsack_dups;
+ uint32_t tcpi_reord_seen;
};
#define TCP_MD5SIG_MAXKEYLEN 80
diff --git a/include/netinet/udp.h b/include/netinet/udp.h
index 993c3478..ffd89079 100644
--- a/include/netinet/udp.h
+++ b/include/netinet/udp.h
@@ -27,12 +27,14 @@ struct udphdr {
#define UDP_NO_CHECK6_TX 101
#define UDP_NO_CHECK6_RX 102
#define UDP_SEGMENT 103
+#define UDP_GRO 104
#define UDP_ENCAP_ESPINUDP_NON_IKE 1
#define UDP_ENCAP_ESPINUDP 2
#define UDP_ENCAP_L2TPINUDP 3
#define UDP_ENCAP_GTP0 4
#define UDP_ENCAP_GTP1U 5
+#define UDP_ENCAP_RXRPC 6
#define SOL_UDP 17
diff --git a/include/netpacket/packet.h b/include/netpacket/packet.h
index f2210ce8..b36e092a 100644
--- a/include/netpacket/packet.h
+++ b/include/netpacket/packet.h
@@ -48,6 +48,7 @@ struct packet_mreq {
#define PACKET_QDISC_BYPASS 20
#define PACKET_ROLLOVER_STATS 21
#define PACKET_FANOUT_DATA 22
+#define PACKET_IGNORE_OUTGOING 23
#define PACKET_MR_MULTICAST 0
#define PACKET_MR_PROMISC 1
diff --git a/include/stdio.h b/include/stdio.h
index afadd912..3604198c 100644
--- a/include/stdio.h
+++ b/include/stdio.h
@@ -11,6 +11,10 @@ extern "C" {
#define __NEED___isoc_va_list
#define __NEED_size_t
+#if __STDC_VERSION__ < 201112L
+#define __NEED_struct__IO_FILE
+#endif
+
#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
|| defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
|| defined(_BSD_SOURCE)
diff --git a/include/sys/inotify.h b/include/sys/inotify.h
index 46638cac..69b58631 100644
--- a/include/sys/inotify.h
+++ b/include/sys/inotify.h
@@ -40,6 +40,7 @@ struct inotify_event {
#define IN_ONLYDIR 0x01000000
#define IN_DONT_FOLLOW 0x02000000
#define IN_EXCL_UNLINK 0x04000000
+#define IN_MASK_CREATE 0x10000000
#define IN_MASK_ADD 0x20000000
#define IN_ISDIR 0x40000000
diff --git a/include/sys/ioctl.h b/include/sys/ioctl.h
index d0415b3d..372e3ddc 100644
--- a/include/sys/ioctl.h
+++ b/include/sys/ioctl.h
@@ -6,6 +6,116 @@ extern "C" {
#include <bits/ioctl.h>
+#define N_TTY 0
+#define N_SLIP 1
+#define N_MOUSE 2
+#define N_PPP 3
+#define N_STRIP 4
+#define N_AX25 5
+#define N_X25 6
+#define N_6PACK 7
+#define N_MASC 8
+#define N_R3964 9
+#define N_PROFIBUS_FDL 10
+#define N_IRDA 11
+#define N_SMSBLOCK 12
+#define N_HDLC 13
+#define N_SYNC_PPP 14
+#define N_HCI 15
+#define N_GIGASET_M101 16
+#define N_SLCAN 17
+#define N_PPS 18
+#define N_V253 19
+#define N_CAIF 20
+#define N_GSM0710 21
+#define N_TI_WL 22
+#define N_TRACESINK 23
+#define N_TRACEROUTER 24
+#define N_NCI 25
+#define N_SPEAKUP 26
+#define N_NULL 27
+
+#define TIOCPKT_DATA 0
+#define TIOCPKT_FLUSHREAD 1
+#define TIOCPKT_FLUSHWRITE 2
+#define TIOCPKT_STOP 4
+#define TIOCPKT_START 8
+#define TIOCPKT_NOSTOP 16
+#define TIOCPKT_DOSTOP 32
+#define TIOCPKT_IOCTL 64
+
+#define TIOCSER_TEMT 1
+
+struct winsize {
+ unsigned short ws_row;
+ unsigned short ws_col;
+ unsigned short ws_xpixel;
+ unsigned short ws_ypixel;
+};
+
+#define SIOCADDRT 0x890B
+#define SIOCDELRT 0x890C
+#define SIOCRTMSG 0x890D
+
+#define SIOCGIFNAME 0x8910
+#define SIOCSIFLINK 0x8911
+#define SIOCGIFCONF 0x8912
+#define SIOCGIFFLAGS 0x8913
+#define SIOCSIFFLAGS 0x8914
+#define SIOCGIFADDR 0x8915
+#define SIOCSIFADDR 0x8916
+#define SIOCGIFDSTADDR 0x8917
+#define SIOCSIFDSTADDR 0x8918
+#define SIOCGIFBRDADDR 0x8919
+#define SIOCSIFBRDADDR 0x891a
+#define SIOCGIFNETMASK 0x891b
+#define SIOCSIFNETMASK 0x891c
+#define SIOCGIFMETRIC 0x891d
+#define SIOCSIFMETRIC 0x891e
+#define SIOCGIFMEM 0x891f
+#define SIOCSIFMEM 0x8920
+#define SIOCGIFMTU 0x8921
+#define SIOCSIFMTU 0x8922
+#define SIOCSIFNAME 0x8923
+#define SIOCSIFHWADDR 0x8924
+#define SIOCGIFENCAP 0x8925
+#define SIOCSIFENCAP 0x8926
+#define SIOCGIFHWADDR 0x8927
+#define SIOCGIFSLAVE 0x8929
+#define SIOCSIFSLAVE 0x8930
+#define SIOCADDMULTI 0x8931
+#define SIOCDELMULTI 0x8932
+#define SIOCGIFINDEX 0x8933
+#define SIOGIFINDEX SIOCGIFINDEX
+#define SIOCSIFPFLAGS 0x8934
+#define SIOCGIFPFLAGS 0x8935
+#define SIOCDIFADDR 0x8936
+#define SIOCSIFHWBROADCAST 0x8937
+#define SIOCGIFCOUNT 0x8938
+
+#define SIOCGIFBR 0x8940
+#define SIOCSIFBR 0x8941
+
+#define SIOCGIFTXQLEN 0x8942
+#define SIOCSIFTXQLEN 0x8943
+
+#define SIOCDARP 0x8953
+#define SIOCGARP 0x8954
+#define SIOCSARP 0x8955
+
+#define SIOCDRARP 0x8960
+#define SIOCGRARP 0x8961
+#define SIOCSRARP 0x8962
+
+#define SIOCGIFMAP 0x8970
+#define SIOCSIFMAP 0x8971
+
+#define SIOCADDDLCI 0x8980
+#define SIOCDELDLCI 0x8981
+
+#define SIOCDEVPRIVATE 0x89F0
+#define SIOCPROTOPRIVATE 0x89E0
+
int ioctl (int, int, ...);
#ifdef __cplusplus
diff --git a/include/sys/membarrier.h b/include/sys/membarrier.h
new file mode 100644
index 00000000..10cb3108
--- /dev/null
+++ b/include/sys/membarrier.h
@@ -0,0 +1,17 @@
+#ifndef _SYS_MEMBARRIER_H
+#define _SYS_MEMBARRIER_H
+
+#define MEMBARRIER_CMD_QUERY 0
+#define MEMBARRIER_CMD_GLOBAL 1
+#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 2
+#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 4
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 8
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 16
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 32
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 64
+
+#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL
+
+int membarrier(int, int);
+
+#endif
diff --git a/include/sys/mman.h b/include/sys/mman.h
index 99d02a2e..d0761b18 100644
--- a/include/sys/mman.h
+++ b/include/sys/mman.h
@@ -46,7 +46,9 @@ extern "C" {
#define MAP_HUGE_2MB (21 << 26)
#define MAP_HUGE_8MB (23 << 26)
#define MAP_HUGE_16MB (24 << 26)
+#define MAP_HUGE_32MB (25 << 26)
#define MAP_HUGE_256MB (28 << 26)
+#define MAP_HUGE_512MB (29 << 26)
#define MAP_HUGE_1GB (30 << 26)
#define MAP_HUGE_2GB (31 << 26)
#define MAP_HUGE_16GB (34U << 26)
diff --git a/include/sys/prctl.h b/include/sys/prctl.h
index af76408c..9cc3a80e 100644
--- a/include/sys/prctl.h
+++ b/include/sys/prctl.h
@@ -139,12 +139,20 @@ struct prctl_mm_map {
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#define PR_SPEC_STORE_BYPASS 0
+#define PR_SPEC_INDIRECT_BRANCH 1
#define PR_SPEC_NOT_AFFECTED 0
#define PR_SPEC_PRCTL (1UL << 0)
#define PR_SPEC_ENABLE (1UL << 1)
#define PR_SPEC_DISABLE (1UL << 2)
#define PR_SPEC_FORCE_DISABLE (1UL << 3)
+#define PR_PAC_RESET_KEYS 54
+#define PR_PAC_APIAKEY (1UL << 0)
+#define PR_PAC_APIBKEY (1UL << 1)
+#define PR_PAC_APDAKEY (1UL << 2)
+#define PR_PAC_APDBKEY (1UL << 3)
+#define PR_PAC_APGAKEY (1UL << 4)
+
int prctl (int, ...);
#ifdef __cplusplus
diff --git a/include/sys/shm.h b/include/sys/shm.h
index 662fde59..8ef4e8f2 100644
--- a/include/sys/shm.h
+++ b/include/sys/shm.h
@@ -49,7 +49,9 @@ extern "C" {
#define SHM_HUGE_2MB (21 << 26)
#define SHM_HUGE_8MB (23 << 26)
#define SHM_HUGE_16MB (24 << 26)
+#define SHM_HUGE_32MB (25 << 26)
#define SHM_HUGE_256MB (28 << 26)
+#define SHM_HUGE_512MB (29 << 26)
#define SHM_HUGE_1GB (30 << 26)
#define SHM_HUGE_2GB (31 << 26)
#define SHM_HUGE_16GB (34U << 26)
diff --git a/include/sys/socket.h b/include/sys/socket.h
index 1dec4b00..cd38a4c8 100644
--- a/include/sys/socket.h
+++ b/include/sys/socket.h
@@ -235,6 +235,8 @@ struct linger {
#define SCM_TIMESTAMPING_PKTINFO 58
#define SO_PEERGROUPS 59
#define SO_ZEROCOPY 60
+#define SO_TXTIME 61
+#define SCM_TXTIME SO_TXTIME
#ifndef SOL_SOCKET
#define SOL_SOCKET 1
diff --git a/include/tar.h b/include/tar.h
index 2eba66ec..be589842 100644
--- a/include/tar.h
+++ b/include/tar.h
@@ -1,13 +1,9 @@
#ifndef _TAR_H
#define _TAR_H
-#include <features.h>
-
#define TSUID 04000
#define TSGID 02000
-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
#define TSVTX 01000
-#endif
#define TUREAD 00400
#define TUWRITE 00200
#define TUEXEC 00100
diff --git a/include/ucontext.h b/include/ucontext.h
index 3bb776ed..0f757125 100644
--- a/include/ucontext.h
+++ b/include/ucontext.h
@@ -15,7 +15,7 @@ extern "C" {
struct __ucontext;
int getcontext(struct __ucontext *);
-void makecontext(struct __ucontext *, void (*)(void), int, ...);
+void makecontext(struct __ucontext *, void (*)(), int, ...);
int setcontext(const struct __ucontext *);
int swapcontext(struct __ucontext *, const struct __ucontext *);
diff --git a/include/wchar.h b/include/wchar.h
index 369b1e9f..88eb55b1 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -14,6 +14,10 @@ extern "C" {
#define __NEED_wint_t
#define __NEED_mbstate_t
+#if __STDC_VERSION__ < 201112L
+#define __NEED_struct__IO_FILE
+#endif
+
#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
|| defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
#define __NEED_locale_t
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index ec921dfd..1398ff45 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1,4 +1,5 @@
#define _GNU_SOURCE
+#define SYSCALL_NO_TLS 1
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
@@ -17,6 +18,8 @@
#include <pthread.h>
#include <ctype.h>
#include <dlfcn.h>
+#include <semaphore.h>
+#include <sys/membarrier.h>
#include "pthread_impl.h"
#include "libc.h"
#include "dynlink.h"
@@ -27,6 +30,9 @@ static void error(const char *, ...);
#define MAXP2(a,b) (-(-(a)&-(b)))
#define ALIGN(x,y) ((x)+(y)-1 & -(y))
+#define container_of(p,t,m) ((t*)((char *)(p)-offsetof(t,m)))
+#define countof(a) ((sizeof (a))/(sizeof (a)[0]))
+
struct debug {
int ver;
void *head;
@@ -67,14 +73,19 @@ struct dso {
char relocated;
char constructed;
char kernel_mapped;
+ char mark;
+ char bfs_built;
+ char runtime_loaded;
struct dso **deps, *needed_by;
+ size_t ndeps_direct;
+ size_t next_dep;
+ int ctor_visitor;
char *rpath_orig, *rpath;
struct tls_module tls;
size_t tls_id;
size_t relro_start, relro_end;
uintptr_t *new_dtv;
unsigned char *new_tls;
- volatile int new_dtv_idx, new_tls_idx;
struct td_index *td_index;
struct dso *fini_next;
char *shortname;
@@ -114,16 +125,21 @@ static int runtime;
static int ldd_mode;
static int ldso_fail;
static int noload;
+static int shutting_down;
static jmp_buf *rtld_fail;
static pthread_rwlock_t lock;
static struct debug debug;
static struct tls_module *tls_tail;
static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
static size_t static_tls_cnt;
-static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
+static pthread_mutex_t init_fini_lock;
+static pthread_cond_t ctor_cond;
+static struct dso *builtin_deps[2];
+static struct dso *const no_deps[1];
+static struct dso *builtin_ctor_queue[4];
+static struct dso **main_ctor_queue;
static struct fdpic_loadmap *app_loadmap;
static struct fdpic_dummy_loadmap app_dummy_loadmap;
-static struct dso *const nodeps_dummy;
struct debug *_dl_debug_addr = &debug;
@@ -904,7 +920,7 @@ static void *dl_mmap(size_t n)
#else
p = (void *)__syscall(SYS_mmap, 0, n, prot, flags, -1, 0);
#endif
- return p == MAP_FAILED ? 0 : p;
+ return (unsigned long)p > -4096UL ? 0 : p;
}
static void makefuncdescs(struct dso *p)
@@ -1101,6 +1117,7 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
p->ino = st.st_ino;
p->needed_by = needed_by;
p->name = p->buf;
+ p->runtime_loaded = runtime;
strcpy(p->name, pathname);
/* Add a shortname only if name arg was not an explicit pathname. */
if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
@@ -1108,9 +1125,9 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
p->tls_id = ++tls_cnt;
tls_align = MAXP2(tls_align, p->tls.align);
#ifdef TLS_ABOVE_TP
- p->tls.offset = tls_offset + ( (tls_align-1) &
- -(tls_offset + (uintptr_t)p->tls.image) );
- tls_offset += p->tls.size;
+ p->tls.offset = tls_offset + ( (p->tls.align-1) &
+ (-tls_offset + (uintptr_t)p->tls.image) );
+ tls_offset = p->tls.offset + p->tls.size;
#else
tls_offset += p->tls.size + p->tls.align - 1;
tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
@@ -1136,30 +1153,99 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
return p;
}
+static void load_direct_deps(struct dso *p)
+{
+ size_t i, cnt=0;
+
+ if (p->deps) return;
+ /* For head, all preloads are direct pseudo-dependencies.
+ * Count and include them now to avoid realloc later. */
+ if (p==head) for (struct dso *q=p->next; q; q=q->next)
+ cnt++;
+ for (i=0; p->dynv[i]; i+=2)
+ if (p->dynv[i] == DT_NEEDED) cnt++;
+ /* Use builtin buffer for apps with no external deps, to
+ * preserve property of no runtime failure paths. */
+ p->deps = (p==head && cnt<2) ? builtin_deps :
+ calloc(cnt+1, sizeof *p->deps);
+ if (!p->deps) {
+ error("Error loading dependencies for %s", p->name);
+ if (runtime) longjmp(*rtld_fail, 1);
+ }
+ cnt=0;
+ if (p==head) for (struct dso *q=p->next; q; q=q->next)
+ p->deps[cnt++] = q;
+ for (i=0; p->dynv[i]; i+=2) {
+ if (p->dynv[i] != DT_NEEDED) continue;
+ struct dso *dep = load_library(p->strings + p->dynv[i+1], p);
+ if (!dep) {
+ error("Error loading shared library %s: %m (needed by %s)",
+ p->strings + p->dynv[i+1], p->name);
+ if (runtime) longjmp(*rtld_fail, 1);
+ continue;
+ }
+ p->deps[cnt++] = dep;
+ }
+ p->deps[cnt] = 0;
+ p->ndeps_direct = cnt;
+}
+
static void load_deps(struct dso *p)
{
- size_t i, ndeps=0;
- struct dso ***deps = &p->deps, **tmp, *dep;
- for (; p; p=p->next) {
- for (i=0; p->dynv[i]; i+=2) {
- if (p->dynv[i] != DT_NEEDED) continue;
- dep = load_library(p->strings + p->dynv[i+1], p);
- if (!dep) {
- error("Error loading shared library %s: %m (needed by %s)",
- p->strings + p->dynv[i+1], p->name);
- if (runtime) longjmp(*rtld_fail, 1);
- continue;
- }
- if (runtime) {
- tmp = realloc(*deps, sizeof(*tmp)*(ndeps+2));
- if (!tmp) longjmp(*rtld_fail, 1);
- tmp[ndeps++] = dep;
- tmp[ndeps] = 0;
- *deps = tmp;
- }
+ if (p->deps) return;
+ for (; p; p=p->next)
+ load_direct_deps(p);
+}
+
+static void extend_bfs_deps(struct dso *p)
+{
+ size_t i, j, cnt, ndeps_all;
+ struct dso **tmp;
+
+ /* Can't use realloc if the original p->deps was allocated at
+ * program entry and malloc has been replaced, or if it's
+ * the builtin non-allocated trivial main program deps array. */
+ int no_realloc = (__malloc_replaced && !p->runtime_loaded)
+ || p->deps == builtin_deps;
+
+ if (p->bfs_built) return;
+ ndeps_all = p->ndeps_direct;
+
+ /* Mark existing (direct) deps so they won't be duplicated. */
+ for (i=0; p->deps[i]; i++)
+ p->deps[i]->mark = 1;
+
+ /* For each dependency already in the list, copy its list of direct
+ * dependencies to the list, excluding any items already in the
+ * list. Note that the list this loop iterates over will grow during
+ * the loop, but since duplicates are excluded, growth is bounded. */
+ for (i=0; p->deps[i]; i++) {
+ struct dso *dep = p->deps[i];
+ for (j=cnt=0; j<dep->ndeps_direct; j++)
+ if (!dep->deps[j]->mark) cnt++;
+ tmp = no_realloc ?
+ malloc(sizeof(*tmp) * (ndeps_all+cnt+1)) :
+ realloc(p->deps, sizeof(*tmp) * (ndeps_all+cnt+1));
+ if (!tmp) {
+ error("Error recording dependencies for %s", p->name);
+ if (runtime) longjmp(*rtld_fail, 1);
+ continue;
+ }
+ if (no_realloc) {
+ memcpy(tmp, p->deps, sizeof(*tmp) * (ndeps_all+1));
+ no_realloc = 0;
+ }
+ p->deps = tmp;
+ for (j=0; j<dep->ndeps_direct; j++) {
+ if (dep->deps[j]->mark) continue;
+ dep->deps[j]->mark = 1;
+ p->deps[ndeps_all++] = dep->deps[j];
}
+ p->deps[ndeps_all] = 0;
}
- if (!*deps) *deps = (struct dso **)&nodeps_dummy;
+ p->bfs_built = 1;
+ for (p=head; p; p=p->next)
+ p->mark = 0;
}
static void load_preload(char *s)
@@ -1275,7 +1361,18 @@ void __libc_exit_fini()
{
struct dso *p;
size_t dyn[DYN_CNT];
+ int self = __pthread_self()->tid;
+
+ /* Take both locks before setting shutting_down, so that
+ * either lock is sufficient to read its value. The lock
+ * order matches that in dlopen to avoid deadlock. */
+ pthread_rwlock_wrlock(&lock);
+ pthread_mutex_lock(&init_fini_lock);
+ shutting_down = 1;
+ pthread_rwlock_unlock(&lock);
for (p=fini_head; p; p=p->fini_next) {
+ while (p->ctor_visitor && p->ctor_visitor!=self)
+ pthread_cond_wait(&ctor_cond, &init_fini_lock);
if (!p->constructed) continue;
decode_vec(p->dynv, dyn, DYN_CNT);
if (dyn[0] & (1<<DT_FINI_ARRAY)) {
@@ -1290,22 +1387,90 @@ void __libc_exit_fini()
}
}
-static void do_init_fini(struct dso *p)
+static struct dso **queue_ctors(struct dso *dso)
{
- size_t dyn[DYN_CNT];
- int need_locking = libc.threads_minus_1;
- /* Allow recursive calls that arise when a library calls
- * dlopen from one of its constructors, but block any
- * other threads until all ctors have finished. */
- if (need_locking) pthread_mutex_lock(&init_fini_lock);
- for (; p; p=p->prev) {
- if (p->constructed) continue;
- p->constructed = 1;
+ size_t cnt, qpos, spos, i;
+ struct dso *p, **queue, **stack;
+
+ if (ldd_mode) return 0;
+
+ /* Bound on queue size is the total number of indirect deps.
+ * If a bfs deps list was built, we can use it. Otherwise,
+ * bound by the total number of DSOs, which is always safe and
+ * is reasonable where we use it (for main app at startup). */
+ if (dso->bfs_built) {
+ for (cnt=0; dso->deps[cnt]; cnt++)
+ dso->deps[cnt]->mark = 0;
+ cnt++; /* self, not included in deps */
+ } else {
+ for (cnt=0, p=head; p; cnt++, p=p->next)
+ p->mark = 0;
+ }
+ cnt++; /* termination slot */
+ if (dso==head && cnt <= countof(builtin_ctor_queue))
+ queue = builtin_ctor_queue;
+ else
+ queue = calloc(cnt, sizeof *queue);
+
+ if (!queue) {
+ error("Error allocating constructor queue: %m\n");
+ if (runtime) longjmp(*rtld_fail, 1);
+ return 0;
+ }
+
+ /* Opposite ends of the allocated buffer serve as an output queue
+ * and a working stack. Set up the initial stack with just the argument
+ * dso, and leave the initial queue empty... */
+ stack = queue;
+ qpos = 0;
+ spos = cnt;
+ stack[--spos] = dso;
+ dso->next_dep = 0;
+ dso->mark = 1;
+
+ /* Then perform pseudo-DFS sort, but ignoring circular deps. */
+ while (spos<cnt) {
+ p = stack[spos++];
+ while (p->next_dep < p->ndeps_direct) {
+ if (p->deps[p->next_dep]->mark) {
+ p->next_dep++;
+ } else {
+ stack[--spos] = p;
+ p = p->deps[p->next_dep];
+ p->next_dep = 0;
+ p->mark = 1;
+ }
+ }
+ queue[qpos++] = p;
+ }
+ queue[qpos] = 0;
+ for (i=0; i<qpos; i++) queue[i]->mark = 0;
+
+ return queue;
+}
+
+static void do_init_fini(struct dso **queue)
+{
+ struct dso *p;
+ size_t dyn[DYN_CNT], i;
+ int self = __pthread_self()->tid;
+
+ pthread_mutex_lock(&init_fini_lock);
+ for (i=0; (p=queue[i]); i++) {
+ while ((p->ctor_visitor && p->ctor_visitor!=self) || shutting_down)
+ pthread_cond_wait(&ctor_cond, &init_fini_lock);
+ if (p->ctor_visitor || p->constructed)
+ continue;
+ p->ctor_visitor = self;
+
decode_vec(p->dynv, dyn, DYN_CNT);
if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
p->fini_next = fini_head;
fini_head = p;
}
+
+ pthread_mutex_unlock(&init_fini_lock);
+
#ifndef NO_LEGACY_INITFINI
if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
fpaddr(p, dyn[DT_INIT])();
@@ -1315,17 +1480,21 @@ static void do_init_fini(struct dso *p)
size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
while (n--) ((void (*)(void))*fn++)();
}
- if (!need_locking && libc.threads_minus_1) {
- need_locking = 1;
- pthread_mutex_lock(&init_fini_lock);
- }
+
+ pthread_mutex_lock(&init_fini_lock);
+ p->ctor_visitor = 0;
+ p->constructed = 1;
+ pthread_cond_broadcast(&ctor_cond);
}
- if (need_locking) pthread_mutex_unlock(&init_fini_lock);
+ pthread_mutex_unlock(&init_fini_lock);
}
void __libc_start_init(void)
{
- do_init_fini(tail);
+ do_init_fini(main_ctor_queue);
+ if (!__malloc_replaced && main_ctor_queue != builtin_ctor_queue)
+ free(main_ctor_queue);
+ main_ctor_queue = 0;
}
static void dl_debug_state(void)
@@ -1338,48 +1507,6 @@ void __init_tls(size_t *auxv)
{
}
-hidden void *__tls_get_new(tls_mod_off_t *v)
-{
- pthread_t self = __pthread_self();
-
- /* Block signals to make accessing new TLS async-signal-safe */
- sigset_t set;
- __block_all_sigs(&set);
- if (v[0] <= self->dtv[0]) {
- __restore_sigs(&set);
- return (void *)(self->dtv[v[0]] + v[1]);
- }
-
- /* This is safe without any locks held because, if the caller
- * is able to request the Nth entry of the DTV, the DSO list
- * must be valid at least that far out and it was synchronized
- * at program startup or by an already-completed call to dlopen. */
- struct dso *p;
- for (p=head; p->tls_id != v[0]; p=p->next);
-
- /* Get new DTV space from new DSO */
- uintptr_t *newdtv = p->new_dtv +
- (v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
- memcpy(newdtv, self->dtv, (self->dtv[0]+1) * sizeof(uintptr_t));
- newdtv[0] = v[0];
- self->dtv = self->dtv_copy = newdtv;
-
- /* Get new TLS memory from all new DSOs up to the requested one */
- unsigned char *mem;
- for (p=head; ; p=p->next) {
- if (!p->tls_id || self->dtv[p->tls_id]) continue;
- mem = p->new_tls + (p->tls.size + p->tls.align)
- * a_fetch_add(&p->new_tls_idx,1);
- mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
- & (p->tls.align-1);
- self->dtv[p->tls_id] = (uintptr_t)mem + DTP_OFFSET;
- memcpy(mem, p->tls.image, p->tls.len);
- if (p->tls_id == v[0]) break;
- }
- __restore_sigs(&set);
- return mem + v[1] + DTP_OFFSET;
-}
-
static void update_tls_size()
{
libc.tls_cnt = tls_cnt;
@@ -1392,6 +1519,56 @@ static void update_tls_size()
tls_align);
}
+static void install_new_tls(void)
+{
+ sigset_t set;
+ pthread_t self = __pthread_self(), td;
+ struct dso *dtv_provider = container_of(tls_tail, struct dso, tls);
+ uintptr_t (*newdtv)[tls_cnt+1] = (void *)dtv_provider->new_dtv;
+ struct dso *p;
+ size_t i, j;
+ size_t old_cnt = self->dtv[0];
+
+ __block_app_sigs(&set);
+ __tl_lock();
+ /* Copy existing dtv contents from all existing threads. */
+ for (i=0, td=self; !i || td!=self; i++, td=td->next) {
+ memcpy(newdtv+i, td->dtv,
+ (old_cnt+1)*sizeof(uintptr_t));
+ newdtv[i][0] = tls_cnt;
+ }
+ /* Install new dtls into the enlarged, uninstalled dtv copies. */
+ for (p=head; ; p=p->next) {
+ if (p->tls_id <= old_cnt) continue;
+ unsigned char *mem = p->new_tls;
+ for (j=0; j<i; j++) {
+ unsigned char *new = mem;
+ new += ((uintptr_t)p->tls.image - (uintptr_t)mem)
+ & (p->tls.align-1);
+ memcpy(new, p->tls.image, p->tls.len);
+ newdtv[j][p->tls_id] =
+ (uintptr_t)new + DTP_OFFSET;
+ mem += p->tls.size + p->tls.align;
+ }
+ if (p->tls_id == tls_cnt) break;
+ }
+
+ /* Broadcast barrier to ensure the contents of the new dtv are
+ * visible if the new dtv pointer is. The __membarrier function has a
+ * fallback emulation using signals for kernels that lack the
+ * feature at the syscall level. */
+
+ __membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
+
+ /* Install new dtv for each thread. */
+ for (j=0, td=self; !j || td!=self; j++, td=td->next) {
+ td->dtv = td->dtv_copy = newdtv[j];
+ }
+
+ __tl_unlock();
+ __restore_sigs(&set);
+}
+
/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
* following stage 2 and stage 3 functions via primitive symbolic lookup
* since it does not have access to their addresses to begin with. */
@@ -1508,6 +1685,8 @@ _Noreturn void __dls3(size_t *sp)
libc.auxv = auxv = (void *)(argv+i+1);
decode_vec(auxv, aux, AUX_CNT);
__hwcap = aux[AT_HWCAP];
+ search_vec(auxv, &__sysinfo, AT_SYSINFO);
+ __pthread_self()->sysinfo = __sysinfo;
libc.page_size = aux[AT_PAGESZ];
libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
@@ -1617,10 +1796,9 @@ _Noreturn void __dls3(size_t *sp)
app.tls_id = tls_cnt = 1;
#ifdef TLS_ABOVE_TP
app.tls.offset = GAP_ABOVE_TP;
- app.tls.offset += -GAP_ABOVE_TP & (app.tls.align-1);
- tls_offset = app.tls.offset + app.tls.size
- + ( -((uintptr_t)app.tls.image + app.tls.size)
- & (app.tls.align-1) );
+ app.tls.offset += (-GAP_ABOVE_TP + (uintptr_t)app.tls.image)
+ & (app.tls.align-1);
+ tls_offset = app.tls.offset + app.tls.size;
#else
tls_offset = app.tls.offset = app.tls.size
+ ( -((uintptr_t)app.tls.image + app.tls.size)
@@ -1650,6 +1828,7 @@ _Noreturn void __dls3(size_t *sp)
reclaim_gaps(&ldso);
/* Load preload/needed libraries, add symbols to global namespace. */
+ ldso.deps = (struct dso **)no_deps;
if (env_preload) load_preload(env_preload);
load_deps(&app);
for (struct dso *p=head; p; p=p->next)
@@ -1671,6 +1850,7 @@ _Noreturn void __dls3(size_t *sp)
vdso.name = "";
vdso.shortname = "linux-gate.so.1";
vdso.relocated = 1;
+ vdso.deps = (struct dso **)no_deps;
decode_dyn(&vdso);
vdso.prev = tail;
tail->next = &vdso;
@@ -1686,6 +1866,14 @@ _Noreturn void __dls3(size_t *sp)
}
}
+ /* This must be done before final relocations, since it calls
+ * malloc, which may be provided by the application. Calling any
+ * application code prior to the jump to its entry point is not
+ * valid in our model and does not work with FDPIC, where there
+ * are additional relocation-like fixups that only the entry point
+ * code can see to perform. */
+ main_ctor_queue = queue_ctors(&app);
+
/* The main program must be relocated LAST since it may contin
* copy relocations which depend on libraries' relocations. */
reloc_all(app.next);
@@ -1773,6 +1961,7 @@ void *dlopen(const char *file, int mode)
size_t i;
int cs;
jmp_buf jb;
+ struct dso **volatile ctor_queue = 0;
if (!file) return head;
@@ -1781,6 +1970,10 @@ void *dlopen(const char *file, int mode)
__inhibit_ptc();
p = 0;
+ if (shutting_down) {
+ error("Cannot dlopen while program is exiting.");
+ goto end;
+ }
orig_tls_tail = tls_tail;
orig_tls_cnt = tls_cnt;
orig_tls_offset = tls_offset;
@@ -1804,11 +1997,12 @@ void *dlopen(const char *file, int mode)
free(p->funcdescs);
if (p->rpath != p->rpath_orig)
free(p->rpath);
- if (p->deps != &nodeps_dummy)
- free(p->deps);
+ free(p->deps);
unmap_library(p);
free(p);
}
+ free(ctor_queue);
+ ctor_queue = 0;
if (!orig_tls_tail) libc.tls_head = 0;
tls_tail = orig_tls_tail;
if (tls_tail) tls_tail->next = 0;
@@ -1831,24 +2025,25 @@ void *dlopen(const char *file, int mode)
}
/* First load handling */
- int first_load = !p->deps;
- if (first_load) {
- load_deps(p);
- if (!p->relocated && (mode & RTLD_LAZY)) {
- prepare_lazy(p);
- for (i=0; p->deps[i]; i++)
- if (!p->deps[i]->relocated)
- prepare_lazy(p->deps[i]);
- }
+ load_deps(p);
+ extend_bfs_deps(p);
+ pthread_mutex_lock(&init_fini_lock);
+ if (!p->constructed) ctor_queue = queue_ctors(p);
+ pthread_mutex_unlock(&init_fini_lock);
+ if (!p->relocated && (mode & RTLD_LAZY)) {
+ prepare_lazy(p);
+ for (i=0; p->deps[i]; i++)
+ if (!p->deps[i]->relocated)
+ prepare_lazy(p->deps[i]);
}
- if (first_load || (mode & RTLD_GLOBAL)) {
+ if (!p->relocated || (mode & RTLD_GLOBAL)) {
/* Make new symbols global, at least temporarily, so we can do
* relocations. If not RTLD_GLOBAL, this is reverted below. */
add_syms(p);
for (i=0; p->deps[i]; i++)
add_syms(p->deps[i]);
}
- if (first_load) {
+ if (!p->relocated) {
reloc_all(p);
}
@@ -1864,13 +2059,18 @@ void *dlopen(const char *file, int mode)
redo_lazy_relocs();
update_tls_size();
+ if (tls_cnt != orig_tls_cnt)
+ install_new_tls();
_dl_debug_state();
orig_tail = tail;
end:
__release_ptc();
if (p) gencnt++;
pthread_rwlock_unlock(&lock);
- if (p) do_init_fini(orig_tail);
+ if (ctor_queue) {
+ do_init_fini(ctor_queue);
+ free(ctor_queue);
+ }
pthread_setcancelstate(cs, 0);
return p;
}
diff --git a/src/aio/aio.c b/src/aio/aio.c
index dae97cc6..6d34fa86 100644
--- a/src/aio/aio.c
+++ b/src/aio/aio.c
@@ -280,6 +280,8 @@ static int submit(struct aiocb *cb, int op)
if (!q) {
if (errno != EBADF) errno = EAGAIN;
+ cb->__ret = -1;
+ cb->__err = errno;
return -1;
}
q->ref++;
@@ -303,8 +305,8 @@ static int submit(struct aiocb *cb, int op)
if (pthread_create(&td, &a, io_thread_func, &args)) {
pthread_mutex_lock(&q->lock);
__aio_unref_queue(q);
- errno = EAGAIN;
- ret = -1;
+ cb->__err = errno = EAGAIN;
+ cb->__ret = ret = -1;
}
pthread_sigmask(SIG_SETMASK, &origmask, 0);
diff --git a/src/complex/__cexp.c b/src/complex/__cexp.c
index 05ac28c7..003d20af 100644
--- a/src/complex/__cexp.c
+++ b/src/complex/__cexp.c
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const uint32_t k = 1799; /* constant for reduction */
static const double kln2 = 1246.97177782734161156; /* k * ln2 */
diff --git a/src/complex/__cexpf.c b/src/complex/__cexpf.c
index 69b54045..ee5ff2bc 100644
--- a/src/complex/__cexpf.c
+++ b/src/complex/__cexpf.c
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const uint32_t k = 235; /* constant for reduction */
static const float kln2 = 162.88958740F; /* k * ln2 */
diff --git a/src/complex/cabs.c b/src/complex/cabs.c
index f61d364e..c5ad58ab 100644
--- a/src/complex/cabs.c
+++ b/src/complex/cabs.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
double cabs(double complex z)
{
diff --git a/src/complex/cabsf.c b/src/complex/cabsf.c
index 30b25c70..619f28d3 100644
--- a/src/complex/cabsf.c
+++ b/src/complex/cabsf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float cabsf(float complex z)
{
diff --git a/src/complex/cabsl.c b/src/complex/cabsl.c
index 40a067c1..d37e3f2e 100644
--- a/src/complex/cabsl.c
+++ b/src/complex/cabsl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double cabsl(long double complex z)
diff --git a/src/complex/cacos.c b/src/complex/cacos.c
index 27c35636..c39d257b 100644
--- a/src/complex/cacos.c
+++ b/src/complex/cacos.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
diff --git a/src/complex/cacosf.c b/src/complex/cacosf.c
index 11852659..2e048540 100644
--- a/src/complex/cacosf.c
+++ b/src/complex/cacosf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
// FIXME
diff --git a/src/complex/cacosh.c b/src/complex/cacosh.c
index 8c68cb01..8e42f1ae 100644
--- a/src/complex/cacosh.c
+++ b/src/complex/cacosh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* acosh(z) = i acos(z) */
diff --git a/src/complex/cacoshf.c b/src/complex/cacoshf.c
index ade01c09..d7e6b545 100644
--- a/src/complex/cacoshf.c
+++ b/src/complex/cacoshf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex cacoshf(float complex z)
{
diff --git a/src/complex/cacoshl.c b/src/complex/cacoshl.c
index 65342557..d3eaee20 100644
--- a/src/complex/cacoshl.c
+++ b/src/complex/cacoshl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex cacoshl(long double complex z)
diff --git a/src/complex/cacosl.c b/src/complex/cacosl.c
index 7fd4a2f6..cc20dcd7 100644
--- a/src/complex/cacosl.c
+++ b/src/complex/cacosl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex cacosl(long double complex z)
diff --git a/src/complex/carg.c b/src/complex/carg.c
index d2d1b462..dfe9b97a 100644
--- a/src/complex/carg.c
+++ b/src/complex/carg.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
double carg(double complex z)
{
diff --git a/src/complex/cargf.c b/src/complex/cargf.c
index ce183c4b..9a6c19b6 100644
--- a/src/complex/cargf.c
+++ b/src/complex/cargf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float cargf(float complex z)
{
diff --git a/src/complex/cargl.c b/src/complex/cargl.c
index e0d50478..88f95f96 100644
--- a/src/complex/cargl.c
+++ b/src/complex/cargl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double cargl(long double complex z)
diff --git a/src/complex/casin.c b/src/complex/casin.c
index 01ed6184..3244bebb 100644
--- a/src/complex/casin.c
+++ b/src/complex/casin.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
// FIXME
diff --git a/src/complex/casinf.c b/src/complex/casinf.c
index 4fcb76fc..2cda2f08 100644
--- a/src/complex/casinf.c
+++ b/src/complex/casinf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
// FIXME
diff --git a/src/complex/casinh.c b/src/complex/casinh.c
index b57fe8c4..50bf27ce 100644
--- a/src/complex/casinh.c
+++ b/src/complex/casinh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* asinh(z) = -i asin(i z) */
diff --git a/src/complex/casinhf.c b/src/complex/casinhf.c
index a11bf902..93d82e5f 100644
--- a/src/complex/casinhf.c
+++ b/src/complex/casinhf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex casinhf(float complex z)
{
diff --git a/src/complex/casinhl.c b/src/complex/casinhl.c
index 714f1893..68ba3ddf 100644
--- a/src/complex/casinhl.c
+++ b/src/complex/casinhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex casinhl(long double complex z)
diff --git a/src/complex/casinl.c b/src/complex/casinl.c
index 3b7ceba7..072adc45 100644
--- a/src/complex/casinl.c
+++ b/src/complex/casinl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex casinl(long double complex z)
diff --git a/src/complex/catan.c b/src/complex/catan.c
index 7dc2afeb..ccc2fb53 100644
--- a/src/complex/catan.c
+++ b/src/complex/catan.c
@@ -58,7 +58,7 @@
* 2.9e-17. See also clog().
*/
-#include "libm.h"
+#include "complex_impl.h"
#define MAXNUM 1.0e308
diff --git a/src/complex/catanf.c b/src/complex/catanf.c
index 8533bde3..e10d9c09 100644
--- a/src/complex/catanf.c
+++ b/src/complex/catanf.c
@@ -53,7 +53,7 @@
* IEEE -10,+10 30000 2.3e-6 5.2e-8
*/
-#include "libm.h"
+#include "complex_impl.h"
#define MAXNUMF 1.0e38F
diff --git a/src/complex/catanh.c b/src/complex/catanh.c
index e248d9b9..c324c7f2 100644
--- a/src/complex/catanh.c
+++ b/src/complex/catanh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* atanh = -i atan(i z) */
diff --git a/src/complex/catanhf.c b/src/complex/catanhf.c
index 4a5eb040..b0505f60 100644
--- a/src/complex/catanhf.c
+++ b/src/complex/catanhf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex catanhf(float complex z)
{
diff --git a/src/complex/catanhl.c b/src/complex/catanhl.c
index a5dd538e..6025c414 100644
--- a/src/complex/catanhl.c
+++ b/src/complex/catanhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex catanhl(long double complex z)
diff --git a/src/complex/catanl.c b/src/complex/catanl.c
index 5ace7704..a9fc02db 100644
--- a/src/complex/catanl.c
+++ b/src/complex/catanl.c
@@ -59,7 +59,7 @@
#include <complex.h>
#include <float.h>
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex catanl(long double complex z)
diff --git a/src/complex/ccos.c b/src/complex/ccos.c
index 645aec29..f32e1fad 100644
--- a/src/complex/ccos.c
+++ b/src/complex/ccos.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* cos(z) = cosh(i z) */
diff --git a/src/complex/ccosf.c b/src/complex/ccosf.c
index 9a67241f..490be9b3 100644
--- a/src/complex/ccosf.c
+++ b/src/complex/ccosf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex ccosf(float complex z)
{
diff --git a/src/complex/ccosh.c b/src/complex/ccosh.c
index 401f3c60..c995da7b 100644
--- a/src/complex/ccosh.c
+++ b/src/complex/ccosh.c
@@ -34,7 +34,7 @@
* These values and the return value were taken from n1124.pdf.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const double huge = 0x1p1023;
diff --git a/src/complex/ccoshf.c b/src/complex/ccoshf.c
index 90acfe05..189ce946 100644
--- a/src/complex/ccoshf.c
+++ b/src/complex/ccoshf.c
@@ -28,7 +28,7 @@
* Hyperbolic cosine of a complex argument. See s_ccosh.c for details.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const float huge = 0x1p127;
diff --git a/src/complex/ccoshl.c b/src/complex/ccoshl.c
index 9b2aed9e..ffb4d8a1 100644
--- a/src/complex/ccoshl.c
+++ b/src/complex/ccoshl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
//FIXME
long double complex ccoshl(long double complex z)
diff --git a/src/complex/ccosl.c b/src/complex/ccosl.c
index d787047f..2530006b 100644
--- a/src/complex/ccosl.c
+++ b/src/complex/ccosl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex ccosl(long double complex z)
diff --git a/src/complex/cexp.c b/src/complex/cexp.c
index 5118e00e..7fb489bb 100644
--- a/src/complex/cexp.c
+++ b/src/complex/cexp.c
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const uint32_t
exp_ovfl = 0x40862e42, /* high bits of MAX_EXP * ln2 ~= 710 */
diff --git a/src/complex/cexpf.c b/src/complex/cexpf.c
index 1a09964c..00d258f3 100644
--- a/src/complex/cexpf.c
+++ b/src/complex/cexpf.c
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const uint32_t
exp_ovfl = 0x42b17218, /* MAX_EXP * ln2 ~= 88.722839355 */
diff --git a/src/complex/cexpl.c b/src/complex/cexpl.c
index a27f85c0..d4df950e 100644
--- a/src/complex/cexpl.c
+++ b/src/complex/cexpl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
//FIXME
long double complex cexpl(long double complex z)
diff --git a/src/complex/cimag.c b/src/complex/cimag.c
index 00955641..d6b0e683 100644
--- a/src/complex/cimag.c
+++ b/src/complex/cimag.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
double (cimag)(double complex z)
{
diff --git a/src/complex/cimagf.c b/src/complex/cimagf.c
index f7bcd76e..b7166dcf 100644
--- a/src/complex/cimagf.c
+++ b/src/complex/cimagf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float (cimagf)(float complex z)
{
diff --git a/src/complex/cimagl.c b/src/complex/cimagl.c
index 9ec24eee..4db77f20 100644
--- a/src/complex/cimagl.c
+++ b/src/complex/cimagl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
long double (cimagl)(long double complex z)
{
diff --git a/src/complex/clog.c b/src/complex/clog.c
index 12aae9c7..b587c291 100644
--- a/src/complex/clog.c
+++ b/src/complex/clog.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
// FIXME
diff --git a/src/complex/clogf.c b/src/complex/clogf.c
index e9b32e60..0389d472 100644
--- a/src/complex/clogf.c
+++ b/src/complex/clogf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
// FIXME
diff --git a/src/complex/clogl.c b/src/complex/clogl.c
index 18f16088..88e83e87 100644
--- a/src/complex/clogl.c
+++ b/src/complex/clogl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex clogl(long double complex z)
diff --git a/src/complex/conj.c b/src/complex/conj.c
index 0b3f5f46..a3b19a4a 100644
--- a/src/complex/conj.c
+++ b/src/complex/conj.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
double complex conj(double complex z)
{
diff --git a/src/complex/conjf.c b/src/complex/conjf.c
index 9af6b2c3..b2195c84 100644
--- a/src/complex/conjf.c
+++ b/src/complex/conjf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex conjf(float complex z)
{
diff --git a/src/complex/conjl.c b/src/complex/conjl.c
index 67f11b9d..87a4ebec 100644
--- a/src/complex/conjl.c
+++ b/src/complex/conjl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
long double complex conjl(long double complex z)
{
diff --git a/src/complex/cpow.c b/src/complex/cpow.c
index f863588f..1137d391 100644
--- a/src/complex/cpow.c
+++ b/src/complex/cpow.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */
diff --git a/src/complex/cpowf.c b/src/complex/cpowf.c
index 53c65dcb..f3fd4b7b 100644
--- a/src/complex/cpowf.c
+++ b/src/complex/cpowf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex cpowf(float complex z, float complex c)
{
diff --git a/src/complex/cpowl.c b/src/complex/cpowl.c
index c1a80a7b..be36f046 100644
--- a/src/complex/cpowl.c
+++ b/src/complex/cpowl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex cpowl(long double complex z, long double complex c)
diff --git a/src/complex/cproj.c b/src/complex/cproj.c
index 15f358a1..9ae1e17c 100644
--- a/src/complex/cproj.c
+++ b/src/complex/cproj.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
double complex cproj(double complex z)
{
diff --git a/src/complex/cprojf.c b/src/complex/cprojf.c
index 653be5e8..03fab339 100644
--- a/src/complex/cprojf.c
+++ b/src/complex/cprojf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex cprojf(float complex z)
{
diff --git a/src/complex/cprojl.c b/src/complex/cprojl.c
index 6731aaa2..38a494c5 100644
--- a/src/complex/cprojl.c
+++ b/src/complex/cprojl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex cprojl(long double complex z)
diff --git a/src/complex/csin.c b/src/complex/csin.c
index ad8ae67a..535c4bf8 100644
--- a/src/complex/csin.c
+++ b/src/complex/csin.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* sin(z) = -i sinh(i z) */
diff --git a/src/complex/csinf.c b/src/complex/csinf.c
index 60b3cbaa..69f5164e 100644
--- a/src/complex/csinf.c
+++ b/src/complex/csinf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex csinf(float complex z)
{
diff --git a/src/complex/csinh.c b/src/complex/csinh.c
index 0f8035d1..eda0ab59 100644
--- a/src/complex/csinh.c
+++ b/src/complex/csinh.c
@@ -34,7 +34,7 @@
* These values and the return value were taken from n1124.pdf.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const double huge = 0x1p1023;
diff --git a/src/complex/csinhf.c b/src/complex/csinhf.c
index 49697f02..eb1d98c5 100644
--- a/src/complex/csinhf.c
+++ b/src/complex/csinhf.c
@@ -28,7 +28,7 @@
* Hyperbolic sine of a complex argument z. See s_csinh.c for details.
*/
-#include "libm.h"
+#include "complex_impl.h"
static const float huge = 0x1p127;
diff --git a/src/complex/csinhl.c b/src/complex/csinhl.c
index c566653b..09fd18f9 100644
--- a/src/complex/csinhl.c
+++ b/src/complex/csinhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
//FIXME
long double complex csinhl(long double complex z)
diff --git a/src/complex/csinl.c b/src/complex/csinl.c
index 4e9f86c3..90a4eb37 100644
--- a/src/complex/csinl.c
+++ b/src/complex/csinl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex csinl(long double complex z)
diff --git a/src/complex/csqrt.c b/src/complex/csqrt.c
index 8a2ba608..c36de001 100644
--- a/src/complex/csqrt.c
+++ b/src/complex/csqrt.c
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#include "libm.h"
+#include "complex_impl.h"
/*
* gcc doesn't implement complex multiplication or division correctly,
diff --git a/src/complex/csqrtf.c b/src/complex/csqrtf.c
index ab5102f0..a6163974 100644
--- a/src/complex/csqrtf.c
+++ b/src/complex/csqrtf.c
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#include "libm.h"
+#include "complex_impl.h"
/*
* gcc doesn't implement complex multiplication or division correctly,
diff --git a/src/complex/csqrtl.c b/src/complex/csqrtl.c
index 0600ef3b..22539379 100644
--- a/src/complex/csqrtl.c
+++ b/src/complex/csqrtl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
//FIXME
long double complex csqrtl(long double complex z)
diff --git a/src/complex/ctan.c b/src/complex/ctan.c
index c0926374..918717bf 100644
--- a/src/complex/ctan.c
+++ b/src/complex/ctan.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
/* tan(z) = -i tanh(i z) */
diff --git a/src/complex/ctanf.c b/src/complex/ctanf.c
index 009b1921..04c3ff19 100644
--- a/src/complex/ctanf.c
+++ b/src/complex/ctanf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
float complex ctanf(float complex z)
{
diff --git a/src/complex/ctanh.c b/src/complex/ctanh.c
index 3ba3a899..54004cd7 100644
--- a/src/complex/ctanh.c
+++ b/src/complex/ctanh.c
@@ -63,7 +63,7 @@
* precision. I also handle large x differently.
*/
-#include "libm.h"
+#include "complex_impl.h"
double complex ctanh(double complex z)
{
diff --git a/src/complex/ctanhf.c b/src/complex/ctanhf.c
index 72b76da0..7f422ba7 100644
--- a/src/complex/ctanhf.c
+++ b/src/complex/ctanhf.c
@@ -28,7 +28,7 @@
* Hyperbolic tangent of a complex argument z. See s_ctanh.c for details.
*/
-#include "libm.h"
+#include "complex_impl.h"
float complex ctanhf(float complex z)
{
diff --git a/src/complex/ctanhl.c b/src/complex/ctanhl.c
index 89a75d13..45d5862c 100644
--- a/src/complex/ctanhl.c
+++ b/src/complex/ctanhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
//FIXME
long double complex ctanhl(long double complex z)
diff --git a/src/complex/ctanl.c b/src/complex/ctanl.c
index ac1c3e0a..4b87420d 100644
--- a/src/complex/ctanl.c
+++ b/src/complex/ctanl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double complex ctanl(long double complex z)
diff --git a/src/dirent/fdopendir.c b/src/dirent/fdopendir.c
index c377271d..d78fb87f 100644
--- a/src/dirent/fdopendir.c
+++ b/src/dirent/fdopendir.c
@@ -13,6 +13,10 @@ DIR *fdopendir(int fd)
if (fstat(fd, &st) < 0) {
return 0;
}
+ if (fcntl(fd, F_GETFL) & O_PATH) {
+ errno = EBADF;
+ return 0;
+ }
if (!S_ISDIR(st.st_mode)) {
errno = ENOTDIR;
return 0;
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index 842886f6..772baba3 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -1,3 +1,4 @@
+#define SYSCALL_NO_TLS 1
#include <elf.h>
#include <limits.h>
#include <sys/mman.h>
@@ -8,6 +9,8 @@
#include "atomic.h"
#include "syscall.h"
+volatile int __thread_list_lock;
+
int __init_tp(void *p)
{
pthread_t td = p;
@@ -16,9 +19,11 @@ int __init_tp(void *p)
if (r < 0) return -1;
if (!r) libc.can_do_threads = 1;
td->detach_state = DT_JOINABLE;
- td->tid = __syscall(SYS_set_tid_address, &td->detach_state);
+ td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock);
td->locale = &libc.global_locale;
td->robust_list.head = &td->robust_list.head;
+ td->sysinfo = __sysinfo;
+ td->next = td->prev = td;
return 0;
}
@@ -110,7 +115,8 @@ static void static_init_tls(size_t *aux)
& (main_tls.align-1);
#ifdef TLS_ABOVE_TP
main_tls.offset = GAP_ABOVE_TP;
- main_tls.offset += -GAP_ABOVE_TP & (main_tls.align-1);
+ main_tls.offset += (-GAP_ABOVE_TP + (uintptr_t)main_tls.image)
+ & (main_tls.align-1);
#else
main_tls.offset = main_tls.size;
#endif
diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c
index 7c95f822..8fbe5262 100644
--- a/src/env/__libc_start_main.c
+++ b/src/env/__libc_start_main.c
@@ -28,7 +28,7 @@ void __init_libc(char **envp, char *pn)
libc.auxv = auxv = (void *)(envp+i+1);
for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT) aux[auxv[i]] = auxv[i+1];
__hwcap = aux[AT_HWCAP];
- __sysinfo = aux[AT_SYSINFO];
+ if (aux[AT_SYSINFO]) __sysinfo = aux[AT_SYSINFO];
libc.page_size = aux[AT_PAGESZ];
if (!pn) pn = (void*)aux[AT_EXECFN];
diff --git a/src/include/pthread.h b/src/include/pthread.h
index d93ac3a5..7167d3e1 100644
--- a/src/include/pthread.h
+++ b/src/include/pthread.h
@@ -18,5 +18,12 @@ hidden int __private_cond_signal(pthread_cond_t *, int);
hidden int __pthread_cond_timedwait(pthread_cond_t *restrict, pthread_mutex_t *restrict, const struct timespec *restrict);
hidden int __pthread_key_create(pthread_key_t *, void (*)(void *));
hidden int __pthread_key_delete(pthread_key_t);
+hidden int __pthread_rwlock_rdlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_tryrdlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_timedrdlock(pthread_rwlock_t *__restrict, const struct timespec *__restrict);
+hidden int __pthread_rwlock_wrlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_trywrlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_timedwrlock(pthread_rwlock_t *__restrict, const struct timespec *__restrict);
+hidden int __pthread_rwlock_unlock(pthread_rwlock_t *);
#endif
diff --git a/src/include/stdio.h b/src/include/stdio.h
index 534c6907..fae3755b 100644
--- a/src/include/stdio.h
+++ b/src/include/stdio.h
@@ -1,6 +1,8 @@
#ifndef STDIO_H
#define STDIO_H
+#define __DEFINED_struct__IO_FILE
+
#include "../../include/stdio.h"
#undef stdin
diff --git a/src/include/sys/membarrier.h b/src/include/sys/membarrier.h
new file mode 100644
index 00000000..3654491c
--- /dev/null
+++ b/src/include/sys/membarrier.h
@@ -0,0 +1,9 @@
+#ifndef SYS_MEMBARRIER_H
+#define SYS_MEMBARRIER_H
+
+#include "../../../include/sys/membarrier.h"
+#include <features.h>
+
+hidden int __membarrier(int, int);
+
+#endif
diff --git a/src/include/wchar.h b/src/include/wchar.h
new file mode 100644
index 00000000..79f5d0e7
--- /dev/null
+++ b/src/include/wchar.h
@@ -0,0 +1,9 @@
+#ifndef WCHAR_H
+#define WCHAR_H
+
+#define __DEFINED_struct__IO_FILE
+
+#include "../../include/wchar.h"
+
+#endif
+
diff --git a/src/internal/aarch64/syscall.s b/src/internal/aarch64/syscall.s
deleted file mode 100644
index 845986bf..00000000
--- a/src/internal/aarch64/syscall.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,%function
-__syscall:
- uxtw x8,w0
- mov x0,x1
- mov x1,x2
- mov x2,x3
- mov x3,x4
- mov x4,x5
- mov x5,x6
- mov x6,x7
- svc 0
- ret
diff --git a/src/internal/arm/syscall.s b/src/internal/arm/syscall.s
deleted file mode 100644
index 64dba2fc..00000000
--- a/src/internal/arm/syscall.s
+++ /dev/null
@@ -1,15 +0,0 @@
-.syntax unified
-.global __syscall
-.hidden __syscall
-.type __syscall,%function
-__syscall:
- mov ip,sp
- stmfd sp!,{r4,r5,r6,r7}
- mov r7,r0
- mov r0,r1
- mov r1,r2
- mov r2,r3
- ldmfd ip,{r3,r4,r5,r6}
- svc 0
- ldmfd sp!,{r4,r5,r6,r7}
- bx lr
diff --git a/src/internal/complex_impl.h b/src/internal/complex_impl.h
new file mode 100644
index 00000000..51fb298a
--- /dev/null
+++ b/src/internal/complex_impl.h
@@ -0,0 +1,22 @@
+#ifndef _COMPLEX_IMPL_H
+#define _COMPLEX_IMPL_H
+
+#include <complex.h>
+#include "libm.h"
+
+#undef __CMPLX
+#undef CMPLX
+#undef CMPLXF
+#undef CMPLXL
+
+#define __CMPLX(x, y, t) \
+ ((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
+
+#define CMPLX(x, y) __CMPLX(x, y, double)
+#define CMPLXF(x, y) __CMPLX(x, y, float)
+#define CMPLXL(x, y) __CMPLX(x, y, long double)
+
+hidden double complex __ldexp_cexp(double complex,int);
+hidden float complex __ldexp_cexpf(float complex,int);
+
+#endif
diff --git a/src/internal/defsysinfo.c b/src/internal/defsysinfo.c
new file mode 100644
index 00000000..6d4117db
--- /dev/null
+++ b/src/internal/defsysinfo.c
@@ -0,0 +1,3 @@
+#include "libc.h"
+
+size_t __sysinfo;
diff --git a/src/internal/i386/defsysinfo.s b/src/internal/i386/defsysinfo.s
new file mode 100644
index 00000000..f1b5b0f2
--- /dev/null
+++ b/src/internal/i386/defsysinfo.s
@@ -0,0 +1,9 @@
+1: int $128
+ ret
+
+.data
+.align 4
+.hidden __sysinfo
+.global __sysinfo
+__sysinfo:
+ .long 1b
diff --git a/src/internal/i386/syscall.s b/src/internal/i386/syscall.s
deleted file mode 100644
index 0ebf2218..00000000
--- a/src/internal/i386/syscall.s
+++ /dev/null
@@ -1,78 +0,0 @@
-.hidden __sysinfo
-
-# The calling convention for __vsyscall has the syscall number
-# and 5 args arriving as: eax, edx, ecx, edi, esi, 4(%esp).
-# This ensures that the inline asm in the C code never has to touch
-# ebx or ebp (which are unavailable in PIC and frame-pointer-using
-# code, respectively), and optimizes for size/simplicity in the caller.
-
-.global __vsyscall
-.hidden __vsyscall
-.type __vsyscall,@function
-__vsyscall:
- push %edi
- push %ebx
- mov %edx,%ebx
- mov %edi,%edx
- mov 12(%esp),%edi
- push %eax
- call 1f
-2: mov %ebx,%edx
- pop %ebx
- pop %ebx
- pop %edi
- ret
-
-1: mov (%esp),%eax
- add $[__sysinfo-2b],%eax
- mov (%eax),%eax
- test %eax,%eax
- jz 1f
- push %eax
- mov 8(%esp),%eax
- ret # tail call to kernel vsyscall entry
-1: mov 4(%esp),%eax
- int $128
- ret
-
-# The __vsyscall6 entry point is used only for 6-argument syscalls.
-# Instead of passing the 5th argument on the stack, a pointer to the
-# 5th and 6th arguments is passed. This is ugly, but there are no
-# register constraints the inline asm could use that would make it
-# possible to pass two arguments on the stack.
-
-.global __vsyscall6
-.hidden __vsyscall6
-.type __vsyscall6,@function
-__vsyscall6:
- push %ebp
- push %eax
- mov 12(%esp), %ebp
- mov (%ebp), %eax
- mov 4(%ebp), %ebp
- push %eax
- mov 4(%esp),%eax
- call __vsyscall
- pop %ebp
- pop %ebp
- pop %ebp
- ret
-
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- lea 24(%esp),%eax
- push %esi
- push %edi
- push %eax
- mov 16(%esp),%eax
- mov 20(%esp),%edx
- mov 24(%esp),%ecx
- mov 28(%esp),%edi
- mov 32(%esp),%esi
- call __vsyscall6
- pop %edi
- pop %edi
- pop %esi
- ret
diff --git a/src/internal/libc.c b/src/internal/libc.c
index 2e10942d..cb051810 100644
--- a/src/internal/libc.c
+++ b/src/internal/libc.c
@@ -3,7 +3,6 @@
struct __libc __libc;
size_t __hwcap;
-size_t __sysinfo;
char *__progname=0, *__progname_full=0;
weak_alias(__progname, program_invocation_short_name);
diff --git a/src/internal/libm.h b/src/internal/libm.h
index fd916277..b5bd26b8 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -1,23 +1,11 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/math_private.h */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
#ifndef _LIBM_H
#define _LIBM_H
#include <stdint.h>
#include <float.h>
#include <math.h>
-#include <complex.h>
#include <endian.h>
+#include "fp_arch.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
@@ -71,102 +59,177 @@ union ldshape {
#error Unsupported long double representation
#endif
+/* Support non-nearest rounding mode. */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs. */
+#define WANT_SNAN 0
+
+#if WANT_SNAN
+#error SNaN is unsupported
+#else
+#define issignalingf_inline(x) 0
+#define issignaling_inline(x) 0
+#endif
+
+#ifndef TOINT_INTRINSICS
+#define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+ consistently with converttoint so the results match. If the result
+ would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
+static double_t roundtoint(double_t);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+ consistently with roundtoint. If the result is not representable in an
+ int32_t then the semantics is unspecified. */
+static int32_t converttoint(double_t);
+#endif
+
+/* Helps static branch prediction so hot path can be better optimized. */
+#ifdef __GNUC__
+#define predict_true(x) __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(x, 0)
+#else
+#define predict_true(x) (x)
+#define predict_false(x) (x)
+#endif
+
+/* Evaluate an expression as the specified type. With standard excess
+ precision handling a type cast or assignment is enough (with
+ -ffloat-store an assignment is required, in old compilers argument
+ passing and return statement may not drop excess precision). */
+
+static inline float eval_as_float(float x)
+{
+ float y = x;
+ return y;
+}
+
+static inline double eval_as_double(double x)
+{
+ double y = x;
+ return y;
+}
+
+/* fp_barrier returns its input, but limits code transformations
+ as if it had a side-effect (e.g. observable io) and returned
+ an arbitrary value. */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+ volatile float y = x;
+ return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+ volatile double y = x;
+ return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+ volatile long double y = x;
+ return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+ otherwise unused. To prevent the constant folding of the input
+ expression, an additional fp_barrier may be needed or a compilation
+ mode that does so (e.g. -frounding-math in gcc). Then it can be
+ used to evaluate an expression for its fenv side-effects only. */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+ volatile float y;
+ y = x;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+ volatile double y;
+ y = x;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+ volatile long double y;
+ y = x;
+}
+#endif
+
#define FORCE_EVAL(x) do { \
if (sizeof(x) == sizeof(float)) { \
- volatile float __x; \
- __x = (x); \
+ fp_force_evalf(x); \
} else if (sizeof(x) == sizeof(double)) { \
- volatile double __x; \
- __x = (x); \
+ fp_force_eval(x); \
} else { \
- volatile long double __x; \
- __x = (x); \
+ fp_force_evall(x); \
} \
} while(0)
-/* Get two 32 bit ints from a double. */
+#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
+#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
+#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
+#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
+
#define EXTRACT_WORDS(hi,lo,d) \
do { \
- union {double f; uint64_t i;} __u; \
- __u.f = (d); \
- (hi) = __u.i >> 32; \
- (lo) = (uint32_t)__u.i; \
+ uint64_t __u = asuint64(d); \
+ (hi) = __u >> 32; \
+ (lo) = (uint32_t)__u; \
} while (0)
-/* Get the more significant 32 bit int from a double. */
#define GET_HIGH_WORD(hi,d) \
do { \
- union {double f; uint64_t i;} __u; \
- __u.f = (d); \
- (hi) = __u.i >> 32; \
+ (hi) = asuint64(d) >> 32; \
} while (0)
-/* Get the less significant 32 bit int from a double. */
#define GET_LOW_WORD(lo,d) \
do { \
- union {double f; uint64_t i;} __u; \
- __u.f = (d); \
- (lo) = (uint32_t)__u.i; \
+ (lo) = (uint32_t)asuint64(d); \
} while (0)
-/* Set a double from two 32 bit ints. */
#define INSERT_WORDS(d,hi,lo) \
do { \
- union {double f; uint64_t i;} __u; \
- __u.i = ((uint64_t)(hi)<<32) | (uint32_t)(lo); \
- (d) = __u.f; \
+ (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
} while (0)
-/* Set the more significant 32 bits of a double from an int. */
#define SET_HIGH_WORD(d,hi) \
-do { \
- union {double f; uint64_t i;} __u; \
- __u.f = (d); \
- __u.i &= 0xffffffff; \
- __u.i |= (uint64_t)(hi) << 32; \
- (d) = __u.f; \
-} while (0)
+ INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
-/* Set the less significant 32 bits of a double from an int. */
#define SET_LOW_WORD(d,lo) \
-do { \
- union {double f; uint64_t i;} __u; \
- __u.f = (d); \
- __u.i &= 0xffffffff00000000ull; \
- __u.i |= (uint32_t)(lo); \
- (d) = __u.f; \
-} while (0)
+ INSERT_WORDS(d, asuint64(d)>>32, lo)
-/* Get a 32 bit int from a float. */
#define GET_FLOAT_WORD(w,d) \
do { \
- union {float f; uint32_t i;} __u; \
- __u.f = (d); \
- (w) = __u.i; \
+ (w) = asuint(d); \
} while (0)
-/* Set a float from a 32 bit int. */
#define SET_FLOAT_WORD(d,w) \
do { \
- union {float f; uint32_t i;} __u; \
- __u.i = (w); \
- (d) = __u.f; \
+ (d) = asfloat(w); \
} while (0)
-#undef __CMPLX
-#undef CMPLX
-#undef CMPLXF
-#undef CMPLXL
-
-#define __CMPLX(x, y, t) \
- ((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
-
-#define CMPLX(x, y) __CMPLX(x, y, double)
-#define CMPLXF(x, y) __CMPLX(x, y, float)
-#define CMPLXL(x, y) __CMPLX(x, y, long double)
-
-/* fdlibm kernel functions */
-
hidden int __rem_pio2_large(double*,double*,int,int,int);
hidden int __rem_pio2(double,double*);
@@ -174,21 +237,18 @@ hidden double __sin(double,double,int);
hidden double __cos(double,double);
hidden double __tan(double,double,int);
hidden double __expo2(double);
-hidden double complex __ldexp_cexp(double complex,int);
hidden int __rem_pio2f(float,double*);
hidden float __sindf(double);
hidden float __cosdf(double);
hidden float __tandf(double,int);
hidden float __expo2f(float);
-hidden float complex __ldexp_cexpf(float complex,int);
hidden int __rem_pio2l(long double, long double *);
hidden long double __sinl(long double, long double, int);
hidden long double __cosl(long double, long double);
hidden long double __tanl(long double, long double, int);
-/* polynomial evaluation */
hidden long double __polevll(long double, const long double *, int);
hidden long double __p1evll(long double, const long double *, int);
@@ -196,4 +256,16 @@ extern int __signgam;
hidden double __lgamma_r(double, int *);
hidden float __lgammaf_r(float, int *);
+/* error handling functions */
+hidden float __math_xflowf(uint32_t, float);
+hidden float __math_uflowf(uint32_t);
+hidden float __math_oflowf(uint32_t);
+hidden float __math_divzerof(uint32_t);
+hidden float __math_invalidf(float);
+hidden double __math_xflow(uint32_t, double);
+hidden double __math_uflow(uint32_t);
+hidden double __math_oflow(uint32_t);
+hidden double __math_divzero(uint32_t);
+hidden double __math_invalid(double);
+
#endif
diff --git a/src/internal/m68k/syscall.s b/src/internal/m68k/syscall.s
deleted file mode 100644
index 9972a34d..00000000
--- a/src/internal/m68k/syscall.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,%function
-__syscall:
- movem.l %d2-%d5,-(%sp)
- movem.l 20(%sp),%d0-%d5/%a0
- trap #0
- movem.l (%sp)+,%d2-%d5
- rts
diff --git a/src/internal/microblaze/syscall.s b/src/internal/microblaze/syscall.s
deleted file mode 100644
index e0312e78..00000000
--- a/src/internal/microblaze/syscall.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- addi r12, r5, 0 # Save the system call number
- add r5, r6, r0 # Shift the arguments, arg1
- add r6, r7, r0 # arg2
- add r7, r8, r0 # arg3
- add r8, r9, r0 # arg4
- add r9, r10, r0 # arg5
- lwi r10, r1, 28 # Get arg6.
- brki r14, 0x8 # syscall
- rtsd r15, 8
- nop
diff --git a/src/internal/mips/syscall.s b/src/internal/mips/syscall.s
deleted file mode 100644
index 5d0def52..00000000
--- a/src/internal/mips/syscall.s
+++ /dev/null
@@ -1,26 +0,0 @@
-.set noreorder
-
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- move $2, $4
- move $4, $5
- move $5, $6
- move $6, $7
- lw $7, 16($sp)
- lw $8, 20($sp)
- lw $9, 24($sp)
- lw $10,28($sp)
- subu $sp, $sp, 32
- sw $8, 16($sp)
- sw $9, 20($sp)
- sw $10,24($sp)
- sw $2 ,28($sp)
- lw $2, 28($sp)
- syscall
- beq $7, $0, 1f
- addu $sp, $sp, 32
- subu $2, $0, $2
-1: jr $ra
- nop
diff --git a/src/internal/mips64/syscall.s b/src/internal/mips64/syscall.s
deleted file mode 100644
index 98448667..00000000
--- a/src/internal/mips64/syscall.s
+++ /dev/null
@@ -1,19 +0,0 @@
-.set noreorder
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- move $2, $4
- move $4, $5
- move $5, $6
- move $6, $7
- move $7, $8
- move $8, $9
- move $9, $10
- move $10, $11
- syscall
- beq $7, $0, 1f
- nop
- dsubu $2, $0, $2
-1: jr $ra
- nop
diff --git a/src/internal/mipsn32/syscall.s b/src/internal/mipsn32/syscall.s
deleted file mode 100644
index 510a6fa4..00000000
--- a/src/internal/mipsn32/syscall.s
+++ /dev/null
@@ -1,19 +0,0 @@
-.set noreorder
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- move $2, $4
- move $4, $5
- move $5, $6
- move $6, $7
- move $7, $8
- move $8, $9
- move $9, $10
- move $10, $11
- syscall
- beq $7, $0, 1f
- nop
- subu $2, $0, $2
-1: jr $ra
- nop
diff --git a/src/internal/or1k/syscall.s b/src/internal/or1k/syscall.s
deleted file mode 100644
index 177964e9..00000000
--- a/src/internal/or1k/syscall.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- l.ori r11, r3, 0
- l.lwz r3, 0(r1)
- l.lwz r4, 4(r1)
- l.lwz r5, 8(r1)
- l.lwz r6, 12(r1)
- l.lwz r7, 16(r1)
- l.lwz r8, 20(r1)
- l.sys 1
- l.jr r9
- l.nop
diff --git a/src/internal/powerpc/syscall.s b/src/internal/powerpc/syscall.s
deleted file mode 100644
index 5b16b8f2..00000000
--- a/src/internal/powerpc/syscall.s
+++ /dev/null
@@ -1,19 +0,0 @@
- .global __syscall
- .hidden __syscall
- .type __syscall,@function
-__syscall:
- mr 0, 3 # Save the system call number
- mr 3, 4 # Shift the arguments: arg1
- mr 4, 5 # arg2
- mr 5, 6 # arg3
- mr 6, 7 # arg4
- mr 7, 8 # arg5
- mr 8, 9 # arg6
- sc
- bnslr+ # return if not summary overflow
- #else error:
- # return negated value.
- neg 3, 3
- blr
- .end __syscall
- .size __syscall, .-__syscall
diff --git a/src/internal/powerpc64/syscall.s b/src/internal/powerpc64/syscall.s
deleted file mode 100644
index fe21f9e1..00000000
--- a/src/internal/powerpc64/syscall.s
+++ /dev/null
@@ -1,17 +0,0 @@
- .global __syscall
- .hidden __syscall
- .type __syscall,@function
-__syscall:
- mr 0, 3 # Save the system call number
- mr 3, 4 # Shift the arguments: arg1
- mr 4, 5 # arg2
- mr 5, 6 # arg3
- mr 6, 7 # arg4
- mr 7, 8 # arg5
- mr 8, 9 # arg6
- sc
- bnslr+ # return if not summary overflow
- neg 3, 3 # otherwise error: return negated value.
- blr
- .end __syscall
- .size __syscall, .-__syscall
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index 7a25b88e..9b001421 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -18,7 +18,7 @@ struct pthread {
* internal (accessed via asm) ABI. Do not change. */
struct pthread *self;
uintptr_t *dtv;
- void *unused1, *unused2;
+ struct pthread *prev, *next; /* non-ABI */
uintptr_t sysinfo;
uintptr_t canary, canary2;
@@ -29,15 +29,12 @@ struct pthread {
volatile int cancel;
volatile unsigned char canceldisable, cancelasync;
unsigned char tsd_used:1;
- unsigned char unblock_cancel:1;
unsigned char dlerror_flag:1;
unsigned char *map_base;
size_t map_size;
void *stack;
size_t stack_size;
size_t guard_size;
- void *start_arg;
- void *(*start)(void *);
void *result;
struct __ptcb *cancelbuf;
void **tsd;
@@ -58,20 +55,10 @@ struct pthread {
uintptr_t *dtv_copy;
};
-struct start_sched_args {
- void *start_arg;
- void *(*start_fn)(void *);
- sigset_t mask;
- pthread_attr_t *attr;
- volatile int futex;
-};
-
enum {
- DT_EXITED = 0,
- DT_EXITING,
+ DT_EXITING = 0,
DT_JOINABLE,
DT_DETACHED,
- DT_DYNAMIC,
};
struct __timer {
@@ -143,6 +130,7 @@ hidden int __init_tp(void *);
hidden void *__copy_tls(unsigned char *);
hidden void __reset_tls();
+hidden void __membarrier_init(void);
hidden void __dl_thread_cleanup(void);
hidden void __testcancel();
hidden void __do_cleanup_push(struct __ptcb *);
@@ -152,10 +140,10 @@ hidden void __pthread_tsd_run_dtors();
hidden void __pthread_key_delete_synccall(void (*)(void *), void *);
hidden int __pthread_key_delete_impl(pthread_key_t);
-extern hidden volatile int __block_new_threads;
extern hidden volatile size_t __pthread_tsd_size;
extern hidden void *__pthread_tsd_main[];
extern hidden volatile int __aio_fut;
+extern hidden volatile int __eintr_valid_flag;
hidden int __clone(int (*)(void *), void *, int, void *, ...);
hidden int __set_thread_area(void *);
@@ -183,6 +171,12 @@ hidden void __acquire_ptc(void);
hidden void __release_ptc(void);
hidden void __inhibit_ptc(void);
+hidden void __tl_lock(void);
+hidden void __tl_unlock(void);
+hidden void __tl_sync(pthread_t);
+
+extern hidden volatile int __thread_list_lock;
+
extern hidden unsigned __default_stacksize;
extern hidden unsigned __default_guardsize;
diff --git a/src/internal/s390x/syscall.s b/src/internal/s390x/syscall.s
deleted file mode 100644
index 2322bc36..00000000
--- a/src/internal/s390x/syscall.s
+++ /dev/null
@@ -1,15 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall, %function
-__syscall:
- stg %r7, 56(%r15)
- lgr %r1, %r2
- lgr %r2, %r3
- lgr %r3, %r4
- lgr %r4, %r5
- lgr %r5, %r6
- lg %r6, 160(%r15)
- lg %r7, 168(%r15)
- svc 0
- lg %r7, 56(%r15)
- br %r14
diff --git a/src/internal/sh/syscall.s b/src/internal/sh/syscall.s
deleted file mode 100644
index 331918a5..00000000
--- a/src/internal/sh/syscall.s
+++ /dev/null
@@ -1,23 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall, @function
-__syscall:
- ! The kernel syscall entry point documents that the trap number indicates
- ! the number of arguments being passed, but it then ignores that information.
- ! Since we do not actually know how many arguments are being passed, we will
- ! say there are six, since that is the maximum we support here.
- mov r4, r3
- mov r5, r4
- mov r6, r5
- mov r7, r6
- mov.l @r15, r7
- mov.l @(4,r15), r0
- mov.l @(8,r15), r1
- trapa #31
- or r0, r0
- or r0, r0
- or r0, r0
- or r0, r0
- or r0, r0
- rts
- nop
diff --git a/src/internal/shgetc.c b/src/internal/shgetc.c
index ebd5fae7..a4a9c633 100644
--- a/src/internal/shgetc.c
+++ b/src/internal/shgetc.c
@@ -22,7 +22,8 @@ int __shgetc(FILE *f)
off_t cnt = shcnt(f);
if (f->shlim && cnt >= f->shlim || (c=__uflow(f)) < 0) {
f->shcnt = f->buf - f->rpos + cnt;
- f->shend = 0;
+ f->shend = f->rpos;
+ f->shlim = -1;
return EOF;
}
cnt++;
diff --git a/src/internal/shgetc.h b/src/internal/shgetc.h
index 1c30f75f..9435381a 100644
--- a/src/internal/shgetc.h
+++ b/src/internal/shgetc.h
@@ -26,7 +26,7 @@ hidden int __shgetc(FILE *);
#define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->buf))
#define shlim(f, lim) __shlim((f), (lim))
#define shgetc(f) (((f)->rpos != (f)->shend) ? *(f)->rpos++ : __shgetc(f))
-#define shunget(f) ((f)->shend ? (void)(f)->rpos-- : (void)0)
+#define shunget(f) ((f)->shlim>=0 ? (void)(f)->rpos-- : (void)0)
#define sh_fromstring(f, s) \
((f)->buf = (f)->rpos = (void *)(s), (f)->rend = (void*)-1)
diff --git a/src/internal/syscall.h b/src/internal/syscall.h
index 06c5527f..69f019cd 100644
--- a/src/internal/syscall.h
+++ b/src/internal/syscall.h
@@ -22,27 +22,17 @@
typedef long syscall_arg_t;
#endif
-hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
+hidden long __syscall_ret(unsigned long),
__syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
syscall_arg_t, syscall_arg_t, syscall_arg_t);
-#ifdef SYSCALL_NO_INLINE
-#define __syscall0(n) (__syscall)(n)
-#define __syscall1(n,a) (__syscall)(n,__scc(a))
-#define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b))
-#define __syscall3(n,a,b,c) (__syscall)(n,__scc(a),__scc(b),__scc(c))
-#define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d))
-#define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
-#define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
-#else
#define __syscall1(n,a) __syscall1(n,__scc(a))
#define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b))
#define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c))
#define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d))
#define __syscall5(n,a,b,c,d,e) __syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
#define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
-#endif
-#define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
+#define __syscall7(n,a,b,c,d,e,f,g) __syscall7(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
#define __SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
#define __SYSCALL_NARGS(...) __SYSCALL_NARGS_X(__VA_ARGS__,7,6,5,4,3,2,1,0,)
diff --git a/src/internal/x32/syscall.s b/src/internal/x32/syscall.s
deleted file mode 100644
index c4bee804..00000000
--- a/src/internal/x32/syscall.s
+++ /dev/null
@@ -1,13 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- movq %rdi,%rax
- movq %rsi,%rdi
- movq %rdx,%rsi
- movq %rcx,%rdx
- movq %r8,%r10
- movq %r9,%r8
- movq 8(%rsp),%r9
- syscall
- ret
diff --git a/src/internal/x86_64/syscall.s b/src/internal/x86_64/syscall.s
deleted file mode 100644
index c4bee804..00000000
--- a/src/internal/x86_64/syscall.s
+++ /dev/null
@@ -1,13 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
- movq %rdi,%rax
- movq %rsi,%rdi
- movq %rdx,%rsi
- movq %rcx,%rdx
- movq %r8,%r10
- movq %r9,%r8
- movq 8(%rsp),%r9
- syscall
- ret
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8e4004d7..04d97e73 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -23,73 +23,13 @@ __tlsdesc_static:
.hidden __tlsdesc_dynamic
.type __tlsdesc_dynamic,@function
__tlsdesc_dynamic:
- stp x1,x2,[sp,#-32]!
- stp x3,x4,[sp,#16]
+ stp x1,x2,[sp,#-16]!
mrs x1,tpidr_el0 // tp
ldr x0,[x0,#8] // p
- ldr x2,[x0] // p->modidx
- ldr x3,[x1,#-8] // dtv
- ldr x4,[x3] // dtv[0]
- cmp x2,x4
- b.hi 1f
- ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]
- ldr x0,[x0,#8] // p->off
- add x0,x0,x2
-2: sub x0,x0,x1
- ldp x3,x4,[sp,#16]
- ldp x1,x2,[sp],#32
+ ldp x0,x2,[x0] // p->modidx, p->off
+ sub x2,x2,x1 // p->off - tp
+ ldr x1,[x1,#-8] // dtv
+ ldr x1,[x1,x0,lsl #3] // dtv[p->modidx]
+ add x0,x1,x2 // dtv[p->modidx] + p->off - tp
+ ldp x1,x2,[sp],#16
ret
-
- // save all registers __tls_get_new may clobber
- // update sp in two steps because offset must be in [-512,509]
-1: stp x29,x30,[sp,#-160]!
- stp x5,x6,[sp,#16]
- stp x7,x8,[sp,#32]
- stp x9,x10,[sp,#48]
- stp x11,x12,[sp,#64]
- stp x13,x14,[sp,#80]
- stp x15,x16,[sp,#96]
- stp x17,x18,[sp,#112]
- stp q0,q1,[sp,#128]
- stp q2,q3,[sp,#-480]!
- stp q4,q5,[sp,#32]
- stp q6,q7,[sp,#64]
- stp q8,q9,[sp,#96]
- stp q10,q11,[sp,#128]
- stp q12,q13,[sp,#160]
- stp q14,q15,[sp,#192]
- stp q16,q17,[sp,#224]
- stp q18,q19,[sp,#256]
- stp q20,q21,[sp,#288]
- stp q22,q23,[sp,#320]
- stp q24,q25,[sp,#352]
- stp q26,q27,[sp,#384]
- stp q28,q29,[sp,#416]
- stp q30,q31,[sp,#448]
- bl __tls_get_new
- mrs x1,tpidr_el0
- ldp q4,q5,[sp,#32]
- ldp q6,q7,[sp,#64]
- ldp q8,q9,[sp,#96]
- ldp q10,q11,[sp,#128]
- ldp q12,q13,[sp,#160]
- ldp q14,q15,[sp,#192]
- ldp q16,q17,[sp,#224]
- ldp q18,q19,[sp,#256]
- ldp q20,q21,[sp,#288]
- ldp q22,q23,[sp,#320]
- ldp q24,q25,[sp,#352]
- ldp q26,q27,[sp,#384]
- ldp q28,q29,[sp,#416]
- ldp q30,q31,[sp,#448]
- ldp q2,q3,[sp],#480
- ldp x5,x6,[sp,#16]
- ldp x7,x8,[sp,#32]
- ldp x9,x10,[sp,#48]
- ldp x11,x12,[sp,#64]
- ldp x13,x14,[sp,#80]
- ldp x15,x16,[sp,#96]
- ldp x17,x18,[sp,#112]
- ldp q0,q1,[sp,#128]
- ldp x29,x30,[sp],#160
- b 2b
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
index 4e67c3e2..455eac1d 100644
--- a/src/ldso/arm/tlsdesc.S
+++ b/src/ldso/arm/tlsdesc.S
@@ -35,13 +35,9 @@ __tlsdesc_dynamic:
#endif
#endif
ldr r3,[r0,#-4] // r3 = dtv
- ldr ip,[r3] // ip = dtv slot count
- cmp r1,ip
- bhi 3f
ldr ip,[r3,r1,LSL #2]
sub r0,ip,r0
add r0,r0,r2 // r0 = r3[r1]-r0+r2
-4:
#if __ARM_ARCH >= 5
pop {r2,r3,ip,pc}
#else
@@ -49,21 +45,6 @@ __tlsdesc_dynamic:
bx lr
#endif
-3:
-#if __ARM_PCS_VFP || !__SOFTFP__
- .fpu vfp
- vpush {d0-d7}
-#endif
- push {r0-r3}
- add r0,sp,#4
- bl __tls_get_new
- pop {r1-r3,ip}
-#if __ARM_PCS_VFP || !__SOFTFP__
- vpop {d0-d7}
-#endif
- sub r0,r0,r1 // r0 = retval-tp
- b 4b
-
#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
#else
diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c
index 06ed8542..3fcc7779 100644
--- a/src/ldso/dlerror.c
+++ b/src/ldso/dlerror.c
@@ -3,6 +3,7 @@
#include <stdarg.h>
#include "pthread_impl.h"
#include "dynlink.h"
+#include "lock.h"
char *dlerror()
{
@@ -16,21 +17,38 @@ char *dlerror()
return s;
}
+static volatile int freebuf_queue_lock[1];
+static void **freebuf_queue;
+
void __dl_thread_cleanup(void)
{
pthread_t self = __pthread_self();
- if (self->dlerror_buf != (void *)-1)
- free(self->dlerror_buf);
+ if (self->dlerror_buf && self->dlerror_buf != (void *)-1) {
+ LOCK(freebuf_queue_lock);
+ void **p = (void **)self->dlerror_buf;
+ *p = freebuf_queue;
+ freebuf_queue = p;
+ UNLOCK(freebuf_queue_lock);
+ }
}
hidden void __dl_vseterr(const char *fmt, va_list ap)
{
+ LOCK(freebuf_queue_lock);
+ while (freebuf_queue) {
+ void **p = freebuf_queue;
+ freebuf_queue = *p;
+ free(p);
+ }
+ UNLOCK(freebuf_queue_lock);
+
va_list ap2;
va_copy(ap2, ap);
pthread_t self = __pthread_self();
if (self->dlerror_buf != (void *)-1)
free(self->dlerror_buf);
size_t len = vsnprintf(0, 0, fmt, ap2);
+ if (len < sizeof(void *)) len = sizeof(void *);
va_end(ap2);
char *buf = malloc(len+1);
if (buf) {
diff --git a/src/ldso/i386/tlsdesc.s b/src/ldso/i386/tlsdesc.s
index 4a553bce..a5c0100c 100644
--- a/src/ldso/i386/tlsdesc.s
+++ b/src/ldso/i386/tlsdesc.s
@@ -17,15 +17,9 @@ __tlsdesc_dynamic:
mov %gs:4,%edx
push %ecx
mov (%eax),%ecx
- cmp %ecx,(%edx)
- jc 1f
mov 4(%eax),%eax
add (%edx,%ecx,4),%eax
-2: pop %ecx
+ pop %ecx
sub %gs:0,%eax
pop %edx
ret
-1: push %eax
- call __tls_get_new
- pop %ecx
- jmp 2b
diff --git a/src/ldso/x86_64/tlsdesc.s b/src/ldso/x86_64/tlsdesc.s
index 8238c3eb..0151d15c 100644
--- a/src/ldso/x86_64/tlsdesc.s
+++ b/src/ldso/x86_64/tlsdesc.s
@@ -17,28 +17,9 @@ __tlsdesc_dynamic:
mov %fs:8,%rdx
push %rcx
mov (%rax),%rcx
- cmp %rcx,(%rdx)
- jc 1f
mov 8(%rax),%rax
add (%rdx,%rcx,8),%rax
-2: pop %rcx
+ pop %rcx
sub %fs:0,%rax
pop %rdx
ret
-1: push %rdi
- push %rdi
- push %rsi
- push %r8
- push %r9
- push %r10
- push %r11
- mov %rax,%rdi
- call __tls_get_new
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rsi
- pop %rdi
- pop %rdi
- jmp 2b
diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c
new file mode 100644
index 00000000..9ebe906e
--- /dev/null
+++ b/src/linux/membarrier.c
@@ -0,0 +1,77 @@
+#include <sys/membarrier.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <string.h>
+#include "pthread_impl.h"
+#include "syscall.h"
+
+static void dummy_0(void)
+{
+}
+
+static void dummy_1(pthread_t t)
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+weak_alias(dummy_1, __tl_sync);
+
+static sem_t barrier_sem;
+
+static void bcast_barrier(int s)
+{
+ sem_post(&barrier_sem);
+}
+
+int __membarrier(int cmd, int flags)
+{
+ int r = __syscall(SYS_membarrier, cmd, flags);
+ /* Emulate the private expedited command, which is needed by the
+ * dynamic linker for installation of dynamic TLS, for older
+ * kernels that lack the syscall. Unlike the syscall, this only
+ * synchronizes with threads of the process, not other processes
+ * sharing the VM, but such sharing is not a supported usage
+ * anyway. */
+ if (r && cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED && !flags) {
+ pthread_t self=__pthread_self(), td;
+ sigset_t set;
+ __block_app_sigs(&set);
+ __tl_lock();
+ sem_init(&barrier_sem, 0, 0);
+ struct sigaction sa = {
+ .sa_flags = SA_RESTART,
+ .sa_handler = bcast_barrier
+ };
+ memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+ if (!__libc_sigaction(SIGSYNCCALL, &sa, 0)) {
+ for (td=self->next; td!=self; td=td->next)
+ __syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+ for (td=self->next; td!=self; td=td->next)
+ sem_wait(&barrier_sem);
+ r = 0;
+ sa.sa_handler = SIG_IGN;
+ __libc_sigaction(SIGSYNCCALL, &sa, 0);
+ }
+ sem_destroy(&barrier_sem);
+ __tl_unlock();
+ __restore_sigs(&set);
+ }
+ return __syscall_ret(r);
+}
+
+void __membarrier_init(void)
+{
+ /* If membarrier is linked, attempt to pre-register to be able to use
+ * the private expedited command before the process becomes multi-
+ * threaded, since registering later has bad, potentially unbounded
+ * latency. This syscall should be essentially free, and it's arguably
+ * a mistake in the API design that registration was even required.
+ * For other commands, registration may impose some cost, so it's left
+ * to the application to do so if desired. Unfortunately this means
+ * library code initialized after the process becomes multi-threaded
+ * cannot use these features without accepting registration latency. */
+ __syscall(SYS_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);
+}
+
+weak_alias(__membarrier, membarrier);
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index 8b891d00..4c304393 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -122,6 +122,7 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
const struct __locale_map *lm;
size_t domlen;
struct binding *q;
+ int old_errno = errno;
if ((unsigned)category >= LC_ALL) goto notrans;
@@ -138,6 +139,7 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
lm = loc->cat[category];
if (!lm) {
notrans:
+ errno = old_errno;
return (char *) ((n == 1) ? msgid1 : msgid2);
}
@@ -250,6 +252,7 @@ notrans:
trans += l+1;
}
}
+ errno = old_errno;
return (char *)trans;
}
diff --git a/src/math/__math_divzero.c b/src/math/__math_divzero.c
new file mode 100644
index 00000000..59d21350
--- /dev/null
+++ b/src/math/__math_divzero.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_divzero(uint32_t sign)
+{
+ return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
+}
diff --git a/src/math/__math_divzerof.c b/src/math/__math_divzerof.c
new file mode 100644
index 00000000..ce046f3e
--- /dev/null
+++ b/src/math/__math_divzerof.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_divzerof(uint32_t sign)
+{
+ return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
+}
diff --git a/src/math/__math_invalid.c b/src/math/__math_invalid.c
new file mode 100644
index 00000000..17740490
--- /dev/null
+++ b/src/math/__math_invalid.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_invalid(double x)
+{
+ return (x - x) / (x - x);
+}
diff --git a/src/math/__math_invalidf.c b/src/math/__math_invalidf.c
new file mode 100644
index 00000000..357d4b12
--- /dev/null
+++ b/src/math/__math_invalidf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_invalidf(float x)
+{
+ return (x - x) / (x - x);
+}
diff --git a/src/math/__math_oflow.c b/src/math/__math_oflow.c
new file mode 100644
index 00000000..c85dbf98
--- /dev/null
+++ b/src/math/__math_oflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_oflow(uint32_t sign)
+{
+ return __math_xflow(sign, 0x1p769);
+}
diff --git a/src/math/__math_oflowf.c b/src/math/__math_oflowf.c
new file mode 100644
index 00000000..fa7d0620
--- /dev/null
+++ b/src/math/__math_oflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_oflowf(uint32_t sign)
+{
+ return __math_xflowf(sign, 0x1p97f);
+}
diff --git a/src/math/__math_uflow.c b/src/math/__math_uflow.c
new file mode 100644
index 00000000..b90594ae
--- /dev/null
+++ b/src/math/__math_uflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_uflow(uint32_t sign)
+{
+ return __math_xflow(sign, 0x1p-767);
+}
diff --git a/src/math/__math_uflowf.c b/src/math/__math_uflowf.c
new file mode 100644
index 00000000..94d50f2b
--- /dev/null
+++ b/src/math/__math_uflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_uflowf(uint32_t sign)
+{
+ return __math_xflowf(sign, 0x1p-95f);
+}
diff --git a/src/math/__math_xflow.c b/src/math/__math_xflow.c
new file mode 100644
index 00000000..744203c4
--- /dev/null
+++ b/src/math/__math_xflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_xflow(uint32_t sign, double y)
+{
+ return eval_as_double(fp_barrier(sign ? -y : y) * y);
+}
diff --git a/src/math/__math_xflowf.c b/src/math/__math_xflowf.c
new file mode 100644
index 00000000..f2c84784
--- /dev/null
+++ b/src/math/__math_xflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_xflowf(uint32_t sign, float y)
+{
+ return eval_as_float(fp_barrierf(sign ? -y : y) * y);
+}
diff --git a/src/math/atanl.c b/src/math/atanl.c
index 79a3edb8..c3b0c926 100644
--- a/src/math/atanl.c
+++ b/src/math/atanl.c
@@ -70,21 +70,21 @@ static long double T_odd(long double x)
#elif LDBL_MANT_DIG == 113
#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | u.i.top>>8)
-const long double atanhi[] = {
+static const long double atanhi[] = {
4.63647609000806116214256231461214397e-01L,
7.85398163397448309615660845819875699e-01L,
9.82793723247329067985710611014666038e-01L,
1.57079632679489661923132169163975140e+00L,
};
-const long double atanlo[] = {
+static const long double atanlo[] = {
4.89509642257333492668618435220297706e-36L,
2.16795253253094525619926100651083806e-35L,
-2.31288434538183565909319952098066272e-35L,
4.33590506506189051239852201302167613e-35L,
};
-const long double aT[] = {
+static const long double aT[] = {
3.33333333333333333333333333333333125e-01L,
-1.99999999999999999999999999999180430e-01L,
1.42857142857142857142857142125269827e-01L,
diff --git a/src/math/exp.c b/src/math/exp.c
index 9ea672fa..b764d73c 100644
--- a/src/math/exp.c
+++ b/src/math/exp.c
@@ -1,134 +1,134 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */
/*
- * ====================================================
- * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision e^x function.
*
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* exp(x)
- * Returns the exponential of x.
- *
- * Method
- * 1. Argument reduction:
- * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
- * Given x, find r and integer k such that
- *
- * x = k*ln2 + r, |r| <= 0.5*ln2.
- *
- * Here r will be represented as r = hi-lo for better
- * accuracy.
- *
- * 2. Approximation of exp(r) by a special rational function on
- * the interval [0,0.34658]:
- * Write
- * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
- * We use a special Remez algorithm on [0,0.34658] to generate
- * a polynomial of degree 5 to approximate R. The maximum error
- * of this polynomial approximation is bounded by 2**-59. In
- * other words,
- * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
- * (where z=r*r, and the values of P1 to P5 are listed below)
- * and
- * | 5 | -59
- * | 2.0+P1*z+...+P5*z - R(z) | <= 2
- * | |
- * The computation of exp(r) thus becomes
- * 2*r
- * exp(r) = 1 + ----------
- * R(r) - r
- * r*c(r)
- * = 1 + r + ----------- (for better accuracy)
- * 2 - c(r)
- * where
- * 2 4 10
- * c(r) = r - (P1*r + P2*r + ... + P5*r ).
- *
- * 3. Scale back to obtain exp(x):
- * From step 1, we have
- * exp(x) = 2^k * exp(r)
- *
- * Special cases:
- * exp(INF) is INF, exp(NaN) is NaN;
- * exp(-INF) is 0, and
- * for finite argument, only exp(0)=1 is exact.
- *
- * Accuracy:
- * according to an error analysis, the error is always less than
- * 1 ulp (unit in the last place).
- *
- * Misc. info.
- * For IEEE double
- * if x > 709.782712893383973096 then exp(x) overflows
- * if x < -745.133219101941108420 then exp(x) underflows
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
+#include <math.h>
+#include <stdint.h>
#include "libm.h"
+#include "exp_data.h"
-static const double
-half[2] = {0.5,-0.5},
-ln2hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
-ln2lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
-invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
-P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
-double exp(double x)
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS, however it has a computed exponent that may have
+ overflown into the sign bit so that needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale, positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
- double_t hi, lo, c, xx, y;
- int k, sign;
- uint32_t hx;
-
- GET_HIGH_WORD(hx, x);
- sign = hx>>31;
- hx &= 0x7fffffff; /* high word of |x| */
+ double_t scale, y;
- /* special cases */
- if (hx >= 0x4086232b) { /* if |x| >= 708.39... */
- if (isnan(x))
- return x;
- if (x > 709.782712893383973096) {
- /* overflow if x!=inf */
- x *= 0x1p1023;
- return x;
- }
- if (x < -708.39641853226410622) {
- /* underflow if x!=-inf */
- FORCE_EVAL((float)(-0x1p-149/x));
- if (x < -745.13321910194110842)
- return 0;
- }
+ if ((ki & 0x80000000) == 0) {
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52;
+ scale = asdouble(sbits);
+ y = 0x1p1009 * (scale + scale * tmp);
+ return eval_as_double(y);
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ scale = asdouble(sbits);
+ y = scale + scale * tmp;
+ if (y < 1.0) {
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo;
+ lo = scale - y + scale * tmp;
+ hi = 1.0 + y;
+ lo = 1.0 - hi + y + lo;
+ y = eval_as_double(hi + lo) - 1.0;
+ /* Avoid -0.0 with downward rounding. */
+ if (WANT_ROUNDING && y == 0.0)
+ y = 0.0;
+ /* The underflow exception needs to be signaled explicitly. */
+ fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
+ y = 0x1p-1022 * y;
+ return eval_as_double(y);
+}
- /* argument reduction */
- if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
- if (hx >= 0x3ff0a2b2) /* if |x| >= 1.5 ln2 */
- k = (int)(invln2*x + half[sign]);
- else
- k = 1 - sign - sign;
- hi = x - k*ln2hi; /* k*ln2hi is exact here */
- lo = k*ln2lo;
- x = hi - lo;
- } else if (hx > 0x3e300000) { /* if |x| > 2**-28 */
- k = 0;
- hi = x;
- lo = 0;
- } else {
- /* inexact if x!=0 */
- FORCE_EVAL(0x1p1023 + x);
- return 1 + x;
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t top12(double x)
+{
+ return asuint64(x) >> 52;
+}
+
+double exp(double x)
+{
+ uint32_t abstop;
+ uint64_t ki, idx, top, sbits;
+ double_t kd, z, r, r2, scale, tail, tmp;
+
+ abstop = top12(x) & 0x7ff;
+ if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
+ if (abstop - top12(0x1p-54) >= 0x80000000)
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ return WANT_ROUNDING ? 1.0 + x : 1.0;
+ if (abstop >= top12(1024.0)) {
+ if (asuint64(x) == asuint64(-INFINITY))
+ return 0.0;
+ if (abstop >= top12(INFINITY))
+ return 1.0 + x;
+ if (asuint64(x) >> 63)
+ return __math_uflow(0);
+ else
+ return __math_oflow(0);
+ }
+ /* Large x is special cased below. */
+ abstop = 0;
}
- /* x is now in primary range */
- xx = x*x;
- c = x - xx*(P1+xx*(P2+xx*(P3+xx*(P4+xx*P5))));
- y = 1 + (x*c/(2-c) - lo + hi);
- if (k == 0)
- return y;
- return scalbn(y, k);
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ z = InvLn2N * x;
+#if TOINT_INTRINSICS
+ kd = roundtoint(z);
+ ki = converttoint(z);
+#elif EXP_USE_TOINT_NARROW
+ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
+ kd = eval_as_double(z + Shift);
+ ki = asuint64(kd) >> 16;
+ kd = (double_t)(int32_t)ki;
+#else
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ kd = eval_as_double(z + Shift);
+ ki = asuint64(kd);
+ kd -= Shift;
+#endif
+ r = x + kd * NegLn2hiN + kd * NegLn2loN;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N);
+ top = ki << (52 - EXP_TABLE_BITS);
+ tail = asdouble(T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.25/N ulp larger. */
+ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
+ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+ if (predict_false(abstop == 0))
+ return specialcase(tmp, sbits, ki);
+ scale = asdouble(sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double(scale + scale * tmp);
}
diff --git a/src/math/exp2.c b/src/math/exp2.c
index e14adba5..e0ff54bd 100644
--- a/src/math/exp2.c
+++ b/src/math/exp2.c
@@ -1,375 +1,121 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */
-/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+/*
+ * Double-precision 2^x function.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
+#include <math.h>
+#include <stdint.h>
#include "libm.h"
+#include "exp_data.h"
-#define TBLSIZE 256
+#define N (1 << EXP_TABLE_BITS) /* x is reduced to k/N + r; the table is indexed by k % N */
+#define Shift __exp_data.exp2_shift /* added to x and subtracted again to round x to a multiple of 1/N */
+#define T __exp_data.tab /* interleaved (tail, scale bits) pairs for 2^(k/N) */
+#define C1 __exp_data.exp2_poly[0] /* coefficient of r */
+#define C2 __exp_data.exp2_poly[1] /* coefficient of r^2 */
+#define C3 __exp_data.exp2_poly[2] /* coefficient of r^3 */
+#define C4 __exp_data.exp2_poly[3] /* coefficient of r^4 */
+#define C5 __exp_data.exp2_poly[4] /* coefficient of r^5 */
-static const double
-redux = 0x1.8p52 / TBLSIZE,
-P1 = 0x1.62e42fefa39efp-1,
-P2 = 0x1.ebfbdff82c575p-3,
-P3 = 0x1.c6b08d704a0a6p-5,
-P4 = 0x1.3b2ab88f70400p-7,
-P5 = 0x1.5d88003875c74p-10;
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS, however it has a computed exponent that may have
+ overflowed into the sign bit so that needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale, positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+{
+ double_t scale, y;
-static const double tbl[TBLSIZE * 2] = {
-/* exp2(z + eps) eps */
- 0x1.6a09e667f3d5dp-1, 0x1.9880p-44,
- 0x1.6b052fa751744p-1, 0x1.8000p-50,
- 0x1.6c012750bd9fep-1, -0x1.8780p-45,
- 0x1.6cfdcddd476bfp-1, 0x1.ec00p-46,
- 0x1.6dfb23c651a29p-1, -0x1.8000p-50,
- 0x1.6ef9298593ae3p-1, -0x1.c000p-52,
- 0x1.6ff7df9519386p-1, -0x1.fd80p-45,
- 0x1.70f7466f42da3p-1, -0x1.c880p-45,
- 0x1.71f75e8ec5fc3p-1, 0x1.3c00p-46,
- 0x1.72f8286eacf05p-1, -0x1.8300p-44,
- 0x1.73f9a48a58152p-1, -0x1.0c00p-47,
- 0x1.74fbd35d7ccfcp-1, 0x1.f880p-45,
- 0x1.75feb564267f1p-1, 0x1.3e00p-47,
- 0x1.77024b1ab6d48p-1, -0x1.7d00p-45,
- 0x1.780694fde5d38p-1, -0x1.d000p-50,
- 0x1.790b938ac1d00p-1, 0x1.3000p-49,
- 0x1.7a11473eb0178p-1, -0x1.d000p-49,
- 0x1.7b17b0976d060p-1, 0x1.0400p-45,
- 0x1.7c1ed0130c133p-1, 0x1.0000p-53,
- 0x1.7d26a62ff8636p-1, -0x1.6900p-45,
- 0x1.7e2f336cf4e3bp-1, -0x1.2e00p-47,
- 0x1.7f3878491c3e8p-1, -0x1.4580p-45,
- 0x1.80427543e1b4ep-1, 0x1.3000p-44,
- 0x1.814d2add1071ap-1, 0x1.f000p-47,
- 0x1.82589994ccd7ep-1, -0x1.1c00p-45,
- 0x1.8364c1eb942d0p-1, 0x1.9d00p-45,
- 0x1.8471a4623cab5p-1, 0x1.7100p-43,
- 0x1.857f4179f5bbcp-1, 0x1.2600p-45,
- 0x1.868d99b4491afp-1, -0x1.2c40p-44,
- 0x1.879cad931a395p-1, -0x1.3000p-45,
- 0x1.88ac7d98a65b8p-1, -0x1.a800p-45,
- 0x1.89bd0a4785800p-1, -0x1.d000p-49,
- 0x1.8ace5422aa223p-1, 0x1.3280p-44,
- 0x1.8be05bad619fap-1, 0x1.2b40p-43,
- 0x1.8cf3216b54383p-1, -0x1.ed00p-45,
- 0x1.8e06a5e08664cp-1, -0x1.0500p-45,
- 0x1.8f1ae99157807p-1, 0x1.8280p-45,
- 0x1.902fed0282c0ep-1, -0x1.cb00p-46,
- 0x1.9145b0b91ff96p-1, -0x1.5e00p-47,
- 0x1.925c353aa2ff9p-1, 0x1.5400p-48,
- 0x1.93737b0cdc64ap-1, 0x1.7200p-46,
- 0x1.948b82b5f98aep-1, -0x1.9000p-47,
- 0x1.95a44cbc852cbp-1, 0x1.5680p-45,
- 0x1.96bdd9a766f21p-1, -0x1.6d00p-44,
- 0x1.97d829fde4e2ap-1, -0x1.1000p-47,
- 0x1.98f33e47a23a3p-1, 0x1.d000p-45,
- 0x1.9a0f170ca0604p-1, -0x1.8a40p-44,
- 0x1.9b2bb4d53ff89p-1, 0x1.55c0p-44,
- 0x1.9c49182a3f15bp-1, 0x1.6b80p-45,
- 0x1.9d674194bb8c5p-1, -0x1.c000p-49,
- 0x1.9e86319e3238ep-1, 0x1.7d00p-46,
- 0x1.9fa5e8d07f302p-1, 0x1.6400p-46,
- 0x1.a0c667b5de54dp-1, -0x1.5000p-48,
- 0x1.a1e7aed8eb8f6p-1, 0x1.9e00p-47,
- 0x1.a309bec4a2e27p-1, 0x1.ad80p-45,
- 0x1.a42c980460a5dp-1, -0x1.af00p-46,
- 0x1.a5503b23e259bp-1, 0x1.b600p-47,
- 0x1.a674a8af46213p-1, 0x1.8880p-44,
- 0x1.a799e1330b3a7p-1, 0x1.1200p-46,
- 0x1.a8bfe53c12e8dp-1, 0x1.6c00p-47,
- 0x1.a9e6b5579fcd2p-1, -0x1.9b80p-45,
- 0x1.ab0e521356fb8p-1, 0x1.b700p-45,
- 0x1.ac36bbfd3f381p-1, 0x1.9000p-50,
- 0x1.ad5ff3a3c2780p-1, 0x1.4000p-49,
- 0x1.ae89f995ad2a3p-1, -0x1.c900p-45,
- 0x1.afb4ce622f367p-1, 0x1.6500p-46,
- 0x1.b0e07298db790p-1, 0x1.fd40p-45,
- 0x1.b20ce6c9a89a9p-1, 0x1.2700p-46,
- 0x1.b33a2b84f1a4bp-1, 0x1.d470p-43,
- 0x1.b468415b747e7p-1, -0x1.8380p-44,
- 0x1.b59728de5593ap-1, 0x1.8000p-54,
- 0x1.b6c6e29f1c56ap-1, 0x1.ad00p-47,
- 0x1.b7f76f2fb5e50p-1, 0x1.e800p-50,
- 0x1.b928cf22749b2p-1, -0x1.4c00p-47,
- 0x1.ba5b030a10603p-1, -0x1.d700p-47,
- 0x1.bb8e0b79a6f66p-1, 0x1.d900p-47,
- 0x1.bcc1e904bc1ffp-1, 0x1.2a00p-47,
- 0x1.bdf69c3f3a16fp-1, -0x1.f780p-46,
- 0x1.bf2c25bd71db8p-1, -0x1.0a00p-46,
- 0x1.c06286141b2e9p-1, -0x1.1400p-46,
- 0x1.c199bdd8552e0p-1, 0x1.be00p-47,
- 0x1.c2d1cd9fa64eep-1, -0x1.9400p-47,
- 0x1.c40ab5fffd02fp-1, -0x1.ed00p-47,
- 0x1.c544778fafd15p-1, 0x1.9660p-44,
- 0x1.c67f12e57d0cbp-1, -0x1.a100p-46,
- 0x1.c7ba88988c1b6p-1, -0x1.8458p-42,
- 0x1.c8f6d9406e733p-1, -0x1.a480p-46,
- 0x1.ca3405751c4dfp-1, 0x1.b000p-51,
- 0x1.cb720dcef9094p-1, 0x1.1400p-47,
- 0x1.ccb0f2e6d1689p-1, 0x1.0200p-48,
- 0x1.cdf0b555dc412p-1, 0x1.3600p-48,
- 0x1.cf3155b5bab3bp-1, -0x1.6900p-47,
- 0x1.d072d4a0789bcp-1, 0x1.9a00p-47,
- 0x1.d1b532b08c8fap-1, -0x1.5e00p-46,
- 0x1.d2f87080d8a85p-1, 0x1.d280p-46,
- 0x1.d43c8eacaa203p-1, 0x1.1a00p-47,
- 0x1.d5818dcfba491p-1, 0x1.f000p-50,
- 0x1.d6c76e862e6a1p-1, -0x1.3a00p-47,
- 0x1.d80e316c9834ep-1, -0x1.cd80p-47,
- 0x1.d955d71ff6090p-1, 0x1.4c00p-48,
- 0x1.da9e603db32aep-1, 0x1.f900p-48,
- 0x1.dbe7cd63a8325p-1, 0x1.9800p-49,
- 0x1.dd321f301b445p-1, -0x1.5200p-48,
- 0x1.de7d5641c05bfp-1, -0x1.d700p-46,
- 0x1.dfc97337b9aecp-1, -0x1.6140p-46,
- 0x1.e11676b197d5ep-1, 0x1.b480p-47,
- 0x1.e264614f5a3e7p-1, 0x1.0ce0p-43,
- 0x1.e3b333b16ee5cp-1, 0x1.c680p-47,
- 0x1.e502ee78b3fb4p-1, -0x1.9300p-47,
- 0x1.e653924676d68p-1, -0x1.5000p-49,
- 0x1.e7a51fbc74c44p-1, -0x1.7f80p-47,
- 0x1.e8f7977cdb726p-1, -0x1.3700p-48,
- 0x1.ea4afa2a490e8p-1, 0x1.5d00p-49,
- 0x1.eb9f4867ccae4p-1, 0x1.61a0p-46,
- 0x1.ecf482d8e680dp-1, 0x1.5500p-48,
- 0x1.ee4aaa2188514p-1, 0x1.6400p-51,
- 0x1.efa1bee615a13p-1, -0x1.e800p-49,
- 0x1.f0f9c1cb64106p-1, -0x1.a880p-48,
- 0x1.f252b376bb963p-1, -0x1.c900p-45,
- 0x1.f3ac948dd7275p-1, 0x1.a000p-53,
- 0x1.f50765b6e4524p-1, -0x1.4f00p-48,
- 0x1.f6632798844fdp-1, 0x1.a800p-51,
- 0x1.f7bfdad9cbe38p-1, 0x1.abc0p-48,
- 0x1.f91d802243c82p-1, -0x1.4600p-50,
- 0x1.fa7c1819e908ep-1, -0x1.b0c0p-47,
- 0x1.fbdba3692d511p-1, -0x1.0e00p-51,
- 0x1.fd3c22b8f7194p-1, -0x1.0de8p-46,
- 0x1.fe9d96b2a23eep-1, 0x1.e430p-49,
- 0x1.0000000000000p+0, 0x0.0000p+0,
- 0x1.00b1afa5abcbep+0, -0x1.3400p-52,
- 0x1.0163da9fb3303p+0, -0x1.2170p-46,
- 0x1.02168143b0282p+0, 0x1.a400p-52,
- 0x1.02c9a3e77806cp+0, 0x1.f980p-49,
- 0x1.037d42e11bbcap+0, -0x1.7400p-51,
- 0x1.04315e86e7f89p+0, 0x1.8300p-50,
- 0x1.04e5f72f65467p+0, -0x1.a3f0p-46,
- 0x1.059b0d315855ap+0, -0x1.2840p-47,
- 0x1.0650a0e3c1f95p+0, 0x1.1600p-48,
- 0x1.0706b29ddf71ap+0, 0x1.5240p-46,
- 0x1.07bd42b72a82dp+0, -0x1.9a00p-49,
- 0x1.0874518759bd0p+0, 0x1.6400p-49,
- 0x1.092bdf66607c8p+0, -0x1.0780p-47,
- 0x1.09e3ecac6f383p+0, -0x1.8000p-54,
- 0x1.0a9c79b1f3930p+0, 0x1.fa00p-48,
- 0x1.0b5586cf988fcp+0, -0x1.ac80p-48,
- 0x1.0c0f145e46c8ap+0, 0x1.9c00p-50,
- 0x1.0cc922b724816p+0, 0x1.5200p-47,
- 0x1.0d83b23395dd8p+0, -0x1.ad00p-48,
- 0x1.0e3ec32d3d1f3p+0, 0x1.bac0p-46,
- 0x1.0efa55fdfa9a6p+0, -0x1.4e80p-47,
- 0x1.0fb66affed2f0p+0, -0x1.d300p-47,
- 0x1.1073028d7234bp+0, 0x1.1500p-48,
- 0x1.11301d0125b5bp+0, 0x1.c000p-49,
- 0x1.11edbab5e2af9p+0, 0x1.6bc0p-46,
- 0x1.12abdc06c31d5p+0, 0x1.8400p-49,
- 0x1.136a814f2047dp+0, -0x1.ed00p-47,
- 0x1.1429aaea92de9p+0, 0x1.8e00p-49,
- 0x1.14e95934f3138p+0, 0x1.b400p-49,
- 0x1.15a98c8a58e71p+0, 0x1.5300p-47,
- 0x1.166a45471c3dfp+0, 0x1.3380p-47,
- 0x1.172b83c7d5211p+0, 0x1.8d40p-45,
- 0x1.17ed48695bb9fp+0, -0x1.5d00p-47,
- 0x1.18af9388c8d93p+0, -0x1.c880p-46,
- 0x1.1972658375d66p+0, 0x1.1f00p-46,
- 0x1.1a35beb6fcba7p+0, 0x1.0480p-46,
- 0x1.1af99f81387e3p+0, -0x1.7390p-43,
- 0x1.1bbe084045d54p+0, 0x1.4e40p-45,
- 0x1.1c82f95281c43p+0, -0x1.a200p-47,
- 0x1.1d4873168b9b2p+0, 0x1.3800p-49,
- 0x1.1e0e75eb44031p+0, 0x1.ac00p-49,
- 0x1.1ed5022fcd938p+0, 0x1.1900p-47,
- 0x1.1f9c18438cdf7p+0, -0x1.b780p-46,
- 0x1.2063b88628d8fp+0, 0x1.d940p-45,
- 0x1.212be3578a81ep+0, 0x1.8000p-50,
- 0x1.21f49917ddd41p+0, 0x1.b340p-45,
- 0x1.22bdda2791323p+0, 0x1.9f80p-46,
- 0x1.2387a6e7561e7p+0, -0x1.9c80p-46,
- 0x1.2451ffb821427p+0, 0x1.2300p-47,
- 0x1.251ce4fb2a602p+0, -0x1.3480p-46,
- 0x1.25e85711eceb0p+0, 0x1.2700p-46,
- 0x1.26b4565e27d16p+0, 0x1.1d00p-46,
- 0x1.2780e341de00fp+0, 0x1.1ee0p-44,
- 0x1.284dfe1f5633ep+0, -0x1.4c00p-46,
- 0x1.291ba7591bb30p+0, -0x1.3d80p-46,
- 0x1.29e9df51fdf09p+0, 0x1.8b00p-47,
- 0x1.2ab8a66d10e9bp+0, -0x1.27c0p-45,
- 0x1.2b87fd0dada3ap+0, 0x1.a340p-45,
- 0x1.2c57e39771af9p+0, -0x1.0800p-46,
- 0x1.2d285a6e402d9p+0, -0x1.ed00p-47,
- 0x1.2df961f641579p+0, -0x1.4200p-48,
- 0x1.2ecafa93e2ecfp+0, -0x1.4980p-45,
- 0x1.2f9d24abd8822p+0, -0x1.6300p-46,
- 0x1.306fe0a31b625p+0, -0x1.2360p-44,
- 0x1.31432edeea50bp+0, -0x1.0df8p-40,
- 0x1.32170fc4cd7b8p+0, -0x1.2480p-45,
- 0x1.32eb83ba8e9a2p+0, -0x1.5980p-45,
- 0x1.33c08b2641766p+0, 0x1.ed00p-46,
- 0x1.3496266e3fa27p+0, -0x1.c000p-50,
- 0x1.356c55f929f0fp+0, -0x1.0d80p-44,
- 0x1.36431a2de88b9p+0, 0x1.2c80p-45,
- 0x1.371a7373aaa39p+0, 0x1.0600p-45,
- 0x1.37f26231e74fep+0, -0x1.6600p-46,
- 0x1.38cae6d05d838p+0, -0x1.ae00p-47,
- 0x1.39a401b713ec3p+0, -0x1.4720p-43,
- 0x1.3a7db34e5a020p+0, 0x1.8200p-47,
- 0x1.3b57fbfec6e95p+0, 0x1.e800p-44,
- 0x1.3c32dc313a8f2p+0, 0x1.f800p-49,
- 0x1.3d0e544ede122p+0, -0x1.7a00p-46,
- 0x1.3dea64c1234bbp+0, 0x1.6300p-45,
- 0x1.3ec70df1c4eccp+0, -0x1.8a60p-43,
- 0x1.3fa4504ac7e8cp+0, -0x1.cdc0p-44,
- 0x1.40822c367a0bbp+0, 0x1.5b80p-45,
- 0x1.4160a21f72e95p+0, 0x1.ec00p-46,
- 0x1.423fb27094646p+0, -0x1.3600p-46,
- 0x1.431f5d950a920p+0, 0x1.3980p-45,
- 0x1.43ffa3f84b9ebp+0, 0x1.a000p-48,
- 0x1.44e0860618919p+0, -0x1.6c00p-48,
- 0x1.45c2042a7d201p+0, -0x1.bc00p-47,
- 0x1.46a41ed1d0016p+0, -0x1.2800p-46,
- 0x1.4786d668b3326p+0, 0x1.0e00p-44,
- 0x1.486a2b5c13c00p+0, -0x1.d400p-45,
- 0x1.494e1e192af04p+0, 0x1.c200p-47,
- 0x1.4a32af0d7d372p+0, -0x1.e500p-46,
- 0x1.4b17dea6db801p+0, 0x1.7800p-47,
- 0x1.4bfdad53629e1p+0, -0x1.3800p-46,
- 0x1.4ce41b817c132p+0, 0x1.0800p-47,
- 0x1.4dcb299fddddbp+0, 0x1.c700p-45,
- 0x1.4eb2d81d8ab96p+0, -0x1.ce00p-46,
- 0x1.4f9b2769d2d02p+0, 0x1.9200p-46,
- 0x1.508417f4531c1p+0, -0x1.8c00p-47,
- 0x1.516daa2cf662ap+0, -0x1.a000p-48,
- 0x1.5257de83f51eap+0, 0x1.a080p-43,
- 0x1.5342b569d4edap+0, -0x1.6d80p-45,
- 0x1.542e2f4f6ac1ap+0, -0x1.2440p-44,
- 0x1.551a4ca5d94dbp+0, 0x1.83c0p-43,
- 0x1.56070dde9116bp+0, 0x1.4b00p-45,
- 0x1.56f4736b529dep+0, 0x1.15a0p-43,
- 0x1.57e27dbe2c40ep+0, -0x1.9e00p-45,
- 0x1.58d12d497c76fp+0, -0x1.3080p-45,
- 0x1.59c0827ff0b4cp+0, 0x1.dec0p-43,
- 0x1.5ab07dd485427p+0, -0x1.4000p-51,
- 0x1.5ba11fba87af4p+0, 0x1.0080p-44,
- 0x1.5c9268a59460bp+0, -0x1.6c80p-45,
- 0x1.5d84590998e3fp+0, 0x1.69a0p-43,
- 0x1.5e76f15ad20e1p+0, -0x1.b400p-46,
- 0x1.5f6a320dcebcap+0, 0x1.7700p-46,
- 0x1.605e1b976dcb8p+0, 0x1.6f80p-45,
- 0x1.6152ae6cdf715p+0, 0x1.1000p-47,
- 0x1.6247eb03a5531p+0, -0x1.5d00p-46,
- 0x1.633dd1d1929b5p+0, -0x1.2d00p-46,
- 0x1.6434634ccc313p+0, -0x1.a800p-49,
- 0x1.652b9febc8efap+0, -0x1.8600p-45,
- 0x1.6623882553397p+0, 0x1.1fe0p-40,
- 0x1.671c1c708328ep+0, -0x1.7200p-44,
- 0x1.68155d44ca97ep+0, 0x1.6800p-49,
- 0x1.690f4b19e9471p+0, -0x1.9780p-45,
-};
+ if ((ki & 0x80000000) == 0) { /* bit 31 of ki clear, i.e. (int32_t)ki is not negative */
+ /* k > 0, the exponent of scale might have overflowed by 1. */
+ sbits -= 1ull << 52; /* take one off the exponent field; the factor 2 below restores it */
+ scale = asdouble(sbits);
+ y = 2 * (scale + scale * tmp);
+ return eval_as_double(y);
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52; /* add 1022 to the exponent field; the 0x1p-1022 multiply at the end undoes it */
+ scale = asdouble(sbits);
+ y = scale + scale * tmp;
+ if (y < 1.0) { /* after the final 0x1p-1022 scaling the result is below 0x1p-1022 */
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo;
+ lo = scale - y + scale * tmp; /* approximates what was lost when scale + scale * tmp was rounded to y */
+ hi = 1.0 + y;
+ lo = 1.0 - hi + y + lo; /* adds what was lost when 1.0 + y was rounded to hi */
+ y = eval_as_double(hi + lo) - 1.0;
+ /* Avoid -0.0 with downward rounding. */
+ if (WANT_ROUNDING && y == 0.0)
+ y = 0.0; /* y already compares equal to zero, so this only replaces -0.0 by +0.0 */
+ /* The underflow exception needs to be signaled explicitly. */
+ fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
+ }
+ y = 0x1p-1022 * y;
+ return eval_as_double(y);
+}
+
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t top12(double x)
+{
+ return asuint64(x) >> 52; /* shift out the 52 low bits of the 64-bit representation */
+}
-/*
- * exp2(x): compute the base 2 exponential of x
- *
- * Accuracy: Peak error < 0.503 ulp for normalized results.
- *
- * Method: (accurate tables)
- *
- * Reduce x:
- * x = k + y, for integer k and |y| <= 1/2.
- * Thus we have exp2(x) = 2**k * exp2(y).
- *
- * Reduce y:
- * y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE.
- * Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]),
- * with |z - eps[i]| <= 2**-9 + 2**-39 for the table used.
- *
- * We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via
- * a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61.
- * The values in exp2t[] and eps[] are chosen such that
- * exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such
- * that exp2t[i] is accurate to 2**-64.
- *
- * Note that the range of i is +-TBLSIZE/2, so we actually index the tables
- * by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are
- * virtual tables, interleaved in the real table tbl[].
- *
- * This method is due to Gal, with many details due to Gal and Bachelis:
- *
- * Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library
- * for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991).
- */
double exp2(double x)
{
- double_t r, t, z;
- uint32_t ix, i0;
- union {double f; uint64_t i;} u = {x};
- union {uint32_t u; int32_t i;} k;
+ uint32_t abstop; /* exponent field of x (top 12 bits with the sign masked off); 0 requests specialcase() */
+ uint64_t ki, idx, top, sbits;
+ double_t kd, r, r2, scale, tail, tmp;
- /* Filter out exceptional cases. */
- ix = u.i>>32 & 0x7fffffff;
- if (ix >= 0x408ff000) { /* |x| >= 1022 or nan */
- if (ix >= 0x40900000 && u.i>>63 == 0) { /* x >= 1024 or nan */
- /* overflow */
- x *= 0x1p1023;
- return x;
- }
- if (ix >= 0x7ff00000) /* -inf or -nan */
- return -1/x;
- if (u.i>>63) { /* x <= -1022 */
- /* underflow */
- if (x <= -1075 || x - 0x1p52 + 0x1p52 != x)
- FORCE_EVAL((float)(-0x1p-149/x));
- if (x <= -1075)
- return 0;
+ abstop = top12(x) & 0x7ff; /* mask off the sign bit */
+ if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { /* unsigned wrap-around range check: taken when |x| < 2^-54, |x| >= 512, or x is inf/nan */
+ if (abstop - top12(0x1p-54) >= 0x80000000) /* the subtraction wrapped around, i.e. |x| < 2^-54 */
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ return WANT_ROUNDING ? 1.0 + x : 1.0;
+ if (abstop >= top12(1024.0)) { /* |x| >= 1024, inf or nan */
+ if (asuint64(x) == asuint64(-INFINITY))
+ return 0.0;
+ if (abstop >= top12(INFINITY)) /* +inf or nan (-inf was handled just above) */
+ return 1.0 + x;
+ if (!(asuint64(x) >> 63)) /* sign bit clear: finite x >= 1024 */
+ return __math_oflow(0);
+ else if (asuint64(x) >= asuint64(-1075.0)) /* x is negative here, so a larger bit pattern means x <= -1075 */
+ return __math_uflow(0);
}
- } else if (ix < 0x3c900000) { /* |x| < 0x1p-54 */
- return 1.0 + x;
+ if (2 * asuint64(x) > 2 * asuint64(928.0)) /* doubling shifts the sign bit out, so this tests |x| > 928 */
+ /* Large x is special cased below. */
+ abstop = 0;
}
- /* Reduce x, computing z, i0, and k. */
- u.f = x + redux;
- i0 = u.i;
- i0 += TBLSIZE / 2;
- k.u = i0 / TBLSIZE * TBLSIZE;
- k.i /= TBLSIZE;
- i0 %= TBLSIZE;
- u.f -= redux;
- z = x - u.f;
-
- /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
- t = tbl[2*i0]; /* exp2t[i0] */
- z -= tbl[2*i0 + 1]; /* eps[i0] */
- r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5))));
-
- return scalbn(r, k.i);
+ /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
+ /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
+ kd = eval_as_double(x + Shift);
+ ki = asuint64(kd); /* k. */
+ kd -= Shift; /* k/N for int k. */
+ r = x - kd;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N); /* table entries are (tail, scale bits) pairs, hence the factor 2 */
+ top = ki << (52 - EXP_TABLE_BITS); /* shifts k so that the bits above its low EXP_TABLE_BITS reach bit 52 and up */
+ tail = asdouble(T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.5/N ulp larger. */
+ /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
+ tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+ if (predict_false(abstop == 0)) /* set above for |x| > 928: result may overflow or underflow */
+ return specialcase(tmp, sbits, ki);
+ scale = asdouble(sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double(scale + scale * tmp);
}
diff --git a/src/math/exp2f.c b/src/math/exp2f.c
index 296b6343..0360482c 100644
--- a/src/math/exp2f.c
+++ b/src/math/exp2f.c
@@ -1,126 +1,69 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_exp2f.c */
-/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+/*
+ * Single-precision 2^x function.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
+#include <math.h>
+#include <stdint.h>
#include "libm.h"
+#include "exp2f_data.h"
-#define TBLSIZE 16
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
-static const float
-redux = 0x1.8p23f / TBLSIZE,
-P1 = 0x1.62e430p-1f,
-P2 = 0x1.ebfbe0p-3f,
-P3 = 0x1.c6b348p-5f,
-P4 = 0x1.3b2c9cp-7f;
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
+Wrong count: 168353 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
-static const double exp2ft[TBLSIZE] = {
- 0x1.6a09e667f3bcdp-1,
- 0x1.7a11473eb0187p-1,
- 0x1.8ace5422aa0dbp-1,
- 0x1.9c49182a3f090p-1,
- 0x1.ae89f995ad3adp-1,
- 0x1.c199bdd85529cp-1,
- 0x1.d5818dcfba487p-1,
- 0x1.ea4afa2a490dap-1,
- 0x1.0000000000000p+0,
- 0x1.0b5586cf9890fp+0,
- 0x1.172b83c7d517bp+0,
- 0x1.2387a6e756238p+0,
- 0x1.306fe0a31b715p+0,
- 0x1.3dea64c123422p+0,
- 0x1.4bfdad5362a27p+0,
- 0x1.5ab07dd485429p+0,
-};
+#define N (1 << EXP2F_TABLE_BITS) /* x is reduced to k/N + r; the table is indexed by k % N */
+#define T __exp2f_data.tab /* bit patterns used to build 2^(k/N) */
+#define C __exp2f_data.poly /* C[0], C[1], C[2]: coefficients of r^3, r^2, r */
+#define SHIFT __exp2f_data.shift_scaled /* added to x and subtracted again to round x to a multiple of 1/N */
+
+static inline uint32_t top12(float x) /* Top 12 bits of the bit representation of a float. */
+{
+ return asuint(x) >> 20; /* assuming asuint() yields the 32-bit pattern, 12 bits remain */
+}
-/*
- * exp2f(x): compute the base 2 exponential of x
- *
- * Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927.
- *
- * Method: (equally-spaced tables)
- *
- * Reduce x:
- * x = k + y, for integer k and |y| <= 1/2.
- * Thus we have exp2f(x) = 2**k * exp2(y).
- *
- * Reduce y:
- * y = i/TBLSIZE + z for integer i near y * TBLSIZE.
- * Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z),
- * with |z| <= 2**-(TBLSIZE+1).
- *
- * We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a
- * degree-4 minimax polynomial with maximum error under 1.4 * 2**-33.
- * Using double precision for everything except the reduction makes
- * roundoff error insignificant and simplifies the scaling step.
- *
- * This method is due to Tang, but I do not use his suggested parameters:
- *
- * Tang, P. Table-driven Implementation of the Exponential Function
- * in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989).
- */
float exp2f(float x)
{
- double_t t, r, z;
- union {float f; uint32_t i;} u = {x};
- union {double f; uint64_t i;} uk;
- uint32_t ix, i0, k;
+ uint32_t abstop; /* top 12 bits of x with the sign bit masked off */
+ uint64_t ki, t;
+ double_t kd, xd, z, r, r2, y, s;
- /* Filter out exceptional cases. */
- ix = u.i & 0x7fffffff;
- if (ix > 0x42fc0000) { /* |x| > 126 */
- if (ix > 0x7f800000) /* NaN */
- return x;
- if (u.i >= 0x43000000 && u.i < 0x80000000) { /* x >= 128 */
- x *= 0x1p127f;
- return x;
- }
- if (u.i >= 0x80000000) { /* x < -126 */
- if (u.i >= 0xc3160000 || (u.i & 0x0000ffff))
- FORCE_EVAL(-0x1p-149f/x);
- if (u.i >= 0xc3160000) /* x <= -150 */
- return 0;
- }
- } else if (ix <= 0x33000000) { /* |x| <= 0x1p-25 */
- return 1.0f + x;
+ xd = (double_t)x; /* the rest of the computation is carried out in double_t */
+ abstop = top12(x) & 0x7ff; /* mask off the sign bit */
+ if (predict_false(abstop >= top12(128.0f))) {
+ /* |x| >= 128 or x is nan. */
+ if (asuint(x) == asuint(-INFINITY))
+ return 0.0f;
+ if (abstop >= top12(INFINITY)) /* +inf or nan (-inf was handled just above) */
+ return x + x;
+ if (x > 0.0f) /* finite x >= 128 */
+ return __math_oflowf(0);
+ if (x <= -150.0f) /* -150 < x <= -128 falls through to the general path */
+ return __math_uflowf(0);
}
- /* Reduce x, computing z, i0, and k. */
- u.f = x + redux;
- i0 = u.i;
- i0 += TBLSIZE / 2;
- k = i0 / TBLSIZE;
- uk.i = (uint64_t)(0x3ff + k)<<52;
- i0 &= TBLSIZE - 1;
- u.f -= redux;
- z = x - u.f;
- /* Compute r = exp2(y) = exp2ft[i0] * p(z). */
- r = exp2ft[i0];
- t = r * z;
- r = r + t * (P1 + z * P2) + t * (z * z) * (P3 + z * P4);
+ /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
+ kd = eval_as_double(xd + SHIFT);
+ ki = asuint64(kd); /* bit pattern of the shifted sum; used as k below */
+ kd -= SHIFT; /* k/N for int k. */
+ r = xd - kd;
- /* Scale by 2**k */
- return r * uk.f;
+ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N]; /* table entry for k % N */
+ t += ki << (52 - EXP2F_TABLE_BITS); /* tab[k%N] + (k << 52-BITS) is the bit pattern of 2^(k/N) */
+ s = asdouble(t);
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y; /* y = C0*r^3 + C1*r^2 + C2*r + 1 */
+ y = y * s;
+ return eval_as_float(y);
}
diff --git a/src/math/exp2f_data.c b/src/math/exp2f_data.c
new file mode 100644
index 00000000..be324727
--- /dev/null
+++ b/src/math/exp2f_data.c
@@ -0,0 +1,35 @@
+/*
+ * Shared data between expf, exp2f and powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp2f_data.h"
+
+#define N (1 << EXP2F_TABLE_BITS)
+
+const struct exp2f_data __exp2f_data = {
+ /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+ used for computing 2^(k/N) for an int |k| < 150 N as
+ double(tab[k%N] + (k << 52-BITS)) */
+ .tab = {
+0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+ },
+ .shift_scaled = 0x1.8p+52 / N, /* exp2f adds and subtracts this to round x to a multiple of 1/N */
+ .poly = { /* coefficients of r^3, r^2, r in the exp2f polynomial */
+ 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
+ },
+ .shift = 0x1.8p+52, /* same constant as shift_scaled without the 1/N factor */
+ .invln2_scaled = 0x1.71547652b82fep+0 * N, /* N/ln2 */
+ .poly_scaled = { /* the .poly coefficients divided by N^3, N^2, N */
+ 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
+ },
+};
diff --git a/src/math/exp2f_data.h b/src/math/exp2f_data.h
new file mode 100644
index 00000000..fe744f15
--- /dev/null
+++ b/src/math/exp2f_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP2F_DATA_H
+#define _EXP2F_DATA_H
+
+#include <features.h>
+#include <stdint.h>
+
+/* Shared between expf, exp2f and powf. */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern hidden const struct exp2f_data {
+ uint64_t tab[1 << EXP2F_TABLE_BITS]; /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) */
+ double shift_scaled; /* 0x1.8p+52 / N */
+ double poly[EXP2F_POLY_ORDER]; /* polynomial coefficients, highest power first */
+ double shift; /* 0x1.8p+52 */
+ double invln2_scaled; /* N/ln2 */
+ double poly_scaled[EXP2F_POLY_ORDER]; /* poly[] divided by N^3, N^2, N */
+} __exp2f_data;
+
+#endif
diff --git a/src/math/exp_data.c b/src/math/exp_data.c
new file mode 100644
index 00000000..21be0146
--- /dev/null
+++ b/src/math/exp_data.c
@@ -0,0 +1,182 @@
+/*
+ * Shared data between exp, exp2 and pow.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp_data.h"
+
+#define N (1 << EXP_TABLE_BITS)
+
+const struct exp_data __exp_data = {
+// N/ln2
+.invln2N = 0x1.71547652b82fep0 * N,
+// -ln2/N
+.negln2hiN = -0x1.62e42fefa0000p-8,
+.negln2loN = -0x1.cf79abc9e3b3ap-47,
+// Used for rounding when !TOINT_INTRINSICS
+#if EXP_USE_TOINT_NARROW
+.shift = 0x1800000000.8p0,
+#else
+.shift = 0x1.8p52,
+#endif
+// exp polynomial coefficients.
+.poly = {
+// abs error: 1.555*2^-66
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/256+eps
+// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
+// abs error if |x| < ln2/128: 1.7145*2^-56
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf172b91p-5,
+0x1.1111167a4d017p-7,
+},
+.exp2_shift = 0x1.8p52 / N,
+// exp2 polynomial coefficients.
+.exp2_poly = {
+// abs error: 1.2195*2^-65
+// ulp error: 0.507 (0.511 without fma)
+// if |x| < 1/256
+// abs error if |x| < 1/128: 1.9941*2^-56
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+},
+// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
+// tab[2*k] = asuint64(T[k])
+// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
+.tab = {
+0x0, 0x3ff0000000000000,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+},
+};
diff --git a/src/math/exp_data.h b/src/math/exp_data.h
new file mode 100644
index 00000000..3e24bac5
--- /dev/null
+++ b/src/math/exp_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP_DATA_H
+#define _EXP_DATA_H
+
+#include <features.h>
+#include <stdint.h>
+
+#define EXP_TABLE_BITS 7
+#define EXP_POLY_ORDER 5
+#define EXP_USE_TOINT_NARROW 0
+#define EXP2_POLY_ORDER 5
+extern hidden const struct exp_data {
+ double invln2N; /* N/ln2 */
+ double shift; /* Used for rounding when !TOINT_INTRINSICS. */
+ double negln2hiN; /* -ln2/N, high part */
+ double negln2loN; /* -ln2/N, low part */
+ double poly[4]; /* Last four coefficients of the exp polynomial. */
+ double exp2_shift; /* 0x1.8p52 / N */
+ double exp2_poly[EXP2_POLY_ORDER]; /* exp2 polynomial coefficients. */
+ uint64_t tab[2*(1 << EXP_TABLE_BITS)]; /* 2^(k/N) ~= H[k]*(1 + T[k]): tab[2*k] = asuint64(T[k]), tab[2*k+1] = asuint64(H[k]) - (k << 52)/N */
+} __exp_data;
+
+#endif
diff --git a/src/math/expf.c b/src/math/expf.c
index feee2b0e..f9fbf8e7 100644
--- a/src/math/expf.c
+++ b/src/math/expf.c
@@ -1,83 +1,80 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision e^x function.
*
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
+#include <math.h>
+#include <stdint.h>
#include "libm.h"
+#include "exp2f_data.h"
-static const float
-half[2] = {0.5,-0.5},
-ln2hi = 6.9314575195e-1f, /* 0x3f317200 */
-ln2lo = 1.4286067653e-6f, /* 0x35bfbe8e */
-invln2 = 1.4426950216e+0f, /* 0x3fb8aa3b */
/*
- * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
- * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
- */
-P1 = 1.6666625440e-1f, /* 0xaaaa8f.0p-26 */
-P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
-float expf(float x)
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXP2F_TABLE_BITS)
+#define InvLn2N __exp2f_data.invln2_scaled
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly_scaled
+
+static inline uint32_t top12(float x)
{
- float_t hi, lo, c, xx, y;
- int k, sign;
- uint32_t hx;
+ return asuint(x) >> 20;
+}
- GET_FLOAT_WORD(hx, x);
- sign = hx >> 31; /* sign bit of x */
- hx &= 0x7fffffff; /* high word of |x| */
+float expf(float x)
+{
+ uint32_t abstop;
+ uint64_t ki, t;
+ double_t kd, xd, z, r, r2, y, s;
- /* special cases */
- if (hx >= 0x42aeac50) { /* if |x| >= -87.33655f or NaN */
- if (hx > 0x7f800000) /* NaN */
- return x;
- if (hx >= 0x42b17218 && !sign) { /* x >= 88.722839f */
- /* overflow */
- x *= 0x1p127f;
- return x;
- }
- if (sign) {
- /* underflow */
- FORCE_EVAL(-0x1p-149f/x);
- if (hx >= 0x42cff1b5) /* x <= -103.972084f */
- return 0;
- }
+ xd = (double_t)x;
+ abstop = top12(x) & 0x7ff;
+ if (predict_false(abstop >= top12(88.0f))) {
+ /* |x| >= 88 or x is nan. */
+ if (asuint(x) == asuint(-INFINITY))
+ return 0.0f;
+ if (abstop >= top12(INFINITY))
+ return x + x;
+ if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+ return __math_oflowf(0);
+ if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+ return __math_uflowf(0);
}
- /* argument reduction */
- if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
- if (hx > 0x3f851592) /* if |x| > 1.5 ln2 */
- k = invln2*x + half[sign];
- else
- k = 1 - sign - sign;
- hi = x - k*ln2hi; /* k*ln2hi is exact here */
- lo = k*ln2lo;
- x = hi - lo;
- } else if (hx > 0x39000000) { /* |x| > 2**-14 */
- k = 0;
- hi = x;
- lo = 0;
- } else {
- /* raise inexact */
- FORCE_EVAL(0x1p127f + x);
- return 1 + x;
- }
+ /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
+ z = InvLn2N * xd;
+
+ /* Round and convert z to int, the result is in [-150*N, 128*N] and
+ ideally ties-to-even rule is used, otherwise the magnitude of r
+ can be bigger which gives larger approximation error. */
+#if TOINT_INTRINSICS
+ kd = roundtoint(z);
+ ki = converttoint(z);
+#else
+# define SHIFT __exp2f_data.shift
+ kd = eval_as_double(z + SHIFT);
+ ki = asuint64(kd);
+ kd -= SHIFT;
+#endif
+ r = z - kd;
- /* x is now in primary range */
- xx = x*x;
- c = x - xx*(P1+xx*P2);
- y = 1 + (x*c/(2-c) - lo + hi);
- if (k == 0)
- return y;
- return scalbnf(y, k);
+ /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N];
+ t += ki << (52 - EXP2F_TABLE_BITS);
+ s = asdouble(t);
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y;
+ y = y * s;
+ return eval_as_float(y);
}
diff --git a/src/math/log.c b/src/math/log.c
index e61e113d..cc52585a 100644
--- a/src/math/log.c
+++ b/src/math/log.c
@@ -1,118 +1,112 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */
/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision log(x) function.
*
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* log(x)
- * Return the logarithm of x
- *
- * Method :
- * 1. Argument Reduction: find k and f such that
- * x = 2^k * (1+f),
- * where sqrt(2)/2 < 1+f < sqrt(2) .
- *
- * 2. Approximation of log(1+f).
- * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
- * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
- * = 2s + s*R
- * We use a special Remez algorithm on [0,0.1716] to generate
- * a polynomial of degree 14 to approximate R The maximum error
- * of this polynomial approximation is bounded by 2**-58.45. In
- * other words,
- * 2 4 6 8 10 12 14
- * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
- * (the values of Lg1 to Lg7 are listed in the program)
- * and
- * | 2 14 | -58.45
- * | Lg1*s +...+Lg7*s - R(z) | <= 2
- * | |
- * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
- * In order to guarantee error in log below 1ulp, we compute log
- * by
- * log(1+f) = f - s*(f - R) (if f is not too large)
- * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
- *
- * 3. Finally, log(x) = k*ln2 + log(1+f).
- * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
- * Here ln2 is split into two floating point number:
- * ln2_hi + ln2_lo,
- * where n*ln2_hi is always exact for |n| < 2000.
- *
- * Special cases:
- * log(x) is NaN with signal if x < 0 (including -INF) ;
- * log(+INF) is +INF; log(0) is -INF with signal;
- * log(NaN) is that NaN with no signal.
- *
- * Accuracy:
- * according to an error analysis, the error is always less than
- * 1 ulp (unit in the last place).
- *
- * Constants:
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
+#include "libm.h"
+#include "log_data.h"
+
+#define T __log_data.tab
+#define T2 __log_data.tab2
+#define B __log_data.poly1
+#define A __log_data.poly
+#define Ln2hi __log_data.ln2hi
+#define Ln2lo __log_data.ln2lo
+#define N (1 << LOG_TABLE_BITS)
+#define OFF 0x3fe6000000000000
-static const double
-ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
-ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
-Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+/* Top 16 bits of a double. */
+static inline uint32_t top16(double x)
+{
+ return asuint64(x) >> 48;
+}
double log(double x)
{
- union {double f; uint64_t i;} u = {x};
- double_t hfsq,f,s,z,R,w,t1,t2,dk;
- uint32_t hx;
- int k;
+ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+ uint64_t ix, iz, tmp;
+ uint32_t top;
+ int k, i;
+
+ ix = asuint64(x);
+ top = top16(x);
+#define LO asuint64(1.0 - 0x1p-4)
+#define HI asuint64(1.0 + 0x1.09p-4)
+ if (predict_false(ix - LO < HI - LO)) {
+ /* Handle close to 1.0 inputs separately. */
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
+ return 0;
+ r = x - 1.0;
+ r2 = r * r;
+ r3 = r * r2;
+ y = r3 *
+ (B[1] + r * B[2] + r2 * B[3] +
+ r3 * (B[4] + r * B[5] + r2 * B[6] +
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+ /* Worst-case error is around 0.507 ULP. */
+ w = r * 0x1p27;
+ double_t rhi = r + w - w;
+ double_t rlo = r - rhi;
+ w = rhi * rhi * B[0]; /* B[0] == -0.5. */
+ hi = r + w;
+ lo = r - hi + w;
+ lo += B[0] * rlo * (rhi + r);
+ y += lo;
+ y += hi;
+ return eval_as_double(y);
+ }
+ if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
+ /* x < 0x1p-1022 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzero(1);
+ if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
+ return x;
+ if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+ return __math_invalid(x);
+ /* x is subnormal, normalize it. */
+ ix = asuint64(x * 0x1p52);
+ ix -= 52ULL << 52;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+ k = (int64_t)tmp >> 52; /* arithmetic shift */
+ iz = ix - (tmp & 0xfffULL << 52);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = asdouble(iz);
- hx = u.i>>32;
- k = 0;
- if (hx < 0x00100000 || hx>>31) {
- if (u.i<<1 == 0)
- return -1/(x*x); /* log(+-0)=-inf */
- if (hx>>31)
- return (x-x)/0.0; /* log(-#) = NaN */
- /* subnormal number, scale x up */
- k -= 54;
- x *= 0x1p54;
- u.f = x;
- hx = u.i>>32;
- } else if (hx >= 0x7ff00000) {
- return x;
- } else if (hx == 0x3ff00000 && u.i<<32 == 0)
- return 0;
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ /* r ~= z/c - 1, |r| < 1/(2*N). */
+#if __FP_FAST_FMA
+ /* rounding error: 0x1p-55/N. */
+ r = __builtin_fma(z, invc, -1.0);
+#else
+ /* rounding error: 0x1p-55/N + 0x1p-66. */
+ r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+ kd = (double_t)k;
- /* reduce x into [sqrt(2)/2, sqrt(2)] */
- hx += 0x3ff00000 - 0x3fe6a09e;
- k += (int)(hx>>20) - 0x3ff;
- hx = (hx&0x000fffff) + 0x3fe6a09e;
- u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
- x = u.f;
+ /* hi + lo = r + log(c) + k*Ln2. */
+ w = kd * Ln2hi + logc;
+ hi = w + r;
+ lo = w - hi + r + kd * Ln2lo;
- f = x - 1.0;
- hfsq = 0.5*f*f;
- s = f/(2.0+f);
- z = s*s;
- w = z*z;
- t1 = w*(Lg2+w*(Lg4+w*Lg6));
- t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
- R = t2 + t1;
- dk = k;
- return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;
+ /* log(x) = lo + (log1p(r) - r) + hi. */
+ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
+ /* Worst case error if |y| > 0x1p-5:
+ 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
+ Worst case error if |y| > 0x1p-4:
+ 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
+ y = lo + r2 * A[0] +
+ r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+ return eval_as_double(y);
}
diff --git a/src/math/log2.c b/src/math/log2.c
index 0aafad4b..1276ed4e 100644
--- a/src/math/log2.c
+++ b/src/math/log2.c
@@ -1,122 +1,122 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */
/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision log2(x) function.
*
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * Return the base 2 logarithm of x. See log.c for most comments.
- *
- * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2
- * as in log.c, then combine and scale in extra precision:
- * log2(x) = (f - f*f/2 + r)/log(2) + k
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
+#include "libm.h"
+#include "log2_data.h"
-static const double
-ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
-ivln2lo = 1.67517131648865118353e-10, /* 0x3de705fc, 0x2eefa200 */
-Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+#define T __log2_data.tab
+#define T2 __log2_data.tab2
+#define B __log2_data.poly1
+#define A __log2_data.poly
+#define InvLn2hi __log2_data.invln2hi
+#define InvLn2lo __log2_data.invln2lo
+#define N (1 << LOG2_TABLE_BITS)
+#define OFF 0x3fe6000000000000
-double log2(double x)
+/* Top 16 bits of a double. */
+static inline uint32_t top16(double x)
{
- union {double f; uint64_t i;} u = {x};
- double_t hfsq,f,s,z,R,w,t1,t2,y,hi,lo,val_hi,val_lo;
- uint32_t hx;
- int k;
-
- hx = u.i>>32;
- k = 0;
- if (hx < 0x00100000 || hx>>31) {
- if (u.i<<1 == 0)
- return -1/(x*x); /* log(+-0)=-inf */
- if (hx>>31)
- return (x-x)/0.0; /* log(-#) = NaN */
- /* subnormal number, scale x up */
- k -= 54;
- x *= 0x1p54;
- u.f = x;
- hx = u.i>>32;
- } else if (hx >= 0x7ff00000) {
- return x;
- } else if (hx == 0x3ff00000 && u.i<<32 == 0)
- return 0;
-
- /* reduce x into [sqrt(2)/2, sqrt(2)] */
- hx += 0x3ff00000 - 0x3fe6a09e;
- k += (int)(hx>>20) - 0x3ff;
- hx = (hx&0x000fffff) + 0x3fe6a09e;
- u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
- x = u.f;
+ return asuint64(x) >> 48;
+}
- f = x - 1.0;
- hfsq = 0.5*f*f;
- s = f/(2.0+f);
- z = s*s;
- w = z*z;
- t1 = w*(Lg2+w*(Lg4+w*Lg6));
- t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
- R = t2 + t1;
+double log2(double x)
+{
+ double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
+ uint64_t ix, iz, tmp;
+ uint32_t top;
+ int k, i;
- /*
- * f-hfsq must (for args near 1) be evaluated in extra precision
- * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
- * This is fairly efficient since f-hfsq only depends on f, so can
- * be evaluated in parallel with R. Not combining hfsq with R also
- * keeps R small (though not as small as a true `lo' term would be),
- * so that extra precision is not needed for terms involving R.
- *
- * Compiler bugs involving extra precision used to break Dekker's
- * theorem for spitting f-hfsq as hi+lo, unless double_t was used
- * or the multi-precision calculations were avoided when double_t
- * has extra precision. These problems are now automatically
- * avoided as a side effect of the optimization of combining the
- * Dekker splitting step with the clear-low-bits step.
- *
- * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
- * precision to avoid a very large cancellation when x is very near
- * these values. Unlike the above cancellations, this problem is
- * specific to base 2. It is strange that adding +-1 is so much
- * harder than adding +-ln2 or +-log10_2.
- *
- * This uses Dekker's theorem to normalize y+val_hi, so the
- * compiler bugs are back in some configurations, sigh. And I
- * don't want to used double_t to avoid them, since that gives a
- * pessimization and the support for avoiding the pessimization
- * is not yet available.
- *
- * The multi-precision calculations for the multiplications are
- * routine.
- */
+ ix = asuint64(x);
+ top = top16(x);
+#define LO asuint64(1.0 - 0x1.5b51p-5)
+#define HI asuint64(1.0 + 0x1.6ab2p-5)
+ if (predict_false(ix - LO < HI - LO)) {
+ /* Handle close to 1.0 inputs separately. */
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
+ return 0;
+ r = x - 1.0;
+#if __FP_FAST_FMA
+ hi = r * InvLn2hi;
+ lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
+#else
+ double_t rhi, rlo;
+ rhi = asdouble(asuint64(r) & -1ULL << 32);
+ rlo = r - rhi;
+ hi = rhi * InvLn2hi;
+ lo = rlo * InvLn2hi + r * InvLn2lo;
+#endif
+ r2 = r * r; /* rounding error: 0x1p-62. */
+ r4 = r2 * r2;
+ /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
+ p = r2 * (B[0] + r * B[1]);
+ y = hi + p;
+ lo += hi - y + p;
+ lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
+ y += lo;
+ return eval_as_double(y);
+ }
+ if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
+ /* x < 0x1p-1022 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzero(1);
+ if (ix == asuint64(INFINITY)) /* log2(inf) == inf. */
+ return x;
+ if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+ return __math_invalid(x);
+ /* x is subnormal, normalize it. */
+ ix = asuint64(x * 0x1p52);
+ ix -= 52ULL << 52;
+ }
- /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
- hi = f - hfsq;
- u.f = hi;
- u.i &= (uint64_t)-1<<32;
- hi = u.f;
- lo = f - hi - hfsq + s*(hfsq+R);
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
+ k = (int64_t)tmp >> 52; /* arithmetic shift */
+ iz = ix - (tmp & 0xfffULL << 52);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = asdouble(iz);
+ kd = (double_t)k;
- val_hi = hi*ivln2hi;
- val_lo = (lo+hi)*ivln2lo + lo*ivln2hi;
+ /* log2(x) = log2(z/c) + log2(c) + k. */
+ /* r ~= z/c - 1, |r| < 1/(2*N). */
+#if __FP_FAST_FMA
+ /* rounding error: 0x1p-55/N. */
+ r = __builtin_fma(z, invc, -1.0);
+ t1 = r * InvLn2hi;
+ t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
+#else
+ double_t rhi, rlo;
+ /* rounding error: 0x1p-55/N + 0x1p-65. */
+ r = (z - T2[i].chi - T2[i].clo) * invc;
+ rhi = asdouble(asuint64(r) & -1ULL << 32);
+ rlo = r - rhi;
+ t1 = rhi * InvLn2hi;
+ t2 = rlo * InvLn2hi + r * InvLn2lo;
+#endif
- /* spadd(val_hi, val_lo, y), except for not using double_t: */
- y = k;
- w = y + val_hi;
- val_lo += (y - w) + val_hi;
- val_hi = w;
+ /* hi + lo = r/ln2 + log2(c) + k. */
+ t3 = kd + logc;
+ hi = t3 + t1;
+ lo = t3 - hi + t1 + t2;
- return val_lo + val_hi;
+ /* log2(r+1) = r/ln2 + r^2*poly(r). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
+ r4 = r2 * r2;
+ /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
+ ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
+ p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
+ y = lo + r2 * p + hi;
+ return eval_as_double(y);
}
diff --git a/src/math/log2_data.c b/src/math/log2_data.c
new file mode 100644
index 00000000..3dd1ca51
--- /dev/null
+++ b/src/math/log2_data.c
@@ -0,0 +1,201 @@
+/*
+ * Data for log2.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log2_data.h"
+
+#define N (1 << LOG2_TABLE_BITS)
+
+const struct log2_data __log2_data = {
+// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
+.invln2hi = 0x1.7154765200000p+0,
+.invln2lo = 0x1.705fc2eefa200p-33,
+.poly1 = {
+// relative error: 0x1.2fad8188p-63
+// in -0x1.5b51p-5 0x1.6ab2p-5
+-0x1.71547652b82fep-1,
+0x1.ec709dc3a03f7p-2,
+-0x1.71547652b7c3fp-2,
+0x1.2776c50f05be4p-2,
+-0x1.ec709dd768fe5p-3,
+0x1.a61761ec4e736p-3,
+-0x1.7153fbc64a79bp-3,
+0x1.484d154f01b4ap-3,
+-0x1.289e4a72c383cp-3,
+0x1.0b32f285aee66p-3,
+},
+.poly = {
+// relative error: 0x1.a72c2bf8p-58
+// abs error: 0x1.67a552c8p-66
+// in -0x1.f45p-8 0x1.f45p-8
+-0x1.71547652b8339p-1,
+0x1.ec709dc3a04bep-2,
+-0x1.7154764702ffbp-2,
+0x1.2776c50034c48p-2,
+-0x1.ec7b328ea92bcp-3,
+0x1.a6225e117f92ep-3,
+},
+/* Algorithm:
+
+ x = 2^k z
+ log2(x) = k + log2(c) + log2(z/c)
+ log2(z/c) = poly(z/c - 1)
+
+where z is in [0x1.6p-1; 0x1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = (double)log2(c)
+ tab2[i].chi = (double)c
+ tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+ 1) the rounding error in 0x1.8p10 + logc is 0,
+ 2) the rounding error in z - chi - clo is < 0x1p-64 and
+ 3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
+
+Note: 1) ensures that k + logc can be computed without rounding error, 2)
+ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
+single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
+.tab = {
+{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
+{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
+{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
+{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
+{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
+{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
+{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
+{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
+{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
+{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
+{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
+{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
+{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
+{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
+{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
+{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
+{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
+{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
+{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
+{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
+{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
+{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
+{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
+{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
+{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
+{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
+{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
+{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
+{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
+{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
+{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
+{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
+{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
+{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
+{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
+{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
+{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
+{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
+{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
+{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
+{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
+{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
+{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
+{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
+{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
+{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
+{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
+{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
+{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
+{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
+{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
+{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
+{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
+{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
+{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
+{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
+{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
+{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
+{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
+{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
+{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
+{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
+{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
+{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
+},
+#if !__FP_FAST_FMA
+.tab2 = {
+{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
+{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
+{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
+{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
+{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
+{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
+{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
+{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
+{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
+{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
+{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
+{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
+{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
+{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
+{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
+{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
+{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
+{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
+{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
+{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
+{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
+{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
+{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
+{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
+{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
+{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
+{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
+{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
+{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
+{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
+{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
+{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
+{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
+{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
+{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
+{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
+{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
+{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
+{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
+{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
+{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
+{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
+{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
+{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
+{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
+{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
+{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
+{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
+{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
+{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
+{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
+{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
+{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
+{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
+{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
+{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
+{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
+{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
+{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
+{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
+{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
+{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
+{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
+{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
+},
+#endif
+};
diff --git a/src/math/log2_data.h b/src/math/log2_data.h
new file mode 100644
index 00000000..276a786d
--- /dev/null
+++ b/src/math/log2_data.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG2_DATA_H
+#define _LOG2_DATA_H
+
+#include <features.h>
+
+#define LOG2_TABLE_BITS 6
+#define LOG2_POLY_ORDER 7
+#define LOG2_POLY1_ORDER 11
+extern hidden const struct log2_data {
+ double invln2hi;
+ double invln2lo;
+ double poly[LOG2_POLY_ORDER - 1];
+ double poly1[LOG2_POLY1_ORDER - 1];
+ struct {
+ double invc, logc;
+ } tab[1 << LOG2_TABLE_BITS];
+#if !__FP_FAST_FMA
+ struct {
+ double chi, clo;
+ } tab2[1 << LOG2_TABLE_BITS];
+#endif
+} __log2_data;
+
+#endif
diff --git a/src/math/log2f.c b/src/math/log2f.c
index b3e305fe..c368f88f 100644
--- a/src/math/log2f.c
+++ b/src/math/log2f.c
@@ -1,74 +1,72 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */
/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision log2 function.
*
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * See comments in log2.c.
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
+#include "libm.h"
+#include "log2f_data.h"
+
+/*
+LOG2F_TABLE_BITS = 4
+LOG2F_POLY_ORDER = 4
+
+ULP error: 0.752 (nearest rounding.)
+Relative error: 1.9 * 2^-26 (before rounding.)
+*/
-static const float
-ivln2hi = 1.4428710938e+00, /* 0x3fb8b000 */
-ivln2lo = -1.7605285393e-04, /* 0xb9389ad4 */
-/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
-Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
-Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
-Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
-Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
+#define N (1 << LOG2F_TABLE_BITS)
+#define T __log2f_data.tab
+#define A __log2f_data.poly
+#define OFF 0x3f330000
float log2f(float x)
{
- union {float f; uint32_t i;} u = {x};
- float_t hfsq,f,s,z,R,w,t1,t2,hi,lo;
- uint32_t ix;
- int k;
+ double_t z, r, r2, p, y, y0, invc, logc;
+ uint32_t ix, iz, top, tmp;
+ int k, i;
- ix = u.i;
- k = 0;
- if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */
- if (ix<<1 == 0)
- return -1/(x*x); /* log(+-0)=-inf */
- if (ix>>31)
- return (x-x)/0.0f; /* log(-#) = NaN */
- /* subnormal number, scale up x */
- k -= 25;
- x *= 0x1p25f;
- u.f = x;
- ix = u.i;
- } else if (ix >= 0x7f800000) {
- return x;
- } else if (ix == 0x3f800000)
+ ix = asuint(x);
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
return 0;
+ if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+ /* x < 0x1p-126 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzerof(1);
+ if (ix == 0x7f800000) /* log2(inf) == inf. */
+ return x;
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+ return __math_invalidf(x);
+ /* x is subnormal, normalize it. */
+ ix = asuint(x * 0x1p23f);
+ ix -= 23 << 23;
+ }
- /* reduce x into [sqrt(2)/2, sqrt(2)] */
- ix += 0x3f800000 - 0x3f3504f3;
- k += (int)(ix>>23) - 0x7f;
- ix = (ix&0x007fffff) + 0x3f3504f3;
- u.i = ix;
- x = u.f;
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
+ top = tmp & 0xff800000;
+ iz = ix - top;
+ k = (int32_t)tmp >> 23; /* arithmetic shift */
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t)asfloat(iz);
- f = x - 1.0f;
- s = f/(2.0f + f);
- z = s*s;
- w = z*z;
- t1= w*(Lg2+w*Lg4);
- t2= z*(Lg1+w*Lg3);
- R = t2 + t1;
- hfsq = 0.5f*f*f;
+ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+ r = z * invc - 1;
+ y0 = logc + (double_t)k;
- hi = f - hfsq;
- u.f = hi;
- u.i &= 0xfffff000;
- hi = u.f;
- lo = f - hi - hfsq + s*(hfsq+R);
- return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + k;
+ /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ p = A[3] * r + y0;
+ y = y * r2 + p;
+ return eval_as_float(y);
}
diff --git a/src/math/log2f_data.c b/src/math/log2f_data.c
new file mode 100644
index 00000000..24e450f1
--- /dev/null
+++ b/src/math/log2f_data.c
@@ -0,0 +1,33 @@
+/*
+ * Data definition for log2f.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log2f_data.h"
+
+const struct log2f_data __log2f_data = {
+ .tab = {
+ { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
+ { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
+ { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
+ { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
+ { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
+ { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
+ { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
+ { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
+ { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
+ { 0x1p+0, 0x0p+0 },
+ { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
+ { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
+ { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
+ { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
+ { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
+ { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
+ },
+ .poly = {
+ -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
+ 0x1.715475f35c8b8p0,
+ }
+};
diff --git a/src/math/log2f_data.h b/src/math/log2f_data.h
new file mode 100644
index 00000000..4fa48956
--- /dev/null
+++ b/src/math/log2f_data.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG2F_DATA_H
+#define _LOG2F_DATA_H
+
+#include <features.h>
+
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern hidden const struct log2f_data {
+ struct {
+ double invc, logc;
+ } tab[1 << LOG2F_TABLE_BITS];
+ double poly[LOG2F_POLY_ORDER];
+} __log2f_data;
+
+#endif
diff --git a/src/math/log_data.c b/src/math/log_data.c
new file mode 100644
index 00000000..1a6ec712
--- /dev/null
+++ b/src/math/log_data.c
@@ -0,0 +1,328 @@
+/*
+ * Data for log.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log_data.h"
+
+#define N (1 << LOG_TABLE_BITS)
+
+const struct log_data __log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly1 = {
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+},
+.poly = {
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+},
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = (double)log(c)
+ tab2[i].chi = (double)c
+ tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+ 1) the rounding error in 0x1.8p9 + logc is 0,
+ 2) the rounding error in z - chi - clo is < 0x1p-66 and
+ 3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased. */
+.tab = {
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+},
+#if !__FP_FAST_FMA
+.tab2 = {
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+},
+#endif
+};
diff --git a/src/math/log_data.h b/src/math/log_data.h
new file mode 100644
index 00000000..1be22ab2
--- /dev/null
+++ b/src/math/log_data.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG_DATA_H
+#define _LOG_DATA_H
+
+#include <features.h>
+
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern hidden const struct log_data {
+ double ln2hi;
+ double ln2lo;
+ double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
+ double poly1[LOG_POLY1_ORDER - 1];
+ struct {
+ double invc, logc;
+ } tab[1 << LOG_TABLE_BITS];
+#if !__FP_FAST_FMA
+ struct {
+ double chi, clo;
+ } tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data;
+
+#endif
diff --git a/src/math/logf.c b/src/math/logf.c
index 52230a1b..7ee5d7fe 100644
--- a/src/math/logf.c
+++ b/src/math/logf.c
@@ -1,69 +1,71 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */
/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision log function.
*
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
+#include "libm.h"
+#include "logf_data.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
-static const float
-ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
-ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
-/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
-Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
-Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
-Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
-Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
float logf(float x)
{
- union {float f; uint32_t i;} u = {x};
- float_t hfsq,f,s,z,R,w,t1,t2,dk;
- uint32_t ix;
- int k;
+ double_t z, r, r2, y, y0, invc, logc;
+ uint32_t ix, iz, tmp;
+ int k, i;
- ix = u.i;
- k = 0;
- if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */
- if (ix<<1 == 0)
- return -1/(x*x); /* log(+-0)=-inf */
- if (ix>>31)
- return (x-x)/0.0f; /* log(-#) = NaN */
- /* subnormal number, scale up x */
- k -= 25;
- x *= 0x1p25f;
- u.f = x;
- ix = u.i;
- } else if (ix >= 0x7f800000) {
- return x;
- } else if (ix == 0x3f800000)
+ ix = asuint(x);
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
return 0;
+ if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+ /* x < 0x1p-126 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzerof(1);
+ if (ix == 0x7f800000) /* log(inf) == inf. */
+ return x;
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+ return __math_invalidf(x);
+ /* x is subnormal, normalize it. */
+ ix = asuint(x * 0x1p23f);
+ ix -= 23 << 23;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+ k = (int32_t)tmp >> 23; /* arithmetic shift */
+ iz = ix - (tmp & 0x1ff << 23);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t)asfloat(iz);
- /* reduce x into [sqrt(2)/2, sqrt(2)] */
- ix += 0x3f800000 - 0x3f3504f3;
- k += (int)(ix>>23) - 0x7f;
- ix = (ix&0x007fffff) + 0x3f3504f3;
- u.i = ix;
- x = u.f;
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+ r = z * invc - 1;
+ y0 = logc + (double_t)k * Ln2;
- f = x - 1.0f;
- s = f/(2.0f + f);
- z = s*s;
- w = z*z;
- t1= w*(Lg2+w*Lg4);
- t2= z*(Lg1+w*Lg3);
- R = t2 + t1;
- hfsq = 0.5f*f*f;
- dk = k;
- return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;
+ /* Pipelined polynomial evaluation to approximate log1p(r). */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ y = y * r2 + (y0 + r);
+ return eval_as_float(y);
}
diff --git a/src/math/logf_data.c b/src/math/logf_data.c
new file mode 100644
index 00000000..857221f7
--- /dev/null
+++ b/src/math/logf_data.c
@@ -0,0 +1,33 @@
+/*
+ * Data definition for logf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "logf_data.h"
+
+const struct logf_data __logf_data = {
+ .tab = {
+ { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
+ { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
+ { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
+ { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
+ { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
+ { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
+ { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
+ { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
+ { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
+ { 0x1p+0, 0x0p+0 },
+ { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
+ { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
+ { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
+ { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
+ { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
+ { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
+ },
+ .ln2 = 0x1.62e42fefa39efp-1,
+ .poly = {
+ -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
+ }
+};
diff --git a/src/math/logf_data.h b/src/math/logf_data.h
new file mode 100644
index 00000000..00cff6f8
--- /dev/null
+++ b/src/math/logf_data.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOGF_DATA_H
+#define _LOGF_DATA_H
+
+#include <features.h>
+
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern hidden const struct logf_data {
+ struct {
+ double invc, logc;
+ } tab[1 << LOGF_TABLE_BITS];
+ double ln2;
+ double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
+} __logf_data;
+
+#endif
diff --git a/src/math/pow.c b/src/math/pow.c
index 3ddc1b6f..694c2ef6 100644
--- a/src/math/pow.c
+++ b/src/math/pow.c
@@ -1,328 +1,343 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */
/*
- * ====================================================
- * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision x^y function.
*
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* pow(x,y) return x**y
- *
- * n
- * Method: Let x = 2 * (1+f)
- * 1. Compute and return log2(x) in two pieces:
- * log2(x) = w1 + w2,
- * where w1 has 53-24 = 29 bit trailing zeros.
- * 2. Perform y*log2(x) = n+y' by simulating muti-precision
- * arithmetic, where |y'|<=0.5.
- * 3. Return x**y = 2**n*exp(y'*log2)
- *
- * Special cases:
- * 1. (anything) ** 0 is 1
- * 2. 1 ** (anything) is 1
- * 3. (anything except 1) ** NAN is NAN
- * 4. NAN ** (anything except 0) is NAN
- * 5. +-(|x| > 1) ** +INF is +INF
- * 6. +-(|x| > 1) ** -INF is +0
- * 7. +-(|x| < 1) ** +INF is +0
- * 8. +-(|x| < 1) ** -INF is +INF
- * 9. -1 ** +-INF is 1
- * 10. +0 ** (+anything except 0, NAN) is +0
- * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
- * 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero
- * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero
- * 14. -0 ** (+odd integer) is -0
- * 15. -0 ** (-odd integer) is -INF, raise divbyzero
- * 16. +INF ** (+anything except 0,NAN) is +INF
- * 17. +INF ** (-anything except 0,NAN) is +0
- * 18. -INF ** (+odd integer) is -INF
- * 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer)
- * 20. (anything) ** 1 is (anything)
- * 21. (anything) ** -1 is 1/(anything)
- * 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
- * 23. (-anything except 0 and inf) ** (non-integer) is NAN
- *
- * Accuracy:
- * pow(x,y) returns x**y nearly rounded. In particular
- * pow(integer,integer)
- * always returns the correct integer provided it is
- * representable.
- *
- * Constants :
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
+#include <math.h>
+#include <stdint.h>
#include "libm.h"
+#include "exp_data.h"
+#include "pow_data.h"
-static const double
-bp[] = {1.0, 1.5,},
-dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
-dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
-two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */
-huge = 1.0e300,
-tiny = 1.0e-300,
-/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
-L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
-L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
-L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
-L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
-L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
-L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
-P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
-lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
-lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
-ovt = 8.0085662595372944372e-017, /* -(1024-log2(ovfl+.5ulp)) */
-cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
-cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
-cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
-ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
-ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
-ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
+/*
+Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
+relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
+ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
+*/
-double pow(double x, double y)
+#define T __pow_log_data.tab
+#define A __pow_log_data.poly
+#define Ln2hi __pow_log_data.ln2hi
+#define Ln2lo __pow_log_data.ln2lo
+#define N (1 << POW_LOG_TABLE_BITS)
+#define OFF 0x3fe6955500000000
+
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t top12(double x)
{
- double z,ax,z_h,z_l,p_h,p_l;
- double y1,t1,t2,r,s,t,u,v,w;
- int32_t i,j,k,yisint,n;
- int32_t hx,hy,ix,iy;
- uint32_t lx,ly;
+ return asuint64(x) >> 52;
+}
- EXTRACT_WORDS(hx, lx, x);
- EXTRACT_WORDS(hy, ly, y);
- ix = hx & 0x7fffffff;
- iy = hy & 0x7fffffff;
+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
+ 15 additional bits of precision. IX is the bit representation of x, but
+ normalized in the subnormal range using the sign bit for the exponent. */
+static inline double_t log_inline(uint64_t ix, double_t *tail)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
+ uint64_t iz, tmp;
+ int k, i;
- /* x**0 = 1, even if x is NaN */
- if ((iy|ly) == 0)
- return 1.0;
- /* 1**y = 1, even if y is NaN */
- if (hx == 0x3ff00000 && lx == 0)
- return 1.0;
- /* NaN if either arg is NaN */
- if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0) ||
- iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0))
- return x + y;
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
+ k = (int64_t)tmp >> 52; /* arithmetic shift */
+ iz = ix - (tmp & 0xfffULL << 52);
+ z = asdouble(iz);
+ kd = (double_t)k;
- /* determine if y is an odd int when x < 0
- * yisint = 0 ... y is not an integer
- * yisint = 1 ... y is an odd int
- * yisint = 2 ... y is an even int
- */
- yisint = 0;
- if (hx < 0) {
- if (iy >= 0x43400000)
- yisint = 2; /* even integer y */
- else if (iy >= 0x3ff00000) {
- k = (iy>>20) - 0x3ff; /* exponent */
- if (k > 20) {
- uint32_t j = ly>>(52-k);
- if ((j<<(52-k)) == ly)
- yisint = 2 - (j&1);
- } else if (ly == 0) {
- uint32_t j = iy>>(20-k);
- if ((j<<(20-k)) == iy)
- yisint = 2 - (j&1);
- }
- }
- }
+ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
+ invc = T[i].invc;
+ logc = T[i].logc;
+ logctail = T[i].logctail;
- /* special value of y */
- if (ly == 0) {
- if (iy == 0x7ff00000) { /* y is +-inf */
- if (((ix-0x3ff00000)|lx) == 0) /* (-1)**+-inf is 1 */
- return 1.0;
- else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */
- return hy >= 0 ? y : 0.0;
- else /* (|x|<1)**+-inf = 0,inf */
- return hy >= 0 ? 0.0 : -y;
- }
- if (iy == 0x3ff00000) { /* y is +-1 */
- if (hy >= 0)
- return x;
- y = 1/x;
-#if FLT_EVAL_METHOD!=0
- {
- union {double f; uint64_t i;} u = {y};
- uint64_t i = u.i & -1ULL/2;
- if (i>>52 == 0 && (i&(i-1)))
- FORCE_EVAL((float)y);
- }
+ /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+ |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
+#if __FP_FAST_FMA
+ r = __builtin_fma(z, invc, -1.0);
+#else
+ /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
+ double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
+ double_t zlo = z - zhi;
+ double_t rhi = zhi * invc - 1.0;
+ double_t rlo = zlo * invc;
+ r = rhi + rlo;
#endif
- return y;
- }
- if (hy == 0x40000000) /* y is 2 */
- return x*x;
- if (hy == 0x3fe00000) { /* y is 0.5 */
- if (hx >= 0) /* x >= +0 */
- return sqrt(x);
- }
+
+ /* k*Ln2 + log(c) + r. */
+ t1 = kd * Ln2hi + logc;
+ t2 = t1 + r;
+ lo1 = kd * Ln2lo + logctail;
+ lo2 = t1 - t2 + r;
+
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ double_t ar, ar2, ar3, lo3, lo4;
+ ar = A[0] * r; /* A[0] = -0.5. */
+ ar2 = r * ar;
+ ar3 = r * ar2;
+ /* k*Ln2 + log(c) + r + A[0]*r*r. */
+#if __FP_FAST_FMA
+ hi = t2 + ar2;
+ lo3 = __builtin_fma(ar, r, -ar2);
+ lo4 = t2 - hi + ar2;
+#else
+ double_t arhi = A[0] * rhi;
+ double_t arhi2 = rhi * arhi;
+ hi = t2 + arhi2;
+ lo3 = rlo * (ar + arhi);
+ lo4 = t2 - hi + arhi2;
+#endif
+ /* p = log1p(r) - r - A[0]*r*r. */
+ p = (ar3 * (A[1] + r * A[2] +
+ ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
+ lo = lo1 + lo2 + lo3 + lo4 + p;
+ y = hi + lo;
+ *tail = hi - y + lo;
+ return y;
+}
+
+#undef N /* N and T are rebound below for the exp half of pow. */
+#undef T
+#define N (1 << EXP_TABLE_BITS) /* modulus of the exp table index (ki % N) */
+#define InvLn2N __exp_data.invln2N /* z = InvLn2N * x is rounded to obtain k */
+#define NegLn2hiN __exp_data.negln2hiN /* kd * NegLn2hiN + kd * NegLn2loN is */
+#define NegLn2loN __exp_data.negln2loN /* added to x to form the reduced r */
+#define Shift __exp_data.shift /* added to z, then subtracted again, to round z */
+#define T __exp_data.tab /* read in pairs: T[idx] -> tail, T[idx + 1] -> scale bits */
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER] /* C2..C5 multiply r^2..r^5 in exp_inline */
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER] /* not referenced by exp_inline as written here */
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS, however it has a computed exponent that may have
+ overflowed into the sign bit so that needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale, positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+{
+ double_t scale, y;
+
+ if ((ki & 0x80000000) == 0) { /* bit 31 clear: (int32_t)ki is non-negative */
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52; /* lower the exponent field by 1009 ... */
+ scale = asdouble(sbits);
+ y = 0x1p1009 * (scale + scale * tmp); /* ... and restore it with this factor */
+ return eval_as_double(y);
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52; /* raise the exponent field by 1022; undone by the final 0x1p-1022 multiply */
+ /* Note: sbits is signed scale. */
+ scale = asdouble(sbits);
+ y = scale + scale * tmp;
+ if (fabs(y) < 1.0) { /* the final result y * 0x1p-1022 will be below 0x1p-1022 */
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo, one = 1.0;
+ if (y < 0.0)
+ one = -1.0; /* one takes the sign of y */
+ lo = scale - y + scale * tmp; /* low-order part dropped when y was formed */
+ hi = one + y;
+ lo = one - hi + y + lo; /* plus the part dropped when hi was formed */
+ y = eval_as_double(hi + lo) - one;
+ /* Fix the sign of 0. */
+ if (y == 0.0)
+ y = asdouble(sbits & 0x8000000000000000); /* only the sign bit of scale is kept */
+ /* The underflow exception needs to be signaled explicitly. */
+ fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
+ y = 0x1p-1022 * y;
+ return eval_as_double(y);
+}
- ax = fabs(x);
- /* special value of x */
- if (lx == 0) {
- if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) { /* x is +-0,+-inf,+-1 */
- z = ax;
- if (hy < 0) /* z = (1/|x|) */
- z = 1.0/z;
- if (hx < 0) {
- if (((ix-0x3ff00000)|yisint) == 0) {
- z = (z-z)/(z-z); /* (-1)**non-int is NaN */
- } else if (yisint == 1)
- z = -z; /* (x<0)**odd = -(|x|**odd) */
- }
- return z;
+#define SIGN_BIAS (0x800 << EXP_TABLE_BITS) /* becomes bit 63 after the << (52 - EXP_TABLE_BITS) in exp_inline */
+
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
+static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
+{
+ uint32_t abstop;
+ uint64_t ki, idx, top, sbits;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, z, r, r2, scale, tail, tmp;
+
+ abstop = top12(x) & 0x7ff; /* presumably the exponent bits of x with the sign masked off; top12 is defined elsewhere */
+ if (predict_false(abstop - top12(0x1p-54) >=
+ top12(512.0) - top12(0x1p-54))) {
+ if (abstop - top12(0x1p-54) >= 0x80000000) { /* presumably an unsigned wrap, i.e. abstop below top12(0x1p-54) */
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
+ return sign_bias ? -one : one;
+ }
+ if (abstop >= top12(1024.0)) {
+ /* Note: inf and nan are already handled. */
+ if (asuint64(x) >> 63) /* sign bit of x set */
+ return __math_uflow(sign_bias);
+ else
+ return __math_oflow(sign_bias);
}
+ /* Large x is special cased below. */
+ abstop = 0; /* marker tested after the polynomial to route to specialcase() */
}
- s = 1.0; /* sign of result */
- if (hx < 0) {
- if (yisint == 0) /* (x<0)**(non-int) is NaN */
- return (x-x)/(x-x);
- if (yisint == 1) /* (x<0)**(odd int) */
- s = -1.0;
- }
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ z = InvLn2N * x;
+#if TOINT_INTRINSICS
+ kd = roundtoint(z);
+ ki = converttoint(z);
+#elif EXP_USE_TOINT_NARROW
+ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
+ kd = eval_as_double(z + Shift);
+ ki = asuint64(kd) >> 16;
+ kd = (double_t)(int32_t)ki;
+#else
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ kd = eval_as_double(z + Shift);
+ ki = asuint64(kd); /* raw bits of z + Shift are used as k */
+ kd -= Shift; /* k as a floating-point value */
+#endif
+ r = x + kd * NegLn2hiN + kd * NegLn2loN;
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ r += xtail;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N); /* two table words per k % N: T[idx] and T[idx + 1] */
+ top = (ki + sign_bias) << (52 - EXP_TABLE_BITS); /* SIGN_BIAS, if present, ends up at bit 63 */
+ tail = asdouble(T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.25/N ulp larger. */
+ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
+ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+ if (predict_false(abstop == 0)) /* set above for large |x| */
+ return specialcase(tmp, sbits, ki);
+ scale = asdouble(sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double(scale + scale * tmp);
+}
- /* |y| is huge */
- if (iy > 0x41e00000) { /* if |y| > 2**31 */
- if (iy > 0x43f00000) { /* if |y| > 2**64, must o/uflow */
- if (ix <= 0x3fefffff)
- return hy < 0 ? huge*huge : tiny*tiny;
- if (ix >= 0x3ff00000)
- return hy > 0 ? huge*huge : tiny*tiny;
+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
+ the bit representation of a non-zero finite floating-point value. */
+static inline int checkint(uint64_t iy)
+{
+ int e = iy >> 52 & 0x7ff; /* 11-bit biased exponent field */
+ if (e < 0x3ff) /* exponent below the bias: magnitude < 1 */
+ return 0;
+ if (e > 0x3ff + 52) /* magnitude >= 2^53: every such double is an even integer */
+ return 2;
+ if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) /* a bit below the weight-1 position is set */
+ return 0;
+ if (iy & (1ULL << (0x3ff + 52 - e))) /* bit at the weight-1 position is set */
+ return 1;
+ return 2;
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan. */
+static inline int zeroinfnan(uint64_t i)
+{
+ return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1; /* 2*i drops the sign bit; the -1 wraps zero to the maximum, so one unsigned compare covers all three */
+}
+
+double pow(double x, double y)
+{ /* Special cases are filtered first; otherwise the result is exp_inline(y * log_inline(|x|)) with the sign carried in sign_bias. */
+ uint32_t sign_bias = 0;
+ uint64_t ix, iy;
+ uint32_t topx, topy;
+
+ ix = asuint64(x);
+ iy = asuint64(y);
+ topx = top12(x);
+ topy = top12(y);
+ if (predict_false(topx - 0x001 >= 0x7ff - 0x001 ||
+ (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
+ /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
+ and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
+ /* Special cases: (x < 0x1p-126 or inf or nan) or
+ (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
+ if (predict_false(zeroinfnan(iy))) {
+ if (2 * iy == 0) /* y is +0 or -0 */
+ return issignaling_inline(x) ? x + y : 1.0;
+ if (ix == asuint64(1.0)) /* x is exactly +1 */
+ return issignaling_inline(y) ? x + y : 1.0;
+ if (2 * ix > 2 * asuint64(INFINITY) ||
+ 2 * iy > 2 * asuint64(INFINITY)) /* x or y is nan */
+ return x + y;
+ if (2 * ix == 2 * asuint64(1.0)) /* |x| == 1; y is +-inf at this point */
+ return 1.0;
+ if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
+ return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
+ return y * y; /* y is +-inf here, so this is +inf */
}
- /* over/underflow if x is not close to one */
- if (ix < 0x3fefffff)
- return hy < 0 ? s*huge*huge : s*tiny*tiny;
- if (ix > 0x3ff00000)
- return hy > 0 ? s*huge*huge : s*tiny*tiny;
- /* now |1-x| is tiny <= 2**-20, suffice to compute
- log(x) by x-x^2/2+x^3/3-x^4/4 */
- t = ax - 1.0; /* t has 20 trailing zeros */
- w = (t*t)*(0.5 - t*(0.3333333333333333333333-t*0.25));
- u = ivln2_h*t; /* ivln2_h has 21 sig. bits */
- v = t*ivln2_l - w*ivln2;
- t1 = u + v;
- SET_LOW_WORD(t1, 0);
- t2 = v - (t1-u);
- } else {
- double ss,s2,s_h,s_l,t_h,t_l;
- n = 0;
- /* take care subnormal number */
- if (ix < 0x00100000) {
- ax *= two53;
- n -= 53;
- GET_HIGH_WORD(ix,ax);
+ if (predict_false(zeroinfnan(ix))) {
+ double_t x2 = x * x;
+ if (ix >> 63 && checkint(iy) == 1) /* sign bit of x set and y is an odd integer */
+ x2 = -x2;
+ /* Without the barrier some versions of clang hoist the 1/x2 and
+ thus division by zero exception can be signaled spuriously. */
+ return iy >> 63 ? fp_barrier(1 / x2) : x2;
}
- n += ((ix)>>20) - 0x3ff;
- j = ix & 0x000fffff;
- /* determine interval */
- ix = j | 0x3ff00000; /* normalize ix */
- if (j <= 0x3988E) /* |x|<sqrt(3/2) */
- k = 0;
- else if (j < 0xBB67A) /* |x|<sqrt(3) */
- k = 1;
- else {
- k = 0;
- n += 1;
- ix -= 0x00100000;
+ /* Here x and y are non-zero finite. */
+ if (ix >> 63) {
+ /* Finite x < 0. */
+ int yint = checkint(iy);
+ if (yint == 0)
+ return __math_invalid(x);
+ if (yint == 1)
+ sign_bias = SIGN_BIAS;
+ ix &= 0x7fffffffffffffff; /* continue with the bits of |x| */
+ topx &= 0x7ff;
+ }
+ if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
+ /* Note: sign_bias == 0 here because y is not odd. */
+ if (ix == asuint64(1.0))
+ return 1.0;
+ if ((topy & 0x7ff) < 0x3be) {
+ /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
+ if (WANT_ROUNDING)
+ return ix > asuint64(1.0) ? 1.0 + y :
+ 1.0 - y;
+ else
+ return 1.0;
+ }
+ return (ix > asuint64(1.0)) == (topy < 0x800) ? /* topy < 0x800 presumably means the sign bit of y is clear */
+ __math_oflow(0) :
+ __math_uflow(0);
+ }
+ if (topx == 0) {
+ /* Normalize subnormal x so exponent becomes negative. */
+ ix = asuint64(x * 0x1p52);
+ ix &= 0x7fffffffffffffff;
+ ix -= 52ULL << 52; /* take the 0x1p52 scaling back out of the exponent field */
}
- SET_HIGH_WORD(ax, ix);
-
- /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
- u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
- v = 1.0/(ax+bp[k]);
- ss = u*v;
- s_h = ss;
- SET_LOW_WORD(s_h, 0);
- /* t_h=ax+bp[k] High */
- t_h = 0.0;
- SET_HIGH_WORD(t_h, ((ix>>1)|0x20000000) + 0x00080000 + (k<<18));
- t_l = ax - (t_h-bp[k]);
- s_l = v*((u-s_h*t_h)-s_h*t_l);
- /* compute log(ax) */
- s2 = ss*ss;
- r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
- r += s_l*(s_h+ss);
- s2 = s_h*s_h;
- t_h = 3.0 + s2 + r;
- SET_LOW_WORD(t_h, 0);
- t_l = r - ((t_h-3.0)-s2);
- /* u+v = ss*(1+...) */
- u = s_h*t_h;
- v = s_l*t_h + t_l*ss;
- /* 2/(3log2)*(ss+...) */
- p_h = u + v;
- SET_LOW_WORD(p_h, 0);
- p_l = v - (p_h-u);
- z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
- z_l = cp_l*p_h+p_l*cp + dp_l[k];
- /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
- t = (double)n;
- t1 = ((z_h + z_l) + dp_h[k]) + t;
- SET_LOW_WORD(t1, 0);
- t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
}
- /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
- y1 = y;
- SET_LOW_WORD(y1, 0);
- p_l = (y-y1)*t1 + y*t2;
- p_h = y1*t1;
- z = p_l + p_h;
- EXTRACT_WORDS(j, i, z);
- if (j >= 0x40900000) { /* z >= 1024 */
- if (((j-0x40900000)|i) != 0) /* if z > 1024 */
- return s*huge*huge; /* overflow */
- if (p_l + ovt > z - p_h)
- return s*huge*huge; /* overflow */
- } else if ((j&0x7fffffff) >= 0x4090cc00) { /* z <= -1075 */ // FIXME: instead of abs(j) use unsigned j
- if (((j-0xc090cc00)|i) != 0) /* z < -1075 */
- return s*tiny*tiny; /* underflow */
- if (p_l <= z - p_h)
- return s*tiny*tiny; /* underflow */
- }
- /*
- * compute 2**(p_h+p_l)
- */
- i = j & 0x7fffffff;
- k = (i>>20) - 0x3ff;
- n = 0;
- if (i > 0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */
- n = j + (0x00100000>>(k+1));
- k = ((n&0x7fffffff)>>20) - 0x3ff; /* new k for n */
- t = 0.0;
- SET_HIGH_WORD(t, n & ~(0x000fffff>>k));
- n = ((n&0x000fffff)|0x00100000)>>(20-k);
- if (j < 0)
- n = -n;
- p_h -= t;
- }
- t = p_l + p_h;
- SET_LOW_WORD(t, 0);
- u = t*lg2_h;
- v = (p_l-(t-p_h))*lg2 + t*lg2_l;
- z = u + v;
- w = v - (z-u);
- t = z*z;
- t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
- r = (z*t1)/(t1-2.0) - (w + z*w);
- z = 1.0 - (r-z);
- GET_HIGH_WORD(j, z);
- j += n<<20;
- if ((j>>20) <= 0) /* subnormal output */
- z = scalbn(z,n);
- else
- SET_HIGH_WORD(z, j);
- return s*z;
+ double_t lo;
+ double_t hi = log_inline(ix, &lo); /* log value returned as hi with a correction term in lo */
+ double_t ehi, elo;
+#if __FP_FAST_FMA
+ ehi = y * hi;
+ elo = y * lo + __builtin_fma(y, hi, -ehi); /* the fma term is the rounding error of y * hi */
+#else
+ double_t yhi = asdouble(iy & -1ULL << 27); /* y with its low 27 bits cleared */
+ double_t ylo = y - yhi;
+ double_t lhi = asdouble(asuint64(hi) & -1ULL << 27); /* hi with its low 27 bits cleared */
+ double_t llo = hi - lhi + lo;
+ ehi = yhi * lhi;
+ elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
+#endif
+ return exp_inline(ehi, elo, sign_bias);
}
diff --git a/src/math/pow_data.c b/src/math/pow_data.c
new file mode 100644
index 00000000..81e760de
--- /dev/null
+++ b/src/math/pow_data.c
@@ -0,0 +1,180 @@
+/*
+ * Data for the log part of pow.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "pow_data.h"
+
+#define N (1 << POW_LOG_TABLE_BITS)
+
+const struct pow_log_data __pow_log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly = {
+// relative error: 0x1.11922ap-70
+// in -0x1.6bp-8 0x1.6bp-8
+// Coefficients are scaled to match the scaling during evaluation.
+-0x1p-1,
+0x1.555555555556p-2 * -2,
+-0x1.0000000000006p-2 * -2,
+0x1.999999959554ep-3 * 4,
+-0x1.555555529a47ap-3 * 4,
+0x1.2495b9b4845e9p-3 * -8,
+-0x1.0002b8b263fc3p-3 * -8,
+},
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
+and z falls into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = round(0x1p43*log(c))/0x1p43
+ tab[i].logctail = (double)(log(c) - logc)
+
+where c is chosen near the center of the subinterval such that 1/c has only a
+few precision bits so z/c - 1 is exactly representable as double:
+
+ 1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
+
+Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
+the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
+error and the interval for z is selected such that near x == 1, where log(x)
+is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
+.tab = {
+#define A(a, b, c) {a, 0, b, c},
+A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
+A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
+A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
+A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
+A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
+A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
+A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
+A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
+A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
+A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
+A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
+A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
+A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
+A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
+A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
+A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
+A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
+A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
+A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
+A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
+A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
+A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
+A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
+A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
+A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
+A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
+A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
+A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
+A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
+A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
+A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
+A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
+A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
+A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
+A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
+A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
+A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
+A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
+A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
+A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
+A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
+A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
+A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
+A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
+A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
+A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
+A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
+A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
+A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
+A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
+A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
+A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
+A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
+A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
+A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
+A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
+A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
+A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
+A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
+A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
+A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
+A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
+A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
+A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
+A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
+A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
+A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
+A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
+A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
+A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
+A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
+A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
+A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
+A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
+A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
+A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
+A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
+A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
+A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
+A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
+A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
+A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
+A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
+A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
+A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
+A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
+A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
+A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
+A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
+A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
+A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
+A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
+A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
+A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
+A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
+A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
+A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
+A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
+A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
+A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
+A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
+A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
+A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
+A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
+A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
+A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
+A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
+A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
+A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
+A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
+A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
+A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
+A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
+A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
+A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
+A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
+A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
+A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
+A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
+A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
+A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
+A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
+A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
+A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
+A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
+A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
+A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
+A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
+},
+};
diff --git a/src/math/pow_data.h b/src/math/pow_data.h
new file mode 100644
index 00000000..5d609ae8
--- /dev/null
+++ b/src/math/pow_data.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _POW_DATA_H
+#define _POW_DATA_H
+
+#include <features.h>
+
+#define POW_LOG_TABLE_BITS 7 /* tab[] below has 1 << POW_LOG_TABLE_BITS entries */
+#define POW_LOG_POLY_ORDER 8 /* poly[] below holds POW_LOG_POLY_ORDER - 1 coefficients */
+extern hidden const struct pow_log_data {
+ double ln2hi; /* ln2hi and ln2lo: two-part ln2 constant, initialized in pow_data.c */
+ double ln2lo;
+ double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
+ /* Note: the pad field is unused, but allows slightly faster indexing. */
+ struct {
+ double invc, pad, logc, logctail; /* per subinterval: 1/c, padding, rounded log(c), and log(c) - logc */
+ } tab[1 << POW_LOG_TABLE_BITS];
+} __pow_log_data;
+
+#endif
diff --git a/src/math/powf.c b/src/math/powf.c
index 427c8965..de8fab54 100644
--- a/src/math/powf.c
+++ b/src/math/powf.c
@@ -1,259 +1,185 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */
/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
*/
+#include <math.h>
+#include <stdint.h>
#include "libm.h"
+#include "exp2f_data.h"
+#include "powf_data.h"
-static const float
-bp[] = {1.0, 1.5,},
-dp_h[] = { 0.0, 5.84960938e-01,}, /* 0x3f15c000 */
-dp_l[] = { 0.0, 1.56322085e-06,}, /* 0x35d1cfdc */
-two24 = 16777216.0, /* 0x4b800000 */
-huge = 1.0e30,
-tiny = 1.0e-30,
-/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
-L1 = 6.0000002384e-01, /* 0x3f19999a */
-L2 = 4.2857143283e-01, /* 0x3edb6db7 */
-L3 = 3.3333334327e-01, /* 0x3eaaaaab */
-L4 = 2.7272811532e-01, /* 0x3e8ba305 */
-L5 = 2.3066075146e-01, /* 0x3e6c3255 */
-L6 = 2.0697501302e-01, /* 0x3e53f142 */
-P1 = 1.6666667163e-01, /* 0x3e2aaaab */
-P2 = -2.7777778450e-03, /* 0xbb360b61 */
-P3 = 6.6137559770e-05, /* 0x388ab355 */
-P4 = -1.6533901999e-06, /* 0xb5ddea0e */
-P5 = 4.1381369442e-08, /* 0x3331bb4c */
-lg2 = 6.9314718246e-01, /* 0x3f317218 */
-lg2_h = 6.93145752e-01, /* 0x3f317200 */
-lg2_l = 1.42860654e-06, /* 0x35bfbe8c */
-ovt = 4.2995665694e-08, /* -(128-log2(ovfl+.5ulp)) */
-cp = 9.6179670095e-01, /* 0x3f76384f =2/(3ln2) */
-cp_h = 9.6191406250e-01, /* 0x3f764000 =12b cp */
-cp_l = -1.1736857402e-04, /* 0xb8f623c6 =tail of cp_h */
-ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
-ivln2_h = 1.4426879883e+00, /* 0x3fb8aa00 =16b 1/ln2*/
-ivln2_l = 7.0526075433e-06; /* 0x36eca570 =1/ln2 tail*/
+/*
+POWF_LOG2_POLY_ORDER = 5
+EXP2F_TABLE_BITS = 5
-float powf(float x, float y)
+ULP error: 0.82 (~ 0.5 + relerr*2^24)
+relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
+relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
+relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
+*/
+
+#define N (1 << POWF_LOG2_TABLE_BITS) /* log2_inline reduces its table index modulo N */
+#define T __powf_log2_data.tab /* entries supply invc and logc */
+#define A __powf_log2_data.poly /* A[0]..A[4] are read by log2_inline */
+#define OFF 0x3f330000 /* float bit pattern subtracted from ix to split x into 2^k * z */
+
+/* Subnormal input is normalized so ix has negative biased exponent.
+ Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. */
+static inline double_t log2_inline(uint32_t ix)
{
- float z,ax,z_h,z_l,p_h,p_l;
- float y1,t1,t2,r,s,sn,t,u,v,w;
- int32_t i,j,k,yisint,n;
- int32_t hx,hy,ix,iy,is;
+ double_t z, r, r2, r4, p, q, y, y0, invc, logc;
+ uint32_t iz, top, tmp;
+ int k, i;
- GET_FLOAT_WORD(hx, x);
- GET_FLOAT_WORD(hy, y);
- ix = hx & 0x7fffffff;
- iy = hy & 0x7fffffff;
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; /* top mantissa bits of tmp select the table entry */
+ top = tmp & 0xff800000; /* sign and exponent bits of tmp */
+ iz = ix - top; /* bits of z: ix with that exponent offset removed */
+ k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t)asfloat(iz);
- /* x**0 = 1, even if x is NaN */
- if (iy == 0)
- return 1.0f;
- /* 1**y = 1, even if y is NaN */
- if (hx == 0x3f800000)
- return 1.0f;
- /* NaN if either arg is NaN */
- if (ix > 0x7f800000 || iy > 0x7f800000)
- return x + y;
+ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+ r = z * invc - 1;
+ y0 = logc + (double_t)k;
- /* determine if y is an odd int when x < 0
- * yisint = 0 ... y is not an integer
- * yisint = 1 ... y is an odd int
- * yisint = 2 ... y is an even int
- */
- yisint = 0;
- if (hx < 0) {
- if (iy >= 0x4b800000)
- yisint = 2; /* even integer y */
- else if (iy >= 0x3f800000) {
- k = (iy>>23) - 0x7f; /* exponent */
- j = iy>>(23-k);
- if ((j<<(23-k)) == iy)
- yisint = 2 - (j & 1);
- }
- }
+ /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
+ r2 = r * r;
+ y = A[0] * r + A[1];
+ p = A[2] * r + A[3];
+ r4 = r2 * r2;
+ q = A[4] * r + y0;
+ q = p * r2 + q;
+ y = y * r4 + q; /* (A[0]*r + A[1])*r^4 + (A[2]*r + A[3])*r^2 + A[4]*r + y0 */
+ return y;
+}
- /* special value of y */
- if (iy == 0x7f800000) { /* y is +-inf */
- if (ix == 0x3f800000) /* (-1)**+-inf is 1 */
- return 1.0f;
- else if (ix > 0x3f800000) /* (|x|>1)**+-inf = inf,0 */
- return hy >= 0 ? y : 0.0f;
- else /* (|x|<1)**+-inf = 0,inf */
- return hy >= 0 ? 0.0f: -y;
- }
- if (iy == 0x3f800000) /* y is +-1 */
- return hy >= 0 ? x : 1.0f/x;
- if (hy == 0x40000000) /* y is 2 */
- return x*x;
- if (hy == 0x3f000000) { /* y is 0.5 */
- if (hx >= 0) /* x >= +0 */
- return sqrtf(x);
- }
+#undef N /* N and T are rebound below for the exp2 table. */
+#undef T
+#define N (1 << EXP2F_TABLE_BITS) /* exp2_inline indexes T with ki % N */
+#define T __exp2f_data.tab
+#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11)) /* becomes bit 63 after the << (52 - EXP2F_TABLE_BITS) in exp2_inline */
- ax = fabsf(x);
- /* special value of x */
- if (ix == 0x7f800000 || ix == 0 || ix == 0x3f800000) { /* x is +-0,+-inf,+-1 */
- z = ax;
- if (hy < 0) /* z = (1/|x|) */
- z = 1.0f/z;
- if (hx < 0) {
- if (((ix-0x3f800000)|yisint) == 0) {
- z = (z-z)/(z-z); /* (-1)**non-int is NaN */
- } else if (yisint == 1)
- z = -z; /* (x<0)**odd = -(|x|**odd) */
- }
- return z;
- }
+/* The output of log2 and thus the input of exp2 is either scaled by N
+ (in case of fast toint intrinsics) or not. The unscaled xd must be
+ in [-1021,1023], sign_bias sets the sign of the result. */
+static inline float exp2_inline(double_t xd, uint32_t sign_bias)
+{
+ uint64_t ki, ski, t;
+ double_t kd, z, r, r2, y, s;
- sn = 1.0f; /* sign of result */
- if (hx < 0) {
- if (yisint == 0) /* (x<0)**(non-int) is NaN */
- return (x-x)/(x-x);
- if (yisint == 1) /* (x<0)**(odd int) */
- sn = -1.0f;
- }
+#if TOINT_INTRINSICS
+#define C __exp2f_data.poly_scaled
+ /* N*x = k + r with r in [-1/2, 1/2] */
+ kd = roundtoint(xd); /* k */
+ ki = converttoint(xd);
+#else
+#define C __exp2f_data.poly
+#define SHIFT __exp2f_data.shift_scaled
+ /* x = k/N + r with r in [-1/(2N), 1/(2N)] */
+ kd = eval_as_double(xd + SHIFT);
+ ki = asuint64(kd); /* raw bits of xd + SHIFT are used as k */
+ kd -= SHIFT; /* k/N */
+#endif
+ r = xd - kd;
- /* |y| is huge */
- if (iy > 0x4d000000) { /* if |y| > 2**27 */
- /* over/underflow if x is not close to one */
- if (ix < 0x3f7ffff8)
- return hy < 0 ? sn*huge*huge : sn*tiny*tiny;
- if (ix > 0x3f800007)
- return hy > 0 ? sn*huge*huge : sn*tiny*tiny;
- /* now |1-x| is tiny <= 2**-20, suffice to compute
- log(x) by x-x^2/2+x^3/3-x^4/4 */
- t = ax - 1; /* t has 20 trailing zeros */
- w = (t*t)*(0.5f - t*(0.333333333333f - t*0.25f));
- u = ivln2_h*t; /* ivln2_h has 16 sig. bits */
- v = t*ivln2_l - w*ivln2;
- t1 = u + v;
- GET_FLOAT_WORD(is, t1);
- SET_FLOAT_WORD(t1, is & 0xfffff000);
- t2 = v - (t1-u);
- } else {
- float s2,s_h,s_l,t_h,t_l;
- n = 0;
- /* take care subnormal number */
- if (ix < 0x00800000) {
- ax *= two24;
- n -= 24;
- GET_FLOAT_WORD(ix, ax);
- }
- n += ((ix)>>23) - 0x7f;
- j = ix & 0x007fffff;
- /* determine interval */
- ix = j | 0x3f800000; /* normalize ix */
- if (j <= 0x1cc471) /* |x|<sqrt(3/2) */
- k = 0;
- else if (j < 0x5db3d7) /* |x|<sqrt(3) */
- k = 1;
- else {
- k = 0;
- n += 1;
- ix -= 0x00800000;
- }
- SET_FLOAT_WORD(ax, ix);
+ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N];
+ ski = ki + sign_bias;
+ t += ski << (52 - EXP2F_TABLE_BITS); /* SIGN_BIAS, if present, ends up at bit 63 */
+ s = asdouble(t);
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y; /* (C[0]*r + C[1])*r^2 + C[2]*r + 1 */
+ y = y * s;
+ return eval_as_float(y);
+}
- /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
- u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
- v = 1.0f/(ax+bp[k]);
- s = u*v;
- s_h = s;
- GET_FLOAT_WORD(is, s_h);
- SET_FLOAT_WORD(s_h, is & 0xfffff000);
- /* t_h=ax+bp[k] High */
- is = ((ix>>1) & 0xfffff000) | 0x20000000;
- SET_FLOAT_WORD(t_h, is + 0x00400000 + (k<<21));
- t_l = ax - (t_h - bp[k]);
- s_l = v*((u - s_h*t_h) - s_h*t_l);
- /* compute log(ax) */
- s2 = s*s;
- r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
- r += s_l*(s_h+s);
- s2 = s_h*s_h;
- t_h = 3.0f + s2 + r;
- GET_FLOAT_WORD(is, t_h);
- SET_FLOAT_WORD(t_h, is & 0xfffff000);
- t_l = r - ((t_h - 3.0f) - s2);
- /* u+v = s*(1+...) */
- u = s_h*t_h;
- v = s_l*t_h + t_l*s;
- /* 2/(3log2)*(s+...) */
- p_h = u + v;
- GET_FLOAT_WORD(is, p_h);
- SET_FLOAT_WORD(p_h, is & 0xfffff000);
- p_l = v - (p_h - u);
- z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
- z_l = cp_l*p_h + p_l*cp+dp_l[k];
- /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
- t = (float)n;
- t1 = (((z_h + z_l) + dp_h[k]) + t);
- GET_FLOAT_WORD(is, t1);
- SET_FLOAT_WORD(t1, is & 0xfffff000);
- t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
- }
+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
+ the bit representation of a non-zero finite floating-point value. */
+static inline int checkint(uint32_t iy)
+{
+ int e = iy >> 23 & 0xff; /* biased exponent field (bits 23..30) */
+ if (e < 0x7f) /* exponent below the bias: magnitude < 1, not an integer */
+ return 0;
+ if (e > 0x7f + 23) /* spacing between values is at least 2: always even */
+ return 2;
+ if (iy & ((1 << (0x7f + 23 - e)) - 1)) /* any fraction bit below the units position set */
+ return 0;
+ if (iy & (1 << (0x7f + 23 - e))) /* bit just above the fraction set: odd */
+ return 1;
+ return 2;
+}
+
+static inline int zeroinfnan(uint32_t ix) /* nonzero iff the bit pattern ix is +-0, +-inf or NaN */
+{
+ return 2 * ix - 1 >= 2u * 0x7f800000 - 1; /* 2*ix drops the sign bit; the -1 wraps zero around to the top of the range */
+}
- /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
- GET_FLOAT_WORD(is, y);
- SET_FLOAT_WORD(y1, is & 0xfffff000);
- p_l = (y-y1)*t1 + y*t2;
- p_h = y1*t1;
- z = p_l + p_h;
- GET_FLOAT_WORD(j, z);
- if (j > 0x43000000) /* if z > 128 */
- return sn*huge*huge; /* overflow */
- else if (j == 0x43000000) { /* if z == 128 */
- if (p_l + ovt > z - p_h)
- return sn*huge*huge; /* overflow */
- } else if ((j&0x7fffffff) > 0x43160000) /* z < -150 */ // FIXME: check should be (uint32_t)j > 0xc3160000
- return sn*tiny*tiny; /* underflow */
- else if (j == 0xc3160000) { /* z == -150 */
- if (p_l <= z-p_h)
- return sn*tiny*tiny; /* underflow */
+float powf(float x, float y) /* x to the power y: exp2_inline(y * log2_inline(ix)) after special-case handling */
+{
+ uint32_t sign_bias = 0;
+ uint32_t ix, iy;
+
+ ix = asuint(x); /* ix, iy: bit representations of x and y */
+ iy = asuint(y);
+ if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
+ zeroinfnan(iy))) {
+ /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
+ if (predict_false(zeroinfnan(iy))) {
+ if (2 * iy == 0) /* y is +0 or -0 */
+ return issignalingf_inline(x) ? x + y : 1.0f;
+ if (ix == 0x3f800000) /* x is exactly +1.0f */
+ return issignalingf_inline(y) ? x + y : 1.0f;
+ if (2 * ix > 2u * 0x7f800000 || /* x or y is NaN */
+ 2 * iy > 2u * 0x7f800000)
+ return x + y;
+ if (2 * ix == 2 * 0x3f800000) /* |x| == 1 */
+ return 1.0f;
+ if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+ return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
+ return y * y; /* y is +-inf here, so this yields +inf */
+ }
+ if (predict_false(zeroinfnan(ix))) {
+ float_t x2 = x * x; /* x is zero, inf or NaN here */
+ if (ix & 0x80000000 && checkint(iy) == 1) /* negative x with odd integer y */
+ x2 = -x2;
+ /* Without the barrier some versions of clang hoist the 1/x2 and
+ thus division by zero exception can be signaled spuriously. */
+ return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2; /* negative y: take the reciprocal */
+ }
+ /* x and y are non-zero finite. */
+ if (ix & 0x80000000) {
+ /* Finite x < 0. */
+ int yint = checkint(iy);
+ if (yint == 0)
+ return __math_invalidf(x);
+ if (yint == 1) /* odd integer y: pass SIGN_BIAS on to exp2_inline */
+ sign_bias = SIGN_BIAS;
+ ix &= 0x7fffffff; /* clear the sign bit: continue with |x| */
+ }
+ if (ix < 0x00800000) {
+ /* Normalize subnormal x so exponent becomes negative. */
+ ix = asuint(x * 0x1p23f);
+ ix &= 0x7fffffff;
+ ix -= 23 << 23; /* take 23 back off the exponent field to undo the 2^23 scaling */
+ }
 }
- /*
- * compute 2**(p_h+p_l)
- */
- i = j & 0x7fffffff;
- k = (i>>23) - 0x7f;
- n = 0;
- if (i > 0x3f000000) { /* if |z| > 0.5, set n = [z+0.5] */
- n = j + (0x00800000>>(k+1));
- k = ((n&0x7fffffff)>>23) - 0x7f; /* new k for n */
- SET_FLOAT_WORD(t, n & ~(0x007fffff>>k));
- n = ((n&0x007fffff)|0x00800000)>>(23-k);
- if (j < 0)
- n = -n;
- p_h -= t;
+ double_t logx = log2_inline(ix);
+ double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
+ if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
+ asuint64(126.0 * POWF_SCALE) >> 47)) {
+ /* |y*log(x)| >= 126. */
+ if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
+ return __math_oflowf(sign_bias);
+ if (ylogx <= -150.0 * POWF_SCALE)
+ return __math_uflowf(sign_bias);
 }
- t = p_l + p_h;
- GET_FLOAT_WORD(is, t);
- SET_FLOAT_WORD(t, is & 0xffff8000);
- u = t*lg2_h;
- v = (p_l-(t-p_h))*lg2 + t*lg2_l;
- z = u + v;
- w = v - (z - u);
- t = z*z;
- t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
- r = (z*t1)/(t1-2.0f) - (w+z*w);
- z = 1.0f - (r - z);
- GET_FLOAT_WORD(j, z);
- j += n<<23;
- if ((j>>23) <= 0) /* subnormal output */
- z = scalbnf(z, n);
- else
- SET_FLOAT_WORD(z, j);
- return sn*z;
+ return exp2_inline(ylogx, sign_bias); /* sign_bias is folded into the result's bits inside exp2_inline */
 }
diff --git a/src/math/powf_data.c b/src/math/powf_data.c
new file mode 100644
index 00000000..13e1d9a0
--- /dev/null
+++ b/src/math/powf_data.c
@@ -0,0 +1,34 @@
+/*
+ * Data definition for powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "powf_data.h"
+
+const struct powf_log2_data __powf_log2_data = {
+ .tab = { /* 16 { invc, logc } pairs; each logc is about -log2(invc), times POWF_SCALE */
+ { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
+ { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
+ { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
+ { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
+ { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
+ { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
+ { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
+ { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
+ { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
+ { 0x1p+0, 0x0p+0 * POWF_SCALE }, /* invc == 1, logc == 0 */
+ { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
+ { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
+ { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
+ { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
+ { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
+ { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
+ },
+ .poly = { /* 5 polynomial coefficients, each pre-multiplied by POWF_SCALE */
+ 0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
+ 0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
+ 0x1.71547652ab82bp0 * POWF_SCALE, /* about 1/ln(2) */
+ }
+};
diff --git a/src/math/powf_data.h b/src/math/powf_data.h
new file mode 100644
index 00000000..5b136e28
--- /dev/null
+++ b/src/math/powf_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _POWF_DATA_H
+#define _POWF_DATA_H
+
+#include "libm.h"
+#include "exp2f_data.h"
+
+#define POWF_LOG2_TABLE_BITS 4
+#define POWF_LOG2_POLY_ORDER 5
+#if TOINT_INTRINSICS
+#define POWF_SCALE_BITS EXP2F_TABLE_BITS
+#else
+#define POWF_SCALE_BITS 0
+#endif
+#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
+extern hidden const struct powf_log2_data { /* lookup data for powf; the object is defined in powf_data.c */
+ struct {
+ double invc, logc; /* one table pair; the definition's values have logc close to -log2(invc) */
+ } tab[1 << POWF_LOG2_TABLE_BITS]; /* 16 entries with POWF_LOG2_TABLE_BITS == 4 */
+ double poly[POWF_LOG2_POLY_ORDER]; /* 5 coefficients with POWF_LOG2_POLY_ORDER == 5 */
+} __powf_log2_data;
+
+#endif
diff --git a/src/mman/mlock.c b/src/mman/mlock.c
index e683a44a..71af582f 100644
--- a/src/mman/mlock.c
+++ b/src/mman/mlock.c
@@ -3,5 +3,9 @@
int mlock(const void *addr, size_t len)
{
+#ifdef SYS_mlock
return syscall(SYS_mlock, addr, len);
+#else
+ return syscall(SYS_mlock2, addr, len, 0); /* SYS_mlock not defined for this target: use mlock2 with flags 0 */
+#endif
}
diff --git a/src/network/dn_skipname.c b/src/network/dn_skipname.c
index d54c2e5d..eba65bb8 100644
--- a/src/network/dn_skipname.c
+++ b/src/network/dn_skipname.c
@@ -2,11 +2,14 @@
int dn_skipname(const unsigned char *s, const unsigned char *end)
{
- const unsigned char *p;
- for (p=s; p<end; p++)
+ const unsigned char *p = s;
+ while (p < end) /* advance one length-prefixed chunk per iteration instead of one byte */
if (!*p) return p-s+1;
else if (*p>=192)
if (p+1<end) return p-s+2;
else break;
+ else
+ if (end-p<*p+1) break; /* length byte plus *p payload bytes would run past end: fail */
+ else p += *p + 1; /* skip the length byte and the *p bytes it announces */
return -1;
}
diff --git a/src/network/getaddrinfo.c b/src/network/getaddrinfo.c
index 5ae8cbfb..efaab306 100644
--- a/src/network/getaddrinfo.c
+++ b/src/network/getaddrinfo.c
@@ -104,7 +104,7 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
}
for (k=i=0; i<naddrs; i++) for (j=0; j<nservs; j++, k++) {
- out[k].slot = i;
+ out[k].slot = k;
out[k].ai = (struct addrinfo){
.ai_family = addrs[i].family,
.ai_socktype = ports[j].socktype,
@@ -113,8 +113,8 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
? sizeof(struct sockaddr_in)
: sizeof(struct sockaddr_in6),
.ai_addr = (void *)&out[k].sa,
- .ai_canonname = outcanon,
- .ai_next = &out[k+1].ai };
+ .ai_canonname = outcanon };
+ if (k) out[k-1].ai.ai_next = &out[k].ai;
switch (addrs[i].family) {
case AF_INET:
out[k].sa.sin.sin_family = AF_INET;
@@ -130,7 +130,6 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
}
}
out[0].ref = nais;
- out[nais-1].ai.ai_next = 0;
*res = &out->ai;
return 0;
}
diff --git a/src/passwd/getspnam.c b/src/passwd/getspnam.c
index 041f8965..709b526d 100644
--- a/src/passwd/getspnam.c
+++ b/src/passwd/getspnam.c
@@ -8,10 +8,11 @@ struct spwd *getspnam(const char *name)
static char *line;
struct spwd *res;
int e;
+ int orig_errno = errno;
if (!line) line = malloc(LINE_LIM);
if (!line) return 0;
e = getspnam_r(name, &sp, line, LINE_LIM, &res);
- if (e) errno = e;
+ errno = e ? e : orig_errno;
return res;
}
diff --git a/src/passwd/getspnam_r.c b/src/passwd/getspnam_r.c
index 541206fa..541e8531 100644
--- a/src/passwd/getspnam_r.c
+++ b/src/passwd/getspnam_r.c
@@ -67,6 +67,7 @@ int getspnam_r(const char *name, struct spwd *sp, char *buf, size_t size, struct
size_t k, l = strlen(name);
int skip = 0;
int cs;
+ int orig_errno = errno;
*res = 0;
@@ -93,8 +94,14 @@ int getspnam_r(const char *name, struct spwd *sp, char *buf, size_t size, struct
return errno;
}
} else {
+ if (errno != ENOENT && errno != ENOTDIR)
+ return errno;
f = fopen("/etc/shadow", "rbe");
- if (!f) return errno;
+ if (!f) {
+ if (errno != ENOENT && errno != ENOTDIR)
+ return errno;
+ return 0;
+ }
}
pthread_cleanup_push(cleanup, f);
@@ -113,6 +120,6 @@ int getspnam_r(const char *name, struct spwd *sp, char *buf, size_t size, struct
break;
}
pthread_cleanup_pop(1);
- if (rv) errno = rv;
+ errno = rv ? rv : orig_errno;
return rv;
}
diff --git a/src/passwd/putgrent.c b/src/passwd/putgrent.c
index a0b320fc..2a8257dc 100644
--- a/src/passwd/putgrent.c
+++ b/src/passwd/putgrent.c
@@ -7,7 +7,7 @@ int putgrent(const struct group *gr, FILE *f)
int r;
size_t i;
flockfile(f);
- if ((r = fprintf(f, "%s:%s:%d:", gr->gr_name, gr->gr_passwd, gr->gr_gid))<0) goto done;
+ if ((r = fprintf(f, "%s:%s:%u:", gr->gr_name, gr->gr_passwd, gr->gr_gid))<0) goto done;
if (gr->gr_mem) for (i=0; gr->gr_mem[i]; i++)
if ((r = fprintf(f, "%s%s", i?",":"", gr->gr_mem[i]))<0) goto done;
r = fputc('\n', f);
diff --git a/src/passwd/putpwent.c b/src/passwd/putpwent.c
index 3a02e573..312b7653 100644
--- a/src/passwd/putpwent.c
+++ b/src/passwd/putpwent.c
@@ -4,7 +4,7 @@
int putpwent(const struct passwd *pw, FILE *f)
{
- return fprintf(f, "%s:%s:%d:%d:%s:%s:%s\n",
+ return fprintf(f, "%s:%s:%u:%u:%s:%s:%s\n", /* pw_uid and pw_gid are now formatted as unsigned */
pw->pw_name, pw->pw_passwd, pw->pw_uid, pw->pw_gid,
pw->pw_gecos, pw->pw_dir, pw->pw_shell)<0 ? -1 : 0;
}
diff --git a/src/process/execvp.c b/src/process/execvp.c
index 1fdf036f..ef3b9dd5 100644
--- a/src/process/execvp.c
+++ b/src/process/execvp.c
@@ -28,8 +28,7 @@ int __execvpe(const char *file, char *const argv[], char *const envp[])
for(p=path; ; p=z) {
char b[l+k+1];
- z = strchr(p, ':');
- if (!z) z = p+strlen(p);
+ z = __strchrnul(p, ':');
if (z-p >= l) {
if (!*z++) break;
continue;
diff --git a/src/process/fork.c b/src/process/fork.c
index da074ae9..11286ef4 100644
--- a/src/process/fork.c
+++ b/src/process/fork.c
@@ -27,6 +27,7 @@ pid_t fork(void)
self->tid = __syscall(SYS_gettid);
self->robust_list.off = 0;
self->robust_list.pending = 0;
+ self->next = self->prev = self;
libc.threads_minus_1 = 0;
}
__restore_sigs(&set);
diff --git a/src/signal/sigaction.c b/src/signal/sigaction.c
index af47195e..05445089 100644
--- a/src/signal/sigaction.c
+++ b/src/signal/sigaction.c
@@ -21,6 +21,8 @@ void __get_handler_set(sigset_t *set)
memcpy(set, handler_set, sizeof handler_set);
}
+volatile int __eintr_valid_flag;
+
int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
{
struct k_sigaction ksa, ksa_old;
@@ -43,6 +45,10 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact
SIGPT_SET, 0, _NSIG/8);
unmask_done = 1;
}
+
+ if (!(sa->sa_flags & SA_RESTART)) {
+ a_store(&__eintr_valid_flag, 1);
+ }
}
/* Changing the disposition of SIGABRT to anything but
* SIG_DFL requires a lock, so that it cannot be changed
diff --git a/src/signal/sigaltstack.c b/src/signal/sigaltstack.c
index 62cb81ad..cfa3f5c1 100644
--- a/src/signal/sigaltstack.c
+++ b/src/signal/sigaltstack.c
@@ -9,7 +9,7 @@ int sigaltstack(const stack_t *restrict ss, stack_t *restrict old)
errno = ENOMEM;
return -1;
}
- if (ss->ss_flags & ~SS_DISABLE) {
+ if (ss->ss_flags & SS_ONSTACK) {
errno = EINVAL;
return -1;
}
diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c
index 0801e28f..aa10b818 100644
--- a/src/stdio/fgetwc.c
+++ b/src/stdio/fgetwc.c
@@ -25,12 +25,18 @@ static wint_t __fgetwc_unlocked_internal(FILE *f)
do {
b = c = getc_unlocked(f);
if (c < 0) {
- if (!first) errno = EILSEQ;
+ if (!first) {
+ f->flags |= F_ERR;
+ errno = EILSEQ;
+ }
return WEOF;
}
l = mbrtowc(&wc, (void *)&b, 1, &st);
if (l == -1) {
- if (!first) ungetc(b, f);
+ if (!first) {
+ f->flags |= F_ERR;
+ ungetc(b, f);
+ }
return WEOF;
}
first = 0;
diff --git a/src/stdio/gets.c b/src/stdio/gets.c
index 6c4645e5..17963b93 100644
--- a/src/stdio/gets.c
+++ b/src/stdio/gets.c
@@ -4,7 +4,12 @@
char *gets(char *s)
{
- char *ret = fgets(s, INT_MAX, stdin);
- if (ret && s[strlen(s)-1] == '\n') s[strlen(s)-1] = 0;
- return ret;
+ size_t i=0;
+ int c;
+ FLOCK(stdin);
+ while ((c=getc_unlocked(stdin)) != EOF && c != '\n') s[i++] = c; /* copy bytes up to newline or EOF; s is written without any bound */
+ s[i] = 0; /* terminate; the newline itself is never stored */
+ if (c != '\n' && (!feof(stdin) || !i)) s = 0; /* stopped on EOF: null if the eof flag is not set, or if nothing was read */
+ FUNLOCK(stdin);
+ return s;
}
diff --git a/src/stdio/rename.c b/src/stdio/rename.c
index 04c90c01..f540adb6 100644
--- a/src/stdio/rename.c
+++ b/src/stdio/rename.c
@@ -4,9 +4,11 @@
int rename(const char *old, const char *new)
{
-#ifdef SYS_rename
+#if defined(SYS_rename)
return syscall(SYS_rename, old, new);
-#else
+#elif defined(SYS_renameat)
return syscall(SYS_renameat, AT_FDCWD, old, AT_FDCWD, new);
+#else
+ return syscall(SYS_renameat2, AT_FDCWD, old, AT_FDCWD, new, 0); /* neither SYS_rename nor SYS_renameat defined: renameat2 with flags 0 */
#endif
}
diff --git a/src/stdio/setvbuf.c b/src/stdio/setvbuf.c
index 06ea296c..523dddc8 100644
--- a/src/stdio/setvbuf.c
+++ b/src/stdio/setvbuf.c
@@ -12,13 +12,15 @@ int setvbuf(FILE *restrict f, char *restrict buf, int type, size_t size)
if (type == _IONBF) {
f->buf_size = 0;
- } else {
+ } else if (type == _IOLBF || type == _IOFBF) {
if (buf && size >= UNGET) {
f->buf = (void *)(buf + UNGET);
f->buf_size = size - UNGET;
}
if (type == _IOLBF && f->buf_size)
f->lbf = '\n';
+ } else {
+ return -1;
}
f->flags |= F_SVB;
diff --git a/src/thread/__syscall_cp.c b/src/thread/__syscall_cp.c
index af666f06..42a01674 100644
--- a/src/thread/__syscall_cp.c
+++ b/src/thread/__syscall_cp.c
@@ -7,7 +7,7 @@ static long sccp(syscall_arg_t nr,
syscall_arg_t u, syscall_arg_t v, syscall_arg_t w,
syscall_arg_t x, syscall_arg_t y, syscall_arg_t z)
{
- return (__syscall)(nr, u, v, w, x, y, z);
+ return __syscall(nr, u, v, w, x, y, z);
}
weak_alias(sccp, __syscall_cp_c);
diff --git a/src/thread/__timedwait.c b/src/thread/__timedwait.c
index 229db313..ae19bd63 100644
--- a/src/thread/__timedwait.c
+++ b/src/thread/__timedwait.c
@@ -5,6 +5,9 @@
#include "syscall.h"
#include "pthread_impl.h"
+static volatile int dummy = 0;
+weak_alias(dummy, __eintr_valid_flag);
+
int __timedwait_cp(volatile int *addr, int val,
clockid_t clk, const struct timespec *at, int priv)
{
@@ -28,6 +31,11 @@ int __timedwait_cp(volatile int *addr, int val,
r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|priv, val, top);
if (r == ENOSYS) r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT, val, top);
if (r != EINTR && r != ETIMEDOUT && r != ECANCELED) r = 0;
+ /* Mitigate bug in old kernels wrongly reporting EINTR for non-
+ * interrupting (SA_RESTART) signal handlers. This is only practical
+ * when NO interrupting signal handlers have been installed, and
+ * works by sigaction tracking whether that's the case. */
+ if (r == EINTR && !__eintr_valid_flag) r = 0;
return r;
}
diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c
index d7afdabd..19524fe0 100644
--- a/src/thread/__tls_get_addr.c
+++ b/src/thread/__tls_get_addr.c
@@ -1,12 +1,7 @@
-#include <stddef.h>
#include "pthread_impl.h"
void *__tls_get_addr(tls_mod_off_t *v)
{
pthread_t self = __pthread_self();
- if (v[0] <= self->dtv[0])
- return (void *)(self->dtv[v[0]] + v[1]);
- return __tls_get_new(v);
+ return (void *)(self->dtv[v[0]] + v[1]); /* dtv entry v[0] plus offset v[1]; the v[0] <= dtv[0] check and __tls_get_new fallback are gone */
}
-
-weak_alias(__tls_get_addr, __tls_get_new);
diff --git a/src/thread/__unmapself.c b/src/thread/__unmapself.c
index 1d3bee1d..31d94e67 100644
--- a/src/thread/__unmapself.c
+++ b/src/thread/__unmapself.c
@@ -4,7 +4,6 @@
/* cheat and reuse CRTJMP macro from dynlink code */
#include "dynlink.h"
-static volatile int lock;
static void *unmap_base;
static size_t unmap_size;
static char shared_stack[256];
@@ -17,12 +16,8 @@ static void do_unmap()
void __unmapself(void *base, size_t size)
{
- int tid=__pthread_self()->tid;
char *stack = shared_stack + sizeof shared_stack;
stack -= (uintptr_t)stack % 16;
- while (lock || a_cas(&lock, 0, tid))
- a_spin();
- __syscall(SYS_set_tid_address, &lock);
unmap_base = base;
unmap_size = size;
CRTJMP(do_unmap, stack);
diff --git a/src/thread/i386/tls.s b/src/thread/i386/tls.s
index 76d5d462..6e4c4cb9 100644
--- a/src/thread/i386/tls.s
+++ b/src/thread/i386/tls.s
@@ -4,14 +4,6 @@
___tls_get_addr:
mov %gs:4,%edx
mov (%eax),%ecx
- cmp %ecx,(%edx)
- jc 1f
mov 4(%eax),%eax
add (%edx,%ecx,4),%eax
ret
-1: push %eax
-.weak __tls_get_new
-.hidden __tls_get_new
- call __tls_get_new
- pop %edx
- ret
diff --git a/src/thread/pthread_attr_setinheritsched.c b/src/thread/pthread_attr_setinheritsched.c
index 6a648376..ca264be7 100644
--- a/src/thread/pthread_attr_setinheritsched.c
+++ b/src/thread/pthread_attr_setinheritsched.c
@@ -1,25 +1,6 @@
#include "pthread_impl.h"
#include "syscall.h"
-hidden void *__start_sched(void *p)
-{
- struct start_sched_args *ssa = p;
- void *start_arg = ssa->start_arg;
- void *(*start_fn)(void *) = ssa->start_fn;
- pthread_t self = __pthread_self();
-
- int ret = -__syscall(SYS_sched_setscheduler, self->tid,
- ssa->attr->_a_policy, &ssa->attr->_a_prio);
- if (!ret) __restore_sigs(&ssa->mask);
- a_store(&ssa->futex, ret);
- __wake(&ssa->futex, 1, 1);
- if (ret) {
- self->detach_state = DT_DYNAMIC;
- return 0;
- }
- return start_fn(start_arg);
-}
-
int pthread_attr_setinheritsched(pthread_attr_t *a, int inherit)
{
if (inherit > 1U) return EINVAL;
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 3da7db14..ebf61ded 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,12 +15,41 @@ weak_alias(dummy_0, __release_ptc);
weak_alias(dummy_0, __pthread_tsd_run_dtors);
weak_alias(dummy_0, __do_orphaned_stdio_locks);
weak_alias(dummy_0, __dl_thread_cleanup);
+weak_alias(dummy_0, __membarrier_init);
-static void *dummy_1(void *p)
+static int tl_lock_count; /* extra acquisitions made by the thread already holding the lock */
+static int tl_lock_waiters; /* waiter counter handed to __wait and tested before __wake */
+
+void __tl_lock(void)
{
- return 0;
+ int tid = __pthread_self()->tid;
+ int val = __thread_list_lock;
+ if (val == tid) { /* lock word already holds our tid: count the nested acquisition */
+ tl_lock_count++;
+ return;
+ }
+ while ((val = a_cas(&__thread_list_lock, 0, tid))) /* retry the 0 -> tid cas until it reports 0 */
+ __wait(&__thread_list_lock, &tl_lock_waiters, val, 0); /* presumably sleeps while the lock word still equals val */
+}
+
+void __tl_unlock(void)
+{
+ if (tl_lock_count) { /* nested acquisition outstanding: drop one level and keep the lock */
+ tl_lock_count--;
+ return;
+ }
+ a_store(&__thread_list_lock, 0); /* release: lock word back to 0 */
+ if (tl_lock_waiters) __wake(&__thread_list_lock, 1, 0); /* only issue the wake when a waiter is registered */
+}
+
+void __tl_sync(pthread_t td) /* td is not referenced in the body */
+{
+ a_barrier();
+ int val = __thread_list_lock;
+ if (!val) return; /* lock word is 0: nothing to wait for */
+ __wait(&__thread_list_lock, &tl_lock_waiters, val, 0); /* presumably waits until the lock word stops being val */
+ if (tl_lock_waiters) __wake(&__thread_list_lock, 1, 0); /* pass the wake on if other waiters are registered */
}
-weak_alias(dummy_1, __start_sched);
_Noreturn void __pthread_exit(void *result)
{
@@ -46,24 +75,30 @@ _Noreturn void __pthread_exit(void *result)
* joinable threads it's a valid usage that must be handled. */
LOCK(self->killlock);
- /* Block all signals before decrementing the live thread count.
- * This is important to ensure that dynamically allocated TLS
- * is not under-allocated/over-committed, and possibly for other
- * reasons as well. */
- __block_all_sigs(&set);
-
- /* It's impossible to determine whether this is "the last thread"
- * until performing the atomic decrement, since multiple threads
- * could exit at the same time. For the last thread, revert the
- * decrement, restore the tid, and unblock signals to give the
- * atexit handlers and stdio cleanup code a consistent state. */
- if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
- libc.threads_minus_1 = 0;
- UNLOCK(self->killlock);
+ /* The thread list lock must be AS-safe, and thus requires
+ * application signals to be blocked before it can be taken. */
+ __block_app_sigs(&set);
+ __tl_lock();
+
+ /* If this is the only thread in the list, don't proceed with
+ * termination of the thread, but restore the previous lock and
+ * signal state to prepare for exit to call atexit handlers. */
+ if (self->next == self) {
+ __tl_unlock();
__restore_sigs(&set);
+ UNLOCK(self->killlock);
exit(0);
}
+ /* At this point we are committed to thread termination. Unlink
+ * the thread from the list. This change will not be visible
+ * until the lock is released, which only happens after SYS_exit
+ * has been called, via the exit futex address pointing at the lock. */
+ libc.threads_minus_1--;
+ self->next->prev = self->prev;
+ self->prev->next = self->next;
+ self->prev = self->next = self;
+
/* Process robust list in userspace to handle non-pshared mutexes
* and the detached thread case where the robust list head will
* be invalid when the kernel would process it. */
@@ -90,15 +125,11 @@ _Noreturn void __pthread_exit(void *result)
* call; the loser is responsible for freeing thread resources. */
int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING);
- if (state>=DT_DETACHED && self->map_base) {
- /* Detached threads must avoid the kernel clear_child_tid
- * feature, since the virtual address will have been
- * unmapped and possibly already reused by a new mapping
- * at the time the kernel would perform the write. In
- * the case of threads that started out detached, the
- * initial clone flags are correct, but if the thread was
- * detached later, we need to clear it here. */
- if (state == DT_DYNAMIC) __syscall(SYS_set_tid_address, 0);
+ if (state==DT_DETACHED && self->map_base) {
+ /* Detached threads must block even implementation-internal
+ * signals, since they will not have a stack in their last
+ * moments of existence. */
+ __block_all_sigs(&set);
/* Robust list will no longer be valid, and was already
* processed above, so unregister it with the kernel. */
@@ -114,6 +145,9 @@ _Noreturn void __pthread_exit(void *result)
__unmapself(self->map_base, self->map_size);
}
+ /* Wake any joiner. */
+ __wake(&self->detach_state, 1, 1);
+
/* After the kernel thread exits, its tid may be reused. Clear it
* to prevent inadvertent use and inform functions that would use
* it that it's no longer available. */
@@ -135,21 +169,38 @@ void __do_cleanup_pop(struct __ptcb *cb)
__pthread_self()->cancelbuf = cb->__next;
}
+struct start_args { /* argument block passed to start()/start_c11() as their void *p */
+ void *(*start_func)(void *); /* thread entry function; start_c11 casts it to an int-returning function */
+ void *start_arg; /* argument passed to start_func */
+ pthread_attr_t *attr; /* when non-null, start() applies _a_policy/_a_prio via SYS_sched_setscheduler */
+ volatile int *perr; /* start() swaps the sched_setscheduler result into *perr */
+ unsigned long sig_mask[_NSIG/8/sizeof(long)]; /* mask start() installs with SIG_SETMASK before calling start_func */
+};
+
static int start(void *p)
{
- pthread_t self = p;
- if (self->unblock_cancel)
- __syscall(SYS_rt_sigprocmask, SIG_UNBLOCK,
- SIGPT_SET, 0, _NSIG/8);
- __pthread_exit(self->start(self->start_arg));
+ struct start_args *args = p;
+ if (args->attr) { /* scheduling attributes were requested for this thread */
+ pthread_t self = __pthread_self();
+ int ret = -__syscall(SYS_sched_setscheduler, self->tid,
+ args->attr->_a_policy, &args->attr->_a_prio);
+ if (a_swap(args->perr, ret)==-2) /* publish the result; -2 is what the creator stores before it waits on *perr */
+ __wake(args->perr, 1, 1);
+ if (ret) { /* scheduler setup failed: exit without running start_func */
+ self->detach_state = DT_DETACHED;
+ __pthread_exit(0);
+ }
+ }
+ __syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->sig_mask, 0, _NSIG/8); /* install the mask prepared in args->sig_mask */
+ __pthread_exit(args->start_func(args->start_arg));
return 0;
}
static int start_c11(void *p)
{
- pthread_t self = p;
- int (*start)(void*) = (int(*)(void*)) self->start;
- __pthread_exit((void *)(uintptr_t)start(self->start_arg));
+ struct start_args *args = p;
+ int (*start)(void*) = (int(*)(void*)) args->start_func; /* entry function is called through an int-returning type */
+ __pthread_exit((void *)(uintptr_t)start(args->start_arg)); /* int result widened to the void * exit value */
return 0;
}
@@ -161,8 +212,6 @@ weak_alias(dummy, __pthread_tsd_size);
static void *dummy_tsd[1] = { 0 };
weak_alias(dummy_tsd, __pthread_tsd_main);
-volatile int __block_new_threads = 0;
-
static FILE *volatile dummy_file = 0;
weak_alias(dummy_file, __stdin_used);
weak_alias(dummy_file, __stdout_used);
@@ -182,9 +231,9 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
- int do_sched = 0;
pthread_attr_t attr = { 0 };
- struct start_sched_args ssa;
+ sigset_t set;
+ volatile int err = -1;
if (!libc.can_do_threads) return ENOSYS;
self = __pthread_self();
@@ -197,6 +246,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
init_file_lock(__stderr_used);
__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
self->tsd = (void **)__pthread_tsd_main;
+ __membarrier_init();
libc.threaded = 1;
}
if (attrp && !c11) attr = *attrp;
@@ -207,8 +257,6 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
attr._a_guardsize = __default_guardsize;
}
- if (__block_new_threads) __wait(&__block_new_threads, 0, 1, 1);
-
if (attr._a_stackaddr) {
size_t need = libc.tls_size + __pthread_tsd_size;
size = attr._a_stacksize;
@@ -257,49 +305,72 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
new->stack = stack;
new->stack_size = stack - stack_limit;
new->guard_size = guard;
- new->start = entry;
- new->start_arg = arg;
new->self = new;
new->tsd = (void *)tsd;
new->locale = &libc.global_locale;
if (attr._a_detach) {
new->detach_state = DT_DETACHED;
- flags -= CLONE_CHILD_CLEARTID;
} else {
new->detach_state = DT_JOINABLE;
}
- if (attr._a_sched) {
- do_sched = 1;
- ssa.futex = -1;
- ssa.start_fn = new->start;
- ssa.start_arg = new->start_arg;
- ssa.attr = &attr;
- new->start = __start_sched;
- new->start_arg = &ssa;
- __block_app_sigs(&ssa.mask);
- }
new->robust_list.head = &new->robust_list.head;
- new->unblock_cancel = self->cancel;
new->CANARY = self->CANARY;
+ new->sysinfo = self->sysinfo;
+
+ /* Setup argument structure for the new thread on its stack.
+ * It's safe to access from the caller only until the thread
+ * list is unlocked. */
+ stack -= (uintptr_t)stack % sizeof(uintptr_t);
+ stack -= sizeof(struct start_args);
+ struct start_args *args = (void *)stack;
+ args->start_func = entry;
+ args->start_arg = arg;
+ if (attr._a_sched) {
+ args->attr = &attr;
+ args->perr = &err;
+ } else {
+ args->attr = 0;
+ args->perr = 0;
+ }
- a_inc(&libc.threads_minus_1);
- ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->detach_state);
-
- __release_ptc();
-
- if (do_sched) {
- __restore_sigs(&ssa.mask);
+ /* Application signals (but not the synccall signal) must be
+ * blocked before the thread list lock can be taken, to ensure
+ * that the lock is AS-safe. */
+ __block_app_sigs(&set);
+
+ /* Ensure SIGCANCEL is unblocked in new thread. This requires
+ * working with a copy of the set so we can restore the
+ * original mask in the calling thread. */
+ memcpy(&args->sig_mask, &set, sizeof args->sig_mask);
+ args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &=
+ ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long))));
+
+ __tl_lock();
+ libc.threads_minus_1++;
+ ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock);
+
+ /* If clone succeeded, new thread must be linked on the thread
+ * list before unlocking it, even if scheduling may still fail. */
+ if (ret >= 0) {
+ new->next = self->next;
+ new->prev = self;
+ new->next->prev = new;
+ new->prev->next = new;
}
+ __tl_unlock();
+ __restore_sigs(&set);
+ __release_ptc();
if (ret < 0) {
- a_dec(&libc.threads_minus_1);
+ libc.threads_minus_1--;
if (map) __munmap(map, size);
return EAGAIN;
}
- if (do_sched) {
- __futexwait(&ssa.futex, -1, 1);
- ret = ssa.futex;
+ if (attr._a_sched) {
+ if (a_cas(&err, -1, -2)==-1)
+ __wait(&err, 0, -2, 1);
+ ret = err;
if (ret) return ret;
}
diff --git a/src/thread/pthread_detach.c b/src/thread/pthread_detach.c
index 16b0552d..77772af2 100644
--- a/src/thread/pthread_detach.c
+++ b/src/thread/pthread_detach.c
@@ -5,7 +5,7 @@ static int __pthread_detach(pthread_t t)
{
/* If the cas fails, detach state is either already-detached
* or exiting/exited, and pthread_join will trap or cleanup. */
- if (a_cas(&t->detach_state, DT_JOINABLE, DT_DYNAMIC) != DT_JOINABLE)
+ if (a_cas(&t->detach_state, DT_JOINABLE, DT_DETACHED) != DT_JOINABLE)
return __pthread_join(t, 0);
return 0;
}
diff --git a/src/thread/pthread_join.c b/src/thread/pthread_join.c
index 54d81039..b8813e02 100644
--- a/src/thread/pthread_join.c
+++ b/src/thread/pthread_join.c
@@ -1,6 +1,11 @@
#include "pthread_impl.h"
#include <sys/mman.h>
+static void dummy1(pthread_t t) /* empty body: does nothing with t */
+{
+}
+weak_alias(dummy1, __tl_sync); /* presumably a fallback, used when no other __tl_sync definition is linked */
+
static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec *at)
{
int state, cs, r = 0;
@@ -9,11 +14,11 @@ static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec
if (cs == PTHREAD_CANCEL_ENABLE) __pthread_setcancelstate(cs, 0);
while ((state = t->detach_state) && r != ETIMEDOUT && r != EINVAL) {
if (state >= DT_DETACHED) a_crash();
- r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 0);
+ r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 1);
}
__pthread_setcancelstate(cs, 0);
if (r == ETIMEDOUT || r == EINVAL) return r;
- a_barrier();
+ __tl_sync(t);
if (res) *res = t->result;
if (t->map_base) __munmap(t->map_base, t->map_size);
return 0;
diff --git a/src/thread/pthread_key_create.c b/src/thread/pthread_key_create.c
index e26f199c..d1120941 100644
--- a/src/thread/pthread_key_create.c
+++ b/src/thread/pthread_key_create.c
@@ -13,43 +13,16 @@ static void nodtor(void *dummy)
{
}
-static void dirty(void *dummy)
+static void dummy_0(void)
{
}
-struct cleanup_args {
- pthread_t caller;
- int ret;
-};
-
-static void clean_dirty_tsd_callback(void *p)
-{
- struct cleanup_args *args = p;
- pthread_t self = __pthread_self();
- pthread_key_t i;
- for (i=0; i<PTHREAD_KEYS_MAX; i++) {
- if (keys[i] == dirty && self->tsd[i])
- self->tsd[i] = 0;
- }
- /* Arbitrary choice to avoid data race. */
- if (args->caller == self) args->ret = 0;
-}
-
-static int clean_dirty_tsd(void)
-{
- struct cleanup_args args = {
- .caller = __pthread_self(),
- .ret = EAGAIN
- };
- __pthread_key_delete_synccall(clean_dirty_tsd_callback, &args);
- return args.ret;
-}
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
{
- pthread_key_t j = next_key;
pthread_t self = __pthread_self();
- int found_dirty = 0;
/* This can only happen in the main thread before
* pthread_create has been called. */
@@ -58,46 +31,38 @@ int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
/* Purely a sentinel value since null means slot is free. */
if (!dtor) dtor = nodtor;
- pthread_rwlock_wrlock(&key_lock);
+ __pthread_rwlock_wrlock(&key_lock);
+ pthread_key_t j = next_key;
do {
if (!keys[j]) {
keys[next_key = *k = j] = dtor;
- pthread_rwlock_unlock(&key_lock);
+ __pthread_rwlock_unlock(&key_lock);
return 0;
- } else if (keys[j] == dirty) {
- found_dirty = 1;
}
} while ((j=(j+1)%PTHREAD_KEYS_MAX) != next_key);
- /* It's possible that all slots are in use or __synccall fails. */
- if (!found_dirty || clean_dirty_tsd()) {
- pthread_rwlock_unlock(&key_lock);
- return EAGAIN;
- }
-
- /* If this point is reached there is necessarily a newly-cleaned
- * slot to allocate to satisfy the caller's request. Find it and
- * mark any additional previously-dirty slots clean. */
- for (j=0; j<PTHREAD_KEYS_MAX; j++) {
- if (keys[j] == dirty) {
- if (dtor) {
- keys[next_key = *k = j] = dtor;
- dtor = 0;
- } else {
- keys[j] = 0;
- }
- }
- }
-
- pthread_rwlock_unlock(&key_lock);
- return 0;
+ __pthread_rwlock_unlock(&key_lock);
+ return EAGAIN;
}
-int __pthread_key_delete_impl(pthread_key_t k)
+int __pthread_key_delete(pthread_key_t k)
{
- pthread_rwlock_wrlock(&key_lock);
- keys[k] = dirty;
- pthread_rwlock_unlock(&key_lock);
+ sigset_t set;
+ pthread_t self = __pthread_self(), td=self;
+
+ __block_app_sigs(&set);
+ __pthread_rwlock_wrlock(&key_lock);
+
+ __tl_lock();
+ do td->tsd[k] = 0;
+ while ((td=td->next)!=self);
+ __tl_unlock();
+
+ keys[k] = 0;
+
+ __pthread_rwlock_unlock(&key_lock);
+ __restore_sigs(&set);
+
return 0;
}
@@ -106,20 +71,21 @@ void __pthread_tsd_run_dtors()
pthread_t self = __pthread_self();
int i, j;
for (j=0; self->tsd_used && j<PTHREAD_DESTRUCTOR_ITERATIONS; j++) {
- pthread_rwlock_rdlock(&key_lock);
+ __pthread_rwlock_rdlock(&key_lock);
self->tsd_used = 0;
for (i=0; i<PTHREAD_KEYS_MAX; i++) {
void *val = self->tsd[i];
void (*dtor)(void *) = keys[i];
self->tsd[i] = 0;
- if (val && dtor && dtor != nodtor && dtor != dirty) {
- pthread_rwlock_unlock(&key_lock);
+ if (val && dtor && dtor != nodtor) {
+ __pthread_rwlock_unlock(&key_lock);
dtor(val);
- pthread_rwlock_rdlock(&key_lock);
+ __pthread_rwlock_rdlock(&key_lock);
}
}
- pthread_rwlock_unlock(&key_lock);
+ __pthread_rwlock_unlock(&key_lock);
}
}
weak_alias(__pthread_key_create, pthread_key_create);
+weak_alias(__pthread_key_delete, pthread_key_delete);
diff --git a/src/thread/pthread_key_delete.c b/src/thread/pthread_key_delete.c
deleted file mode 100644
index 012fe2da..00000000
--- a/src/thread/pthread_key_delete.c
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "pthread_impl.h"
-#include "libc.h"
-
-void __pthread_key_delete_synccall(void (*f)(void *), void *p)
-{
- __synccall(f, p);
-}
-
-int __pthread_key_delete(pthread_key_t k)
-{
- return __pthread_key_delete_impl(k);
-}
-
-weak_alias(__pthread_key_delete, pthread_key_delete);
diff --git a/src/thread/pthread_mutex_consistent.c b/src/thread/pthread_mutex_consistent.c
index 96b83b52..27c74e5b 100644
--- a/src/thread/pthread_mutex_consistent.c
+++ b/src/thread/pthread_mutex_consistent.c
@@ -1,10 +1,14 @@
#include "pthread_impl.h"
+#include "atomic.h"
int pthread_mutex_consistent(pthread_mutex_t *m)
{
- if (!(m->_m_type & 8)) return EINVAL;
- if ((m->_m_lock & 0x7fffffff) != __pthread_self()->tid)
+ int old = m->_m_lock;
+ int own = old & 0x3fffffff;
+ if (!(m->_m_type & 4) || !own || !(old & 0x40000000))
+ return EINVAL;
+ if (own != __pthread_self()->tid)
return EPERM;
- m->_m_type &= ~8U;
+ a_and(&m->_m_lock, ~0x40000000);
return 0;
}
diff --git a/src/thread/pthread_mutex_timedlock.c b/src/thread/pthread_mutex_timedlock.c
index 9867f389..6b893627 100644
--- a/src/thread/pthread_mutex_timedlock.c
+++ b/src/thread/pthread_mutex_timedlock.c
@@ -1,5 +1,40 @@
#include "pthread_impl.h"
+static int pthread_mutex_timedlock_pi(pthread_mutex_t *restrict m, const struct timespec *restrict at)
+{
+ int type = m->_m_type;
+ int priv = (type & 128) ^ 128;
+ pthread_t self = __pthread_self();
+ int e;
+
+ if (!priv) self->robust_list.pending = &m->_m_next;
+
+ do e = -__syscall(SYS_futex, &m->_m_lock, FUTEX_LOCK_PI|priv, 0, at);
+ while (e==EINTR);
+ if (e) self->robust_list.pending = 0;
+
+ switch (e) {
+ case 0:
+ /* Catch spurious success for non-robust mutexes. */
+ if (!(type&4) && ((m->_m_lock & 0x40000000) || m->_m_waiters)) {
+ a_store(&m->_m_waiters, -1);
+ __syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv);
+ self->robust_list.pending = 0;
+ break;
+ }
+ /* Signal to trylock that we already have the lock. */
+ m->_m_count = -1;
+ return __pthread_mutex_trylock(m);
+ case ETIMEDOUT:
+ return e;
+ case EDEADLK:
+ if ((type&3) == PTHREAD_MUTEX_ERRORCHECK) return e;
+ }
+ do e = __timedwait(&(int){0}, 0, CLOCK_REALTIME, at, 1);
+ while (e != ETIMEDOUT);
+ return e;
+}
+
int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
{
if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
@@ -9,17 +44,21 @@ int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec
int type = m->_m_type;
int r, t, priv = (type & 128) ^ 128;
- r = pthread_mutex_trylock(m);
+ r = __pthread_mutex_trylock(m);
if (r != EBUSY) return r;
+
+ if (type&8) return pthread_mutex_timedlock_pi(m, at);
int spins = 100;
while (spins-- && m->_m_lock && !m->_m_waiters) a_spin();
while ((r=__pthread_mutex_trylock(m)) == EBUSY) {
- if (!(r=m->_m_lock) || ((r&0x40000000) && (type&4)))
+ r = m->_m_lock;
+ int own = r & 0x3fffffff;
+ if (!own && (!r || (type&4)))
continue;
if ((type&3) == PTHREAD_MUTEX_ERRORCHECK
- && (r&0x7fffffff) == __pthread_self()->tid)
+ && own == __pthread_self()->tid)
return EDEADLK;
a_inc(&m->_m_waiters);
diff --git a/src/thread/pthread_mutex_trylock.c b/src/thread/pthread_mutex_trylock.c
index 783ca0c4..a24e7c58 100644
--- a/src/thread/pthread_mutex_trylock.c
+++ b/src/thread/pthread_mutex_trylock.c
@@ -3,21 +3,28 @@
int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
{
int old, own;
- int type = m->_m_type & 15;
+ int type = m->_m_type;
pthread_t self = __pthread_self();
int tid = self->tid;
old = m->_m_lock;
- own = old & 0x7fffffff;
- if (own == tid && (type&3) == PTHREAD_MUTEX_RECURSIVE) {
- if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
- m->_m_count++;
- return 0;
+ own = old & 0x3fffffff;
+ if (own == tid) {
+ if ((type&8) && m->_m_count<0) {
+ old &= 0x40000000;
+ m->_m_count = 0;
+ goto success;
+ }
+ if ((type&3) == PTHREAD_MUTEX_RECURSIVE) {
+ if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
+ m->_m_count++;
+ return 0;
+ }
}
- if (own == 0x7fffffff) return ENOTRECOVERABLE;
- if (own && (!(own & 0x40000000) || !(type & 4))) return EBUSY;
+ if (own == 0x3fffffff) return ENOTRECOVERABLE;
+ if (own || (old && !(type & 4))) return EBUSY;
- if (m->_m_type & 128) {
+ if (type & 128) {
if (!self->robust_list.off) {
self->robust_list.off = (char*)&m->_m_lock-(char *)&m->_m_next;
__syscall(SYS_set_robust_list, &self->robust_list, 3*sizeof(long));
@@ -25,12 +32,22 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
if (m->_m_waiters) tid |= 0x80000000;
self->robust_list.pending = &m->_m_next;
}
+ tid |= old & 0x40000000;
if (a_cas(&m->_m_lock, old, tid) != old) {
self->robust_list.pending = 0;
+ if ((type&12)==12 && m->_m_waiters) return ENOTRECOVERABLE;
return EBUSY;
}
+success:
+ if ((type&8) && m->_m_waiters) {
+ int priv = (type & 128) ^ 128;
+ __syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv);
+ self->robust_list.pending = 0;
+ return (type&4) ? ENOTRECOVERABLE : EBUSY;
+ }
+
volatile void *next = self->robust_list.head;
m->_m_next = next;
m->_m_prev = &self->robust_list.head;
@@ -39,9 +56,8 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
self->robust_list.head = &m->_m_next;
self->robust_list.pending = 0;
- if (own) {
+ if (old) {
m->_m_count = 0;
- m->_m_type |= 8;
return EOWNERDEAD;
}
diff --git a/src/thread/pthread_mutex_unlock.c b/src/thread/pthread_mutex_unlock.c
index 7dd00d27..b66423e6 100644
--- a/src/thread/pthread_mutex_unlock.c
+++ b/src/thread/pthread_mutex_unlock.c
@@ -7,13 +7,19 @@ int __pthread_mutex_unlock(pthread_mutex_t *m)
int cont;
int type = m->_m_type & 15;
int priv = (m->_m_type & 128) ^ 128;
+ int new = 0;
+ int old;
if (type != PTHREAD_MUTEX_NORMAL) {
self = __pthread_self();
- if ((m->_m_lock&0x7fffffff) != self->tid)
+ old = m->_m_lock;
+ int own = old & 0x3fffffff;
+ if (own != self->tid)
return EPERM;
if ((type&3) == PTHREAD_MUTEX_RECURSIVE && m->_m_count)
return m->_m_count--, 0;
+ if ((type&4) && (old&0x40000000))
+ new = 0x7fffffff;
if (!priv) {
self->robust_list.pending = &m->_m_next;
__vm_lock();
@@ -24,7 +30,16 @@ int __pthread_mutex_unlock(pthread_mutex_t *m)
if (next != &self->robust_list.head) *(volatile void *volatile *)
((char *)next - sizeof(void *)) = prev;
}
- cont = a_swap(&m->_m_lock, (type & 8) ? 0x7fffffff : 0);
+ if (type&8) {
+ if (old<0 || a_cas(&m->_m_lock, old, new)!=old) {
+ if (new) a_store(&m->_m_waiters, -1);
+ __syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv);
+ }
+ cont = 0;
+ waiters = 0;
+ } else {
+ cont = a_swap(&m->_m_lock, new);
+ }
if (type != PTHREAD_MUTEX_NORMAL && !priv) {
self->robust_list.pending = 0;
__vm_unlock();
diff --git a/src/thread/pthread_mutexattr_setprotocol.c b/src/thread/pthread_mutexattr_setprotocol.c
index c92a31c8..511cc32d 100644
--- a/src/thread/pthread_mutexattr_setprotocol.c
+++ b/src/thread/pthread_mutexattr_setprotocol.c
@@ -1,7 +1,29 @@
#include "pthread_impl.h"
+#include "syscall.h"
+
+static pthread_once_t check_pi_once;
+static int check_pi_result;
+
+static void check_pi()
+{
+ volatile int lk = 0;
+ check_pi_result = -__syscall(SYS_futex, &lk, FUTEX_LOCK_PI, 0, 0);
+}
int pthread_mutexattr_setprotocol(pthread_mutexattr_t *a, int protocol)
{
- if (protocol) return ENOTSUP;
- return 0;
+ switch (protocol) {
+ case PTHREAD_PRIO_NONE:
+ a->__attr &= ~8;
+ return 0;
+ case PTHREAD_PRIO_INHERIT:
+ pthread_once(&check_pi_once, check_pi);
+ if (check_pi_result) return check_pi_result;
+ a->__attr |= 8;
+ return 0;
+ case PTHREAD_PRIO_PROTECT:
+ return ENOTSUP;
+ default:
+ return EINVAL;
+ }
}
diff --git a/src/thread/pthread_rwlock_rdlock.c b/src/thread/pthread_rwlock_rdlock.c
index 0800d21f..8546c07d 100644
--- a/src/thread/pthread_rwlock_rdlock.c
+++ b/src/thread/pthread_rwlock_rdlock.c
@@ -1,6 +1,8 @@
#include "pthread_impl.h"
-int pthread_rwlock_rdlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_rdlock(pthread_rwlock_t *rw)
{
- return pthread_rwlock_timedrdlock(rw, 0);
+ return __pthread_rwlock_timedrdlock(rw, 0);
}
+
+weak_alias(__pthread_rwlock_rdlock, pthread_rwlock_rdlock);
diff --git a/src/thread/pthread_rwlock_timedrdlock.c b/src/thread/pthread_rwlock_timedrdlock.c
index 0d5d0d6c..8cdd8ecf 100644
--- a/src/thread/pthread_rwlock_timedrdlock.c
+++ b/src/thread/pthread_rwlock_timedrdlock.c
@@ -1,6 +1,6 @@
#include "pthread_impl.h"
-int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
+int __pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
{
int r, t;
@@ -10,7 +10,7 @@ int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct times
int spins = 100;
while (spins-- && rw->_rw_lock && !rw->_rw_waiters) a_spin();
- while ((r=pthread_rwlock_tryrdlock(rw))==EBUSY) {
+ while ((r=__pthread_rwlock_tryrdlock(rw))==EBUSY) {
if (!(r=rw->_rw_lock) || (r&0x7fffffff)!=0x7fffffff) continue;
t = r | 0x80000000;
a_inc(&rw->_rw_waiters);
@@ -21,3 +21,5 @@ int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct times
}
return r;
}
+
+weak_alias(__pthread_rwlock_timedrdlock, pthread_rwlock_timedrdlock);
diff --git a/src/thread/pthread_rwlock_timedwrlock.c b/src/thread/pthread_rwlock_timedwrlock.c
index 7f26dad1..d77706e6 100644
--- a/src/thread/pthread_rwlock_timedwrlock.c
+++ b/src/thread/pthread_rwlock_timedwrlock.c
@@ -1,6 +1,6 @@
#include "pthread_impl.h"
-int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
+int __pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
{
int r, t;
@@ -10,7 +10,7 @@ int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct times
int spins = 100;
while (spins-- && rw->_rw_lock && !rw->_rw_waiters) a_spin();
- while ((r=pthread_rwlock_trywrlock(rw))==EBUSY) {
+ while ((r=__pthread_rwlock_trywrlock(rw))==EBUSY) {
if (!(r=rw->_rw_lock)) continue;
t = r | 0x80000000;
a_inc(&rw->_rw_waiters);
@@ -21,3 +21,5 @@ int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct times
}
return r;
}
+
+weak_alias(__pthread_rwlock_timedwrlock, pthread_rwlock_timedwrlock);
diff --git a/src/thread/pthread_rwlock_tryrdlock.c b/src/thread/pthread_rwlock_tryrdlock.c
index fa271fcc..c13bc9cc 100644
--- a/src/thread/pthread_rwlock_tryrdlock.c
+++ b/src/thread/pthread_rwlock_tryrdlock.c
@@ -1,6 +1,6 @@
#include "pthread_impl.h"
-int pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
{
int val, cnt;
do {
@@ -11,3 +11,5 @@ int pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
} while (a_cas(&rw->_rw_lock, val, val+1) != val);
return 0;
}
+
+weak_alias(__pthread_rwlock_tryrdlock, pthread_rwlock_tryrdlock);
diff --git a/src/thread/pthread_rwlock_trywrlock.c b/src/thread/pthread_rwlock_trywrlock.c
index bb3d3a99..64d9d312 100644
--- a/src/thread/pthread_rwlock_trywrlock.c
+++ b/src/thread/pthread_rwlock_trywrlock.c
@@ -1,7 +1,9 @@
#include "pthread_impl.h"
-int pthread_rwlock_trywrlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_trywrlock(pthread_rwlock_t *rw)
{
if (a_cas(&rw->_rw_lock, 0, 0x7fffffff)) return EBUSY;
return 0;
}
+
+weak_alias(__pthread_rwlock_trywrlock, pthread_rwlock_trywrlock);
diff --git a/src/thread/pthread_rwlock_unlock.c b/src/thread/pthread_rwlock_unlock.c
index 7b5eec84..9ae27ad2 100644
--- a/src/thread/pthread_rwlock_unlock.c
+++ b/src/thread/pthread_rwlock_unlock.c
@@ -1,6 +1,6 @@
#include "pthread_impl.h"
-int pthread_rwlock_unlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_unlock(pthread_rwlock_t *rw)
{
int val, cnt, waiters, new, priv = rw->_rw_shared^128;
@@ -16,3 +16,5 @@ int pthread_rwlock_unlock(pthread_rwlock_t *rw)
return 0;
}
+
+weak_alias(__pthread_rwlock_unlock, pthread_rwlock_unlock);
diff --git a/src/thread/pthread_rwlock_wrlock.c b/src/thread/pthread_rwlock_wrlock.c
index 7f33535c..46a3b3a5 100644
--- a/src/thread/pthread_rwlock_wrlock.c
+++ b/src/thread/pthread_rwlock_wrlock.c
@@ -1,6 +1,8 @@
#include "pthread_impl.h"
-int pthread_rwlock_wrlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_wrlock(pthread_rwlock_t *rw)
{
- return pthread_rwlock_timedwrlock(rw, 0);
+ return __pthread_rwlock_timedwrlock(rw, 0);
}
+
+weak_alias(__pthread_rwlock_wrlock, pthread_rwlock_wrlock);
diff --git a/src/thread/pthread_sigmask.c b/src/thread/pthread_sigmask.c
index 88c333f6..f188782a 100644
--- a/src/thread/pthread_sigmask.c
+++ b/src/thread/pthread_sigmask.c
@@ -5,7 +5,7 @@
int pthread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict old)
{
int ret;
- if ((unsigned)how - SIG_BLOCK > 2U) return EINVAL;
+ if (set && (unsigned)how - SIG_BLOCK > 2U) return EINVAL;
ret = -__syscall(SYS_rt_sigprocmask, how, set, old, _NSIG/8);
if (!ret && old) {
if (sizeof old->__bits[0] == 8) {
diff --git a/src/thread/sem_timedwait.c b/src/thread/sem_timedwait.c
index 8132eb1b..58d3ebfe 100644
--- a/src/thread/sem_timedwait.c
+++ b/src/thread/sem_timedwait.c
@@ -22,7 +22,7 @@ int sem_timedwait(sem_t *restrict sem, const struct timespec *restrict at)
pthread_cleanup_push(cleanup, (void *)(sem->__val+1));
r = __timedwait_cp(sem->__val, -1, CLOCK_REALTIME, at, sem->__val[2]);
pthread_cleanup_pop(1);
- if (r && r != EINTR) {
+ if (r) {
errno = r;
return -1;
}
diff --git a/src/thread/synccall.c b/src/thread/synccall.c
index cc66bd24..648a6ad4 100644
--- a/src/thread/synccall.c
+++ b/src/thread/synccall.c
@@ -1,46 +1,42 @@
#include "pthread_impl.h"
#include <semaphore.h>
-#include <unistd.h>
-#include <dirent.h>
#include <string.h>
-#include <ctype.h>
-#include "futex.h"
-#include "atomic.h"
-#include "../dirent/__dirent.h"
-#include "lock.h"
-
-static struct chain {
- struct chain *next;
- int tid;
- sem_t target_sem, caller_sem;
-} *volatile head;
-
-static volatile int synccall_lock[1];
-static volatile int target_tid;
+
+static void dummy_0(void)
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+
+static int target_tid;
static void (*callback)(void *), *context;
-static volatile int dummy = 0;
-weak_alias(dummy, __block_new_threads);
+static sem_t target_sem, caller_sem;
+
+static void dummy(void *p)
+{
+}
static void handler(int sig)
{
- struct chain ch;
- int old_errno = errno;
+ if (__pthread_self()->tid != target_tid) return;
- sem_init(&ch.target_sem, 0, 0);
- sem_init(&ch.caller_sem, 0, 0);
+ int old_errno = errno;
- ch.tid = __syscall(SYS_gettid);
+ /* Inform caller we have received signal and wait for
+ * the caller to let us make the callback. */
+ sem_post(&caller_sem);
+ sem_wait(&target_sem);
- do ch.next = head;
- while (a_cas_p(&head, ch.next, &ch) != ch.next);
+ callback(context);
- if (a_cas(&target_tid, ch.tid, 0) == (ch.tid | 0x80000000))
- __syscall(SYS_futex, &target_tid, FUTEX_UNLOCK_PI|FUTEX_PRIVATE);
+ /* Inform caller we've completed the callback and wait
+ * for the caller to release us to return. */
+ sem_post(&caller_sem);
+ sem_wait(&target_sem);
- sem_wait(&ch.target_sem);
- callback(context);
- sem_post(&ch.caller_sem);
- sem_wait(&ch.target_sem);
+ /* Inform caller we are returning and state is destroyable. */
+ sem_post(&caller_sem);
errno = old_errno;
}
@@ -48,12 +44,10 @@ static void handler(int sig)
void __synccall(void (*func)(void *), void *ctx)
{
sigset_t oldmask;
- int cs, i, r, pid, self;;
- DIR dir = {0};
- struct dirent *de;
+ int cs, i, r;
struct sigaction sa = { .sa_flags = SA_RESTART, .sa_handler = handler };
- struct chain *cp, *next;
- struct timespec ts;
+ pthread_t self = __pthread_self(), td;
+ int count = 0;
/* Blocking signals in two steps, first only app-level signals
* before taking the lock, then all signals after taking the lock,
@@ -62,98 +56,45 @@ void __synccall(void (*func)(void *), void *ctx)
* any until after the lock would allow re-entry in the same thread
* with the lock already held. */
__block_app_sigs(&oldmask);
- LOCK(synccall_lock);
+ __tl_lock();
__block_all_sigs(0);
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
- head = 0;
+ sem_init(&target_sem, 0, 0);
+ sem_init(&caller_sem, 0, 0);
- if (!libc.threaded) goto single_threaded;
+ if (!libc.threads_minus_1) goto single_threaded;
callback = func;
context = ctx;
- /* This atomic store ensures that any signaled threads will see the
- * above stores, and prevents more than a bounded number of threads,
- * those already in pthread_create, from creating new threads until
- * the value is cleared to zero again. */
- a_store(&__block_new_threads, 1);
-
/* Block even implementation-internal signals, so that nothing
* interrupts the SIGSYNCCALL handlers. The main possible source
* of trouble is asynchronous cancellation. */
memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
__libc_sigaction(SIGSYNCCALL, &sa, 0);
- pid = __syscall(SYS_getpid);
- self = __syscall(SYS_gettid);
-
- /* Since opendir is not AS-safe, the DIR needs to be setup manually
- * in automatic storage. Thankfully this is easy. */
- dir.fd = open("/proc/self/task", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
- if (dir.fd < 0) goto out;
-
- /* Initially send one signal per counted thread. But since we can't
- * synchronize with thread creation/exit here, there could be too
- * few signals. This initial signaling is just an optimization, not
- * part of the logic. */
- for (i=libc.threads_minus_1; i; i--)
- __syscall(SYS_kill, pid, SIGSYNCCALL);
-
- /* Loop scanning the kernel-provided thread list until it shows no
- * threads that have not already replied to the signal. */
- for (;;) {
- int miss_cnt = 0;
- while ((de = readdir(&dir))) {
- if (!isdigit(de->d_name[0])) continue;
- int tid = atoi(de->d_name);
- if (tid == self || !tid) continue;
-
- /* Set the target thread as the PI futex owner before
- * checking if it's in the list of caught threads. If it
- * adds itself to the list after we check for it, then
- * it will see its own tid in the PI futex and perform
- * the unlock operation. */
- a_store(&target_tid, tid);
-
- /* Thread-already-caught is a success condition. */
- for (cp = head; cp && cp->tid != tid; cp=cp->next);
- if (cp) continue;
-
- r = -__syscall(SYS_tgkill, pid, tid, SIGSYNCCALL);
-
- /* Target thread exit is a success condition. */
- if (r == ESRCH) continue;
-
- /* The FUTEX_LOCK_PI operation is used to loan priority
- * to the target thread, which otherwise may be unable
- * to run. Timeout is necessary because there is a race
- * condition where the tid may be reused by a different
- * process. */
- clock_gettime(CLOCK_REALTIME, &ts);
- ts.tv_nsec += 10000000;
- if (ts.tv_nsec >= 1000000000) {
- ts.tv_sec++;
- ts.tv_nsec -= 1000000000;
- }
- r = -__syscall(SYS_futex, &target_tid,
- FUTEX_LOCK_PI|FUTEX_PRIVATE, 0, &ts);
-
- /* Obtaining the lock means the thread responded. ESRCH
- * means the target thread exited, which is okay too. */
- if (!r || r == ESRCH) continue;
-
- miss_cnt++;
+
+ for (td=self->next; td!=self; td=td->next) {
+ target_tid = td->tid;
+ while ((r = -__syscall(SYS_tkill, td->tid, SIGSYNCCALL)) == EAGAIN);
+ if (r) {
+ /* If we failed to signal any thread, nop out the
+ * callback to abort the synccall and just release
+ * any threads already caught. */
+ callback = func = dummy;
+ break;
}
- if (!miss_cnt) break;
- rewinddir(&dir);
+ sem_wait(&caller_sem);
+ count++;
}
- close(dir.fd);
+ target_tid = 0;
- /* Serialize execution of callback in caught threads. */
- for (cp=head; cp; cp=cp->next) {
- sem_post(&cp->target_sem);
- sem_wait(&cp->caller_sem);
+ /* Serialize execution of callback in caught threads, or just
+ * release them all if synccall is being aborted. */
+ for (i=0; i<count; i++) {
+ sem_post(&target_sem);
+ sem_wait(&caller_sem);
}
sa.sa_handler = SIG_IGN;
@@ -164,16 +105,15 @@ single_threaded:
/* Only release the caught threads once all threads, including the
* caller, have returned from the callback function. */
- for (cp=head; cp; cp=next) {
- next = cp->next;
- sem_post(&cp->target_sem);
- }
+ for (i=0; i<count; i++)
+ sem_post(&target_sem);
+ for (i=0; i<count; i++)
+ sem_wait(&caller_sem);
-out:
- a_store(&__block_new_threads, 0);
- __wake(&__block_new_threads, -1, 1);
+ sem_destroy(&caller_sem);
+ sem_destroy(&target_sem);
pthread_setcancelstate(cs, 0);
- UNLOCK(synccall_lock);
+ __tl_unlock();
__restore_sigs(&oldmask);
}
diff --git a/src/time/timer_create.c b/src/time/timer_create.c
index ad7a2646..c5e40a19 100644
--- a/src/time/timer_create.c
+++ b/src/time/timer_create.c
@@ -22,28 +22,17 @@ weak_alias(dummy_0, __pthread_tsd_run_dtors);
static void cleanup_fromsig(void *p)
{
pthread_t self = __pthread_self();
- __pthread_tsd_run_dtors(self);
+ __pthread_tsd_run_dtors();
self->cancel = 0;
self->cancelbuf = 0;
self->canceldisable = 0;
self->cancelasync = 0;
- self->unblock_cancel = 0;
__reset_tls();
longjmp(p, 1);
}
static void timer_handler(int sig, siginfo_t *si, void *ctx)
{
- pthread_t self = __pthread_self();
- jmp_buf jb;
- void (*notify)(union sigval) = (void (*)(union sigval))self->start;
- union sigval val = { .sival_ptr = self->start_arg };
-
- if (!setjmp(jb) && si->si_code == SI_TIMER) {
- pthread_cleanup_push(cleanup_fromsig, jb);
- notify(val);
- pthread_cleanup_pop(1);
- }
}
static void install_handler()
@@ -59,20 +48,24 @@ static void *start(void *arg)
{
pthread_t self = __pthread_self();
struct start_args *args = arg;
- int id;
+ int id = self->timer_id;
+ jmp_buf jb;
- /* Reuse no-longer-needed thread structure fields to avoid
- * needing the timer address in the signal handler. */
- self->start = (void *(*)(void *))args->sev->sigev_notify_function;
- self->start_arg = args->sev->sigev_value.sival_ptr;
+ void (*notify)(union sigval) = args->sev->sigev_notify_function;
+ union sigval val = args->sev->sigev_value;
pthread_barrier_wait(&args->b);
- if ((id = self->timer_id) >= 0) {
- __syscall(SYS_rt_sigprocmask, SIG_UNBLOCK,
- SIGTIMER_SET, 0, _NSIG/8);
- __wait(&self->timer_id, 0, id, 1);
- __syscall(SYS_timer_delete, id);
+ for (;;) {
+ siginfo_t si;
+ while (sigwaitinfo(SIGTIMER_SET, &si) < 0);
+ if (si.si_code == SI_TIMER && !setjmp(jb)) {
+ pthread_cleanup_push(cleanup_fromsig, jb);
+ notify(val);
+ pthread_cleanup_pop(1);
+ }
+ if (self->timer_id < 0) break;
}
+ __syscall(SYS_timer_delete, id);
return 0;
}
@@ -112,6 +105,7 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict
args.sev = evp;
__block_app_sigs(&set);
+ __syscall(SYS_rt_sigprocmask, SIG_BLOCK, SIGTIMER_SET, 0, _NSIG/8);
r = pthread_create(&td, &attr, start, &args);
__restore_sigs(&set);
if (r) {
diff --git a/src/time/timer_delete.c b/src/time/timer_delete.c
index 7c97eeb1..b0bfac09 100644
--- a/src/time/timer_delete.c
+++ b/src/time/timer_delete.c
@@ -7,7 +7,7 @@ int timer_delete(timer_t t)
if ((intptr_t)t < 0) {
pthread_t td = (void *)((uintptr_t)t << 1);
a_store(&td->timer_id, td->timer_id | INT_MIN);
- __wake(&td->timer_id, 1, 1);
+ __syscall(SYS_tkill, td->tid, SIGTIMER);
return 0;
}
return __syscall(SYS_timer_delete, t);
diff --git a/src/unistd/renameat.c b/src/unistd/renameat.c
index 12574822..c3b40a25 100644
--- a/src/unistd/renameat.c
+++ b/src/unistd/renameat.c
@@ -3,5 +3,9 @@
int renameat(int oldfd, const char *old, int newfd, const char *new)
{
+#ifdef SYS_renameat
return syscall(SYS_renameat, oldfd, old, newfd, new);
+#else
+ return syscall(SYS_renameat2, oldfd, old, newfd, new, 0);
+#endif
}