1265 files changed, 20297 insertions, 8464 deletions
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 00000000..aede9ec8
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1 @@
+Ada Worcester <oss@ada.pikhq.com> <josiahw@gmail.com>
diff --git a/COPYRIGHT b/COPYRIGHT
index b8a76045..2f15edc7 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,7 +1,7 @@
 musl as a whole is licensed under the following standard MIT license:
 
 ----------------------------------------------------------------------
-Copyright © 2005-2014 Rich Felker, et al.
+Copyright © 2005-2020 Rich Felker, et al.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -26,12 +26,17 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 Authors/contributors include:
 
 A. Wilcox
+Ada Worcester
 Alex Dowad
+Alex Suykov
 Alexander Monakov
+Andre McCurdy
 Andrew Kelley
 Anthony G. Basile
+Aric Belsito
 Arvid Picciani
 Bartosz Brachaczek
+Benjamin Peterson
 Bobby Bingham
 Boris Brezillon
 Brent Cook
@@ -40,11 +45,14 @@ Clément Vasseur
 Daniel Micay
 Daniel Sabogal
 Daurnimator
+David Carlier
 David Edelsohn
 Denys Vlasenko
 Dmitry Ivanov
 Dmitry V. Levin
+Drew DeVault
 Emil Renner Berthing
+Fangrui Song
 Felix Fietkau
 Felix Janda
 Gianluca Anzolin
@@ -58,38 +66,45 @@ Jeremy Huntwork
 Jo-Philipp Wich
 Joakim Sindholt
 John Spencer
-Josiah Worcester
 Julien Ramseier
 Justin Cormack
+Kaarle Ritvanen
 Khem Raj
 Kylie McClain
 Leah Neukirchen
 Luca Barbato
 Luka Perkov
+Lynn Ochs
 M Farkas-Dyck (Strake)
 Mahesh Bodapati
+Markus Wichmann
 Masanori Ogino
+Michael Clark
 Michael Forney
 Mikhail Kremnyov
 Natanael Copa
 Nicholas J. Kain
 orc
 Pascal Cuoq
+Patrick Oppenlander
 Petr Hosek
 Petr Skocik
 Pierre Carrier
 Reini Urban
 Rich Felker
 Richard Pennington
+Ryan Fairfax
 Samuel Holland
+Segev Finer
 Shiz
 sin
 Solar Designer
 Stefan Kristiansson
+Stefan O'Rear
 Szabolcs Nagy
 Timo Teräs
 Trutz Behn
-Valentin Ochs
+Will Dietz
 William Haddon
 William Pitcock
 
@@ -107,14 +122,18 @@ Copyright © 1993,2004 Sun Microsystems or
 Copyright © 2003-2011 David Schultz or
 Copyright © 2003-2009 Steven G. Kargl or
 Copyright © 2003-2009 Bruce D. Evans or
-Copyright © 2008 Stephen L. Moshier
+Copyright © 2008 Stephen L. Moshier or
+Copyright © 2017-2018 Arm Limited
 and labelled as such in comments in the individual source files. All
 have been licensed under extremely permissive terms.
 
-The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
 The Android Open Source Project and is licensed under a two-clause BSD
 license. It was taken from Bionic libc, used on Android.
 
+The AArch64 memcpy and memset code (src/string/aarch64/*) are
+Copyright © 1999-2019, Arm Limited.
+
 The implementation of DES for crypt (src/crypt/crypt_des.c) is
 Copyright © 1994 David Burren. It is licensed under a BSD license.
 
@@ -124,7 +143,7 @@ domain. The code also comes with a fallback permissive license for use
 in jurisdictions that may not recognize the public domain.
 
 The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
-Valentin Ochs and is licensed under an MIT-style license.
+Lynn Ochs and is licensed under an MIT-style license.
 
 The x86_64 port was written by Nicholas J. Kain and is licensed under
 the standard MIT terms.
diff --git a/INSTALL b/INSTALL
index a2a142bf..ddbbbb2f 100644
--- a/INSTALL
+++ b/INSTALL
@@ -55,15 +55,16 @@ and ABI combinations:
     * Little-endian default; big-endian variants also supported
 
 * MIPS
-    * ABI is o32
+    * ABI is o32, fp32/fpxx (except on r6 which is fp64)
     * Big-endian default; little-endian variants also supported
     * Default ABI variant uses FPU registers; alternate soft-float ABI
       that does not use FPU registers or instructions is available
     * MIPS2 or later, or kernel emulation of ll/sc (standard in Linux)
       is required
+    * MIPS32r6, an incompatible ISA, is supported as a variant "mipsr6"
 
 * MIPS64
-    * ABI is n64 (LP64)
+    * ABI is n64 (LP64) or n32 (ILP32)
     * Big-endian default; little-endian variants also supported
     * Default ABI variant uses FPU registers; alternate soft-float ABI
       that does not use FPU registers or instructions is available
@@ -85,7 +86,7 @@ and ABI combinations:
 
 * SuperH (SH)
     * Standard ELF ABI or FDPIC ABI (shared-text without MMU)
-    * Little-endian by default; big-engian variant also supported
+    * Little-endian by default; big-endian variant also supported
     * Full FPU ABI or soft-float ABI is supported, but the
       single-precision-only FPU ABI is not
 
@@ -96,6 +97,16 @@ and ABI combinations:
 
 * OpenRISC 1000 (or1k)
 
+* RISC-V
+    * 32-bit and 64-bit
+    * Little endian
+    * Hard, soft, and hard-single/soft-double floating point ABIs
+    * Standard ELF; no shared-text NOMMU support
+
+* LoongArch
+    * 64-bit ISA
+    * Hard, soft, and hard-single/soft-double floating point ABIs
+
 
 
 Build and Installation Procedure
diff --git a/Makefile b/Makefile
index b46f8ca4..3ad88b35 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,8 @@ includedir = $(prefix)/include
 libdir = $(prefix)/lib
 syslibdir = /lib
 
-SRC_DIRS = $(addprefix $(srcdir)/,src/* crt ldso)
+MALLOC_DIR = mallocng
+SRC_DIRS = $(addprefix $(srcdir)/,src/* src/malloc/$(MALLOC_DIR) crt ldso $(COMPAT_SRC_DIRS))
 BASE_GLOBS = $(addsuffix /*.c,$(SRC_DIRS))
 ARCH_GLOBS = $(addsuffix /$(ARCH)/*.[csS],$(SRC_DIRS))
 BASE_SRCS = $(sort $(wildcard $(BASE_GLOBS)))
@@ -27,7 +28,7 @@ ARCH_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(ARCH_SRCS)))
 REPLACED_OBJS = $(sort $(subst /$(ARCH)/,/,$(ARCH_OBJS)))
 ALL_OBJS = $(addprefix obj/, $(filter-out $(REPLACED_OBJS), $(sort $(BASE_OBJS) $(ARCH_OBJS))))
 
-LIBC_OBJS = $(filter obj/src/%,$(ALL_OBJS))
+LIBC_OBJS = $(filter obj/src/%,$(ALL_OBJS)) $(filter obj/compat/%,$(ALL_OBJS))
 LDSO_OBJS = $(filter obj/ldso/%,$(ALL_OBJS:%.o=%.lo))
 CRT_OBJS = $(filter obj/crt/%,$(ALL_OBJS))
 
@@ -75,6 +76,7 @@ WRAPCC_CLANG = clang
 LDSO_PATHNAME = $(syslibdir)/ld-musl-$(ARCH)$(SUBARCH).so.1
 
 -include config.mak
+-include $(srcdir)/arch/$(ARCH)/arch.mak
 
 ifeq ($(ARCH),)
 
@@ -107,7 +109,7 @@ obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version
 
 obj/crt/rcrt1.o obj/ldso/dlstart.lo obj/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h
 
-obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
+obj/crt/crt1.o obj/crt/Scrt1.o obj/crt/rcrt1.o obj/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
 
 obj/crt/rcrt1.o: $(srcdir)/ldso/dlstart.c
 
diff --git a/VERSION b/VERSION
index be5b4c7b..c813fe11 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.1.20
+1.2.5
diff --git a/WHATSNEW b/WHATSNEW
index dad17d6e..7bd90728 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -1984,3 +1984,457 @@ arch-specfic bugs fixed:
 - on mips, return from start function passed to clone crashed (runaway exec)
 - printf %a precision specifier malfunctioned except on ld80 archs
 - async thread cancellation crashed on powerpc64 and sh-fdpic
+
+
+1.1.21 release notes
+
+new features:
+- setting default thread stack size via PT_GNU_STACK program header
+- arm vfork implementation
+- arm tlsdesc/gnu2 tls dialect support
+- name_to_handle_at and name_to_handle_at syscall wrappers
+- header-level support for new linux features through 4.18
+
+optimizations:
+- glob rewrite with much better performance and stack usage properties
+- single-threaded and already-locked fast paths for getc/putc variants
+- single-instruction fma implementations for arm, s390x, powerpc, & x86_64
+- single-instruction fabs and sqrt implementations for powerpc
+- size and performance from making all internal-only functions/data hidden
+- made &errno and pthread_self results cachable again (attribute((const)))
+- significant speedup in strtod with short inputs
+- new tsearch AVL tree implementation, smaller and faster
+- special-cased nop calls to wmemmove
+- fixed erroneously suboptimal skip conditions in strstr and memmem
+
+hardening:
+- default thread stack guard size increased from 4k to 8k
+
+compatibility:
+- default thread stack size increased from 80k to 128k
+- building for arm as thumb2 with clang internal assembler now works
+- aio threads could overflow stack on kernels that break MINSIGSTKSZ ABI
+- aio threads no longer call malloc (problematic with malloc replacement)
+- pthread_sigmask/sigprocmask now ignore an invalid how when not changing mask
+
+bugs fixed:
+- soft deadlock regression in stdio FILE locks with >2 threads contending
+- deadlock and buffered data loss race in fclose
+- race condition leading to possible crash in dcngettext plural forms
+- glob failed to see past searchable-but-unreadable path components
+- getdelim wrongly realloc'd buffer that was already exactly right size
+- getdelim failed to set stream orientation on early error
+- ttyname[_r] reported wrong error when given bad fd
+- pthread_key_delete left old tsd values exposed if slot was reused
+- freeaddrinfo failed to support freeing sublists
+- access to optopt was broken by copy relocations
+- memccpy returned wrong result if first byte past buffer end matched
+- wordexp read past end of input string ending in backslash
+- sem_wait and sem_timedwait were wrongly not interruptible by signals
+- getspnam[_r] wrongly treated not-found as an error
+
+arch-specfic bugs fixed:
+- soft deadlocks (missing futex wake) on powerpc locking
+- dlsym returned wrong address for thread-local symbols on ppc/mips/m68k
+
+
+1.1.22 release notes
+
+new features:
+- priority-inheritance mutexes
+- membarrier syscall, pre-registration to use it, fallback emulation
+- header-level support for new linux features in 4.19, 4.20, 5.0
+
+major internal changes:
+- complete, async-safe view of all existent threads as global list
+- robust __synccall based on new thread list
+- new dynamic TLS is installed synchronously at dlopen
+- TLSDESC resolver functions no longer make bad ABI assumptions to call C
+- resolved shared library dependencies are now recorded
+
+compatibility & conformance:
+- dependency-order shared library constructor execution
+- sigaltstack no longer rejects SS_AUTODISARM, future flags
+- FILE is now a complete (dummy) type in pre-C11 feature profiles
+- setvbuf reports failure on invalid arguments
+- TSVTX is exposed unconditionally in tar.h
+- multithreaded set*id() no longer depends on /proc
+- key slot reuse after pthread_key_delete no longer depends on /proc
+
+bugs fixed:
+- failures in multithreaded set*id() with concurrent thread creation/exit
+- interposed free was called from invalid/inconsistent contexts
+- freeaddrinfo performed invalid free of some partial results lists
+- dlsym dependency order search had false negatives and false positives
+- dn_skipname gave wrong results for labels with 8-bit content
+- dcngettext clobbered errno, often breaking printing of error messages
+- sscanf read past end of buffer under certain conditions (1.1.21 regression)
+- pthread_key_create spuriously failed under race condition (1.1.21 regression)
+- fdopendir wrongly succeeded with O_PATH file descriptors
+- gets behaved incorrectly in presence of null bytes
+- namespace violations in c11 tsd and mutex function dependencies
+- incorrect prototype for makecontext (unimplemented)
+
+arch-specfic bugs fixed:
+- s390x had wrong values for POSIX_FADV_DONTNEED/_NOREUSE
+
+
+
+1.1.23 release notes
+
+new features:
+- riscv64 port
+- configure now allows customizing AR and RANLIB vars
+- header-level support for new linux features in 5.1
+
+major internal changes:
+- removed extern __syscall; syscall header code is now fully self-contained
+
+performance:
+- new math library implementation for log/exp/pow
+- aarch64 dynamic tlsdesc function is streamlined
+
+compatibility & conformance:
+- O_TTY_INIT is now defined
+- sys/types.h no longer pollutes namespace with sys/sysmacros.h in any profile
+- powerpc asm is now compatible with clang internal assembler
+
+changes for new POSIX interpretations:
+- fgetwc now sets stream error indicator on encoding errors
+- fmemopen no longer rejects 0 size
+
+bugs fixed:
+- static TLS for shared libraries was allocated wrong on "Variant I" archs
+- crash in dladdr reading through uninitialized pointer on non-match
+- sigaltstack wrongly errored out on invalid ss_size when doing SS_DISABLE
+- getdents function misbehaved with buffer length larger than INT_MAX
+- set*id could deadlock after fork from multithreaded process
+
+arch-specfic bugs fixed:
+- s390x SO_PEERSEC definition was wrong
+- passing of 64-bit syscall arguments was broken on microblaze
+- posix_fadvise was broken on mips due to missing 7-arg syscall support
+- vrregset_t layout and member naming was wrong on powerpc64
+
+
+
+1.1.24 release notes
+
+new features:
+- GLOB_TILDE extension to glob
+- non-stub catgets localization API, using netbsd binary catalog format
+- posix_spawn file actions for [f]chdir (extension, pending future standard)
+- secure_getenv function (extension)
+- copy_file_range syscall wrapper (Linux extension)
+- header-level support for new linux features in 5.2
+
+performance:
+- new fast path for lrint (generic C version) on 32-bit archs
+
+major internal changes:
+- functions involving time are overhauled to be time64-ready in 32-bit archs
+- x32 uses the new time64 code paths to replace nasty hacks in syscall glue
+
+compatibility & conformance:
+- support for powerpc[64] unaligned relocation types
+- powerpc[64] and sh sys/user.h no longer clash with kernel asm/ptrace.h
+- select no longer modifies timeout on failure (or at all)
+- mips64 stat results are no longer limited to 32-bit time range
+- optreset (BSD extension) now has a public declaration
+- support for clang inconsistencies in wchar_t type vs some 32-bit archs
+- mips r6 syscall asm no longer has invalid lo/hi register clobbers
+- vestigial asm declarations of __tls_get_new are removed (broke some tooling)
+- riscv64 mcontext_t mismatch glibc's member naming is corrected
+
+bugs fixed:
+- glob failed to match broken symlinks consistently
+- invalid use of interposed calloc to allocate initial TLS
+- various dlsym symbol resolution logic errors
+- semctl with SEM_STAT_ANY didn't work
+- pthread_create with explicit scheduling was subject to priority inversion
+- pthread_create failure path had data race for thread count
+- timer_create with SIGEV_THREAD notification had data race getting timer id
+- wide printf family failed to support l modifier for float formats
+
+arch-specific bugs fixed:
+- x87 floating point stack imbalance in math asm (i386-only CVE-2019-14697)
+- x32 clock_adjtime, getrusage, wait3, wait4 produced junk (struct mismatches)
+- lseek broken on x32 and mipsn32 with large file offsets
+- riscv64 atomics weren't compiler barriers
+- riscv64 atomics had broken asm constraints (missing earlyclobber flag)
+- arm clone() was broken when compiled as thumb if start function returned
+- mipsr6 setjmp/longjmp did not preserve fpu register state correctly
+
+
+
+1.2.0 release notes
+
+new features:
+- time_t is now 64-bit on all archs (not just 64-bit archs)
+- character type & case mapping data updated to Unicode 12.1.0
+- header-level support for new linux features in 5.3 and 5.4
+
+performance:
+- new O(1) wchar_t case mapping implementation
+- i386 now uses C math code for exp, faster than old asm
+- mips math asm
+
+compatibility & conformance:
+- endian.h now aims to conform to future POSIX definition
+- support older compilers that don't accept powerpc math asm constraints
+- fdpic code in ldso was incompatible with valid optimizations in gcc 9+
+- RLIMIT_RTTIME was missing from sys/resource.h
+
+bugs fixed:
+- wcwidth wrongly returned 0 for most of planes 4 and up
+- missing case mapping between U+03F3 and U+037F
+- wrong cacosh results for arguments with negative imaginary part
+- wrong catanf/catanl results for various classes of arguments
+- wrong return value for ungetc with argument outside [0,UCHAR_MAX]
+- posix_openpt with no ptys available produced wrong errno
+
+arch-specific bugs fixed:
+- sigcontext/regset definition mistakes & omissions on m68k, powerpc64
+- fesetenv(FE_DFL_ENV) crashed on riscv64
+- sh2 dynamic linker was broken since 1.1.21 (crash in stage 2b)
+- arm dynamic linker chose wrong tls/atomic variants since 1.1.21
+- some math library functions returned excess precision on i386
+- unconfirmed regression in fchmodat AT_SYMLINK_NOFOLLOW on mips*
+
+
+
+1.2.1 release notes
+
+major changes:
+- new malloc implementation (mallocng & overhauled bump allocator)
+
+new features:
+- DNS queries via res_* now set AD flag, report zone signedness (DNSSEC)
+- PTHREAD_NULL macro (POSIX-future)
+
+performance:
+- optimized memcpy and memset for aarch64
+- optimized memcpy for arm now supports big endian
+- optimized x86_64 remquol
+- improved strerror without linear search
+
+bugs fixed:
+- lock-skipping for processes that returned to single-threaded was wrong
+- AF_UNSPEC dns lookups mishandled single failure in paired A+AAAA
+- res_send and res_query returned wrong value on errors from nameserver
+- corrupted sysvipc timestamps on 32-bit archs with old kernels
+- incorrect parsing of timezone offsets after overly-long zone name
+- clock_adjtime was broken on 32-bit archs (time64)
+- pthread_kill as not async-signal-safe
+- pthread_cancel was not async-cancel-safe
+- large-ulp errors in various math functions in non-default rounding modes
+
+arch-specific bugs fixed:
+- arm clock_gettime was broken on some hw due to bad time64 vdso
+- m68k sqrtl lacked long double precision
+- mips* syscall mechanism regressions on older kernels
+- mips* had negated error codes for some syscalls (kernel bug)
+- mips* SIGEMT was wrongly called SIGSTKFLT
+- sh fesetround didn't work correctly on sh
+
+
+
+1.2.2 release notes
+
+major changes:
+- child restrictions lifted after fork of multithreaded parent
+
+new features:
+- _Fork function (POSIX-future)
+- reallocarray function (extension from OpenBSD, now widespread)
+- gettid function (kernel tid as supported concept)
+- SIGEV_THREAD_ID sigevent API (Linux extension)
+- tcgetwinsize and tcsetwinsize functions (POSIX-future)
+
+performance:
+- faster software sqrt on archs without native sqrt instruction
+
+compatibility:
+- realpath no longer depends on procfs availability & accuracy
+- time zone parser now always prefers 64-bit tables if present
+- crypt_blowfish now supports $2b$ prefix
+- res_query now reports errors via h_errno
+- set*id and setrlimit are now safe in vforked/cloned child
+- setgroups now applies to all threads
+- dlopen debugger notification is improved, should work with lldb
+- setrlimit no longer needs __synccall broadcast on linux 2.6.36+
+- faccessat with AT_EACCESS no longer needs child process on linux 5.8+
+
+bugs fixed:
+- buffer overflow and infinite loop errors in wcsnrtombs (CVE-2020-28928)
+- sem_close unmapped still-referenced semaphores
+- fork of process with active aio could deadlock or crash paren
+- pthread_cond_wait was broken with priority-inheritance mutex
+- getgrouplist wrongly failed when nscd reported an empty list
+- abort could leak modified SIGABRT disposition to fork or posix_spawn child
+- regression with mallocng: malloc_usable_size(0) crashed
+- readlink wrongly gave EINVAL on zero length dest buffer
+- sqrtl was severely inaccurate (not correctly rounded) on ldquad archs
+- assert failure wrongly flushed stdio (possible deadlock)
+- MUSL_LOCPATH search was broken with multiple components
+- missing newline in herror output
+- possible deadlock in pthread_exit with pshared mutex or barrier usage
+- pthread_mutexattr_getprotocol didn't read back protocol
+- v4l2 ioctl translation for pre-time64 kernels didn't work
+
+arch-specific bugs fixed:
+- x86_64 longjmp failed to handle 0 argument reliably
+- i386 __set_thread_area fallback for pre-2.6 kernels didn't work
+- missing O_LARGEFILE macro value on x86_64, x32, mips64
+- unpredictable s390x breakage from failure to preserve call-saved registers
+
+
+
+1.2.3 release notes
+
+new features:
+- qsort_r function (POSIX-future)
+- pthread_getname_np extension function
+- hard float on SPE FPU for powerpc-sf
+- SEEK_DATA and SEEK_HOLE exposed in unistd.h (Linux extensions)
+
+compatibility:
+- free now preserves errno (POSIX-future requirement)
+- setjmp is declared explicitly with returns_twice for non-GCC compilers
+- macro version of isascii is no longer defined for C++
+- dynamic linker now tolerates zero-length LOAD segments
+- epoll_[p]wait is now a cancellation point
+- pwd/grp functions no longer fail on systems without AF_UNIX support
+- POSIX TZ parsing is stricter to allow more names to fallback to files
+- NULL is now defined as nullptr when used in C++11 or later
+- gettext now accepts null pointer as argument
+
+bugs fixed:
+- old regression in wcwidth of Hangul combining (vowel/final) letters
+- duplocale used wrong malloc when malloc was replaced (1.2.2 regression)
+- fmaf rounded wrong on archs without FE_TOWARDZERO (all softfloat archs)
+- popen didn't honor requirement not to leak other popen pipe fds to child
+- aligned_alloc and variants crashed on allocation failure
+- dl_iterate_phdr reported incorrect module TLS pointers
+- mishandling of some inputs in acoshf and expm1f and functions using them
+- potentially wrong-sign zero in cproj functions at infinity
+- multiple bugs in legacy function cuserid
+- minor posix_spawn file actions API conformance issues
+- pthread_setname_np fd leak
+- out-of-bound read in zoneinfo handling with distant-past times
+- out-of-tree builds lacked generated debug cfi for x86 asm
+
+arch-specific bugs fixed:
+- powerpc (32-bit) struct shmid_ds layout was wrong for some fields
+- time64 struct layout was wrong in sound ioctl fallback (32-bit archs)
+
+
+
+1.2.4 release notes
+
+new features:
+- large dns record lookups via tcp fallback
+- new getaddrinfo EAI_NODATA result to distinguish NODATA/NxDomain
+- support for new RELR compressed format for relative relocations
+- sysconf keys for querying signal stack size requirements
+- real vfork on riscv64
+
+performance:
+- mallocng no longer uses MADV_FREE (high performance cost, little gain)
+- vdso clock_gettime is supported once again on 32-bit arm
+
+compatibility:
+- gethostbyname family now distinguishes NO_DATA from HOST_NOT_FOUND
+- res_send now works with caller-provided edns0 queries
+- arpa/nameser.h RR types list is now up-to-date
+- previously-missing POSIX confstr keys have been added
+- mntent interfaces now accept missing fields
+- alt signal stack, if any, is now used for internal signals
+- the LFS64 macros are no longer exposed without _LARGEFILE64_SOURCE
+- memmem (POSIX-future) is now exposed in default feature profile
+- pthread_atfork now admits calls from an application-provided malloc
+- debugger tracking of shared libraries now works on MIPS PIE binaries
+- sendmsg now supports up to SCM_MAX_FD fds in SCM_RIGHTS messages
+
+bugs fixed:
+- gethostbyname[2]_r wrongly returned nonzero (error) on negative result
+- parallel v4/v6 address queries could fail on query id collisions
+- spurious getaddrinfo/AI_ADDRCONFIG failures due to errno clobbering
+- dns search domains ending in dot (including lone dot) broke lookups
+- ipv6 servers in resolv.conf broke lookups on systems with v6 disabled
+- systems with bindv6only failed to query both v4 and v6 nameservers
+- res_mkquery mishandled consecutive final dots in name
+- res_send could malfunction for very small answer buffer sizes
+- resolver dns backend accepted answers with wrong (A vs AAAA) RR type
+- getservbyport_r returned junk or ENOENT (vs ERANGE) on buffer size errors
+- dns result parsing of malformed responses could process uninitialized data
+- freopen didn't reset stream orientation (byte/wide) & encoding rule
+- fwprintf didn't print most fields on open_wmemstream FILEs
+- wide printf %lc ignored field width
+- wide printf erroneously processed %n after encoding errors
+- use of wide printf %9$ argument slot overflowed undersized buffer
+- swprintf malfunctioned on nul character in output
+- strverscmp ordered digit sequences vs nondigits incorrectly
+- timer_create/SIGEV_THREAD failure leaked the thread
+- semaphores were subject to missed-wake under certain usage patterns
+- several possible rare deadlocks with lock handling at thread exit
+- several possible rare deadlocks with aio and multithreaded fork
+- dynamic linker relro processing was broken on archs w/variable pagesize
+- async cancellation could run cancellation handlers in invalid context
+- pthread_detach was wrongly a cancellation point in rare race code path
+- use-after-close/double-close errors in mq_notify error paths
+- mq_notify event thread wrongly ran with signals unmasked
+- wcs{,n}cmp, wmemcmp returned wrong results when difference overflowed
+- accept4, pipe2, and dup3 handled unknown flags wrong in fallback cases
+- CPU_SETSIZE macro had wrong unit
+- select fallback for pre-time64 kernels truncated timeout (vs clamping)
+
+arch-specific bugs fixed:
+- x32 new socketcalls took fallback path due to pointer sign extension
+- x32 wait4 didn't fill rusage structure (time64 regression)
+- x32 semtimedop mismatched timespec ABI with kernel (time64 regression)
+- sigaction signal mask was bogus on or1k, microblaze, mips, and riscv
+- powerpc-sf longjmp asm clobbered value argument
+- or1k poll function passed timeout to syscall in wrong form
+
+
+
+1.2.5 release notes
+
+new features:
+- statx function (linux extension; via syscall and fallback using fstatat)
+- clone function is now usable and gives _Fork-like consistency in child
+- statvfs now provides f_type result
+- preadv2 and pwritev2 (linux extension) syscall wrappers
+- riscv64 TLSDESC support
+
+new ports:
+- loongarch64
+- riscv32
+
+compatibility:
+- DNS resolver can now handle answers with long CNAME chains
+- string.h no longer provides (C23-incompat) non-prototype decl of basename
+- fstatat statx backend now matches stat syscall non-automounting behavior
+- mntent interfaces now handle escaped whitespace in paths/options
+
+standards updates:
+- printf %lc of nul wchar now produces output
+- snprintf and swprintf no longer fail on n > INT_MAX
+- ppoll is now exposed in default feature profile
+
+bugs fixed:
+- some long DNS answers were wrongly rejected despite new TCP support
+- glob could wrongly return GLOB_NOMATCH if aborted before any matches
+- multithreaded set*id could malfunction from thread sequencing logic bug
+- certain use of threads after fork could deadlock thread-list lock
+- posix_spawn child could deadlock in race with async parent death
+- mbrtowc return value was wrong if argument n exceeded UINT_MAX
+- 80-bit extended acoshl and powl got some corner cases wrong
+- syslog incorrectly generated localized timestamps
+
+arch-specific bugs fixed:
+- arm (32-bit) TLSDESC malfunctioned due to addends being processed wrong
+- riscv64 icache flush operation was non-functional
+- sh sigsetjmp failed to properly restore call-saved register r8 on return
+- sh dlsym RTLD_NEXT did not identify calling module correctly
diff --git a/arch/aarch64/bits/alltypes.h.in b/arch/aarch64/bits/alltypes.h.in
index d56abdac..c547ca0b 100644
--- a/arch/aarch64/bits/alltypes.h.in
+++ b/arch/aarch64/bits/alltypes.h.in
@@ -2,8 +2,13 @@
 #define _Int64 long
 #define _Reg long
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if __AARCH64EB__
+#define __BYTE_ORDER 4321
+#else
+#define __BYTE_ORDER 1234
+#endif
+
+#define __LONG_MAX 0x7fffffffffffffffL
 
 #ifndef __cplusplus
 TYPEDEF unsigned wchar_t;
@@ -17,14 +22,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; void *__p[7]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/arch/aarch64/bits/endian.h b/arch/aarch64/bits/endian.h
deleted file mode 100644
index 7a74d2fe..00000000
--- a/arch/aarch64/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if __AARCH64EB__
-#define __BYTE_ORDER __BIG_ENDIAN
-#else
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#endif
diff --git a/arch/aarch64/bits/hwcap.h b/arch/aarch64/bits/hwcap.h
index 8541e329..424cc4d4 100644
--- a/arch/aarch64/bits/hwcap.h
+++ b/arch/aarch64/bits/hwcap.h
@@ -26,3 +26,27 @@
 #define HWCAP_USCAT		(1 << 25)
 #define HWCAP_ILRCPC		(1 << 26)
 #define HWCAP_FLAGM		(1 << 27)
+#define HWCAP_SSBS		(1 << 28)
+#define HWCAP_SB		(1 << 29)
+#define HWCAP_PACA		(1 << 30)
+#define HWCAP_PACG		(1UL << 31)
+
+#define HWCAP2_DCPODP		(1 << 0)
+#define HWCAP2_SVE2		(1 << 1)
+#define HWCAP2_SVEAES		(1 << 2)
+#define HWCAP2_SVEPMULL		(1 << 3)
+#define HWCAP2_SVEBITPERM	(1 << 4)
+#define HWCAP2_SVESHA3		(1 << 5)
+#define HWCAP2_SVESM4		(1 << 6)
+#define HWCAP2_FLAGM2		(1 << 7)
+#define HWCAP2_FRINT		(1 << 8)
+#define HWCAP2_SVEI8MM		(1 << 9)
+#define HWCAP2_SVEF32MM		(1 << 10)
+#define HWCAP2_SVEF64MM		(1 << 11)
+#define HWCAP2_SVEBF16		(1 << 12)
+#define HWCAP2_I8MM		(1 << 13)
+#define HWCAP2_BF16		(1 << 14)
+#define HWCAP2_DGH		(1 << 15)
+#define HWCAP2_RNG		(1 << 16)
+#define HWCAP2_BTI		(1 << 17)
+#define HWCAP2_MTE		(1 << 18)
diff --git a/arch/aarch64/bits/ipc.h b/arch/aarch64/bits/ipc.h
deleted file mode 100644
index 6f3328a8..00000000
--- a/arch/aarch64/bits/ipc.h
+++ /dev/null
@@ -1,14 +0,0 @@
-struct ipc_perm {
-	key_t __ipc_perm_key;
-	uid_t uid;
-	gid_t gid;
-	uid_t cuid;
-	gid_t cgid;
-	mode_t mode;
-	unsigned short __ipc_perm_seq;
-
-	unsigned long __pad1;
-	unsigned long __pad2;
-};
-
-#define IPC_64 0
diff --git a/arch/aarch64/bits/limits.h b/arch/aarch64/bits/limits.h
deleted file mode 100644
index 0226588c..00000000
--- a/arch/aarch64/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 64
-#endif
-
-#define LONG_MAX  0x7fffffffffffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/aarch64/bits/mman.h b/arch/aarch64/bits/mman.h
new file mode 100644
index 00000000..8fad5ceb
--- /dev/null
+++ b/arch/aarch64/bits/mman.h
@@ -0,0 +1,2 @@
+#define PROT_BTI 0x10
+#define PROT_MTE 0x20
diff --git a/arch/aarch64/bits/msg.h b/arch/aarch64/bits/msg.h
deleted file mode 100644
index 641e1703..00000000
--- a/arch/aarch64/bits/msg.h
+++ /dev/null
@@ -1,13 +0,0 @@
-struct msqid_ds {
-	struct ipc_perm msg_perm;
-	time_t msg_stime;
-	time_t msg_rtime;
-	time_t msg_ctime;
-	unsigned long msg_cbytes;
-	msgqnum_t msg_qnum;
-	msglen_t msg_qbytes;
-	pid_t msg_lspid;
-	pid_t msg_lrpid;
-	unsigned long __pad1;
-	unsigned long __pad2;
-};
diff --git a/arch/aarch64/bits/posix.h b/arch/aarch64/bits/posix.h
deleted file mode 100644
index c37b94c1..00000000
--- a/arch/aarch64/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_LP64_OFF64  1
-#define _POSIX_V7_LP64_OFF64  1
diff --git a/arch/aarch64/bits/reg.h b/arch/aarch64/bits/reg.h
deleted file mode 100644
index 2633f39d..00000000
--- a/arch/aarch64/bits/reg.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
diff --git a/arch/aarch64/bits/sem.h b/arch/aarch64/bits/sem.h
deleted file mode 100644
index e46ced95..00000000
--- a/arch/aarch64/bits/sem.h
+++ /dev/null
@@ -1,14 +0,0 @@
-struct semid_ds {
-	struct ipc_perm sem_perm;
-	time_t sem_otime;
-	time_t sem_ctime;
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-	unsigned short sem_nsems;
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
-#else
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
-	unsigned short sem_nsems;
-#endif
-	time_t __unused3;
-	time_t __unused4;
-};
diff --git a/arch/aarch64/bits/signal.h b/arch/aarch64/bits/signal.h
index b71261f5..5098c734 100644
--- a/arch/aarch64/bits/signal.h
+++ b/arch/aarch64/bits/signal.h
@@ -11,7 +11,7 @@ typedef unsigned long greg_t;
 typedef unsigned long gregset_t[34];
 
 typedef struct {
-	long double vregs[32];
+	__uint128_t vregs[32];
 	unsigned int fpsr;
 	unsigned int fpcr;
 } fpregset_t;
@@ -34,7 +34,7 @@ struct fpsimd_context {
 	struct _aarch64_ctx head;
 	unsigned int fpsr;
 	unsigned int fpcr;
-	long double vregs[32];
+	__uint128_t vregs[32];
 };
 struct esr_context {
 	struct _aarch64_ctx head;
diff --git a/arch/aarch64/bits/socket.h b/arch/aarch64/bits/socket.h
deleted file mode 100644
index c11677e9..00000000
--- a/arch/aarch64/bits/socket.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <endian.h>
-
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad1, msg_iovlen;
-#else
-	int msg_iovlen, __pad1;
-#endif
-	void *msg_control;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad2;
-	socklen_t msg_controllen;
-#else
-	socklen_t msg_controllen;
-	int __pad2;
-#endif
-	int msg_flags;
-};
-
-struct cmsghdr {
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad1;
-	socklen_t cmsg_len;
-#else
-	socklen_t cmsg_len;
-	int __pad1;
-#endif
-	int cmsg_level;
-	int cmsg_type;
-};
diff --git a/arch/aarch64/bits/stdint.h b/arch/aarch64/bits/stdint.h
deleted file mode 100644
index 1bb147f2..00000000
--- a/arch/aarch64/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT64_MIN
-#define INTPTR_MAX      INT64_MAX
-#define UINTPTR_MAX     UINT64_MAX
-#define PTRDIFF_MIN     INT64_MIN
-#define PTRDIFF_MAX     INT64_MAX
-#define SIZE_MAX        UINT64_MAX
diff --git a/arch/aarch64/bits/syscall.h.in b/arch/aarch64/bits/syscall.h.in
index 47a969bc..ea5a152a 100644
--- a/arch/aarch64/bits/syscall.h.in
+++ b/arch/aarch64/bits/syscall.h.in
@@ -275,4 +275,33 @@
 #define __NR_pkey_free 290
 #define __NR_statx 291
 #define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/aarch64/bits/user.h b/arch/aarch64/bits/user.h
index d12cdf7f..8a1002aa 100644
--- a/arch/aarch64/bits/user.h
+++ b/arch/aarch64/bits/user.h
@@ -6,7 +6,7 @@ struct user_regs_struct {
 };
 
 struct user_fpsimd_struct {
-	long double vregs[32];
+	__uint128_t vregs[32];
 	unsigned int fpsr;
 	unsigned int fpcr;
 };
diff --git a/arch/aarch64/fp_arch.h b/arch/aarch64/fp_arch.h
new file mode 100644
index 00000000..f3d445b9
--- /dev/null
+++ b/arch/aarch64/fp_arch.h
@@ -0,0 +1,25 @@
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+	return x;
+}
+
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+	return x;
+}
+
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+}
+
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+}
diff --git a/arch/aarch64/kstat.h b/arch/aarch64/kstat.h
new file mode 100644
index 00000000..92625f36
--- /dev/null
+++ b/arch/aarch64/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	unsigned long __pad;
+	off_t st_size;
+	blksize_t st_blksize;
+	int __pad2;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	unsigned __unused[2];
+};
diff --git a/arch/aarch64/pthread_arch.h b/arch/aarch64/pthread_arch.h
index e64b126d..3909616c 100644
--- a/arch/aarch64/pthread_arch.h
+++ b/arch/aarch64/pthread_arch.h
@@ -1,12 +1,11 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	char *self;
-	__asm__ ("mrs %0,tpidr_el0" : "=r"(self));
-	return (void*)(self - sizeof(struct pthread));
+	uintptr_t tp;
+	__asm__ ("mrs %0,tpidr_el0" : "=r"(tp));
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 16
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC pc
diff --git a/arch/aarch64/reloc.h b/arch/aarch64/reloc.h
index 40cf0b28..b1b68c72 100644
--- a/arch/aarch64/reloc.h
+++ b/arch/aarch64/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define ENDIAN_SUFFIX "_be"
 #else
diff --git a/arch/aarch64/syscall_arch.h b/arch/aarch64/syscall_arch.h
index 25f5ce67..504983aa 100644
--- a/arch/aarch64/syscall_arch.h
+++ b/arch/aarch64/syscall_arch.h
@@ -74,3 +74,5 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 #define VDSO_USEFUL
 #define VDSO_CGT_SYM "__kernel_clock_gettime"
 #define VDSO_CGT_VER "LINUX_2.6.39"
+
+#define IPC_64 0
diff --git a/arch/arm/arch.mak b/arch/arm/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/arm/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
index e427836a..9e3937cc 100644
--- a/arch/arm/atomic_arch.h
+++ b/arch/arm/atomic_arch.h
@@ -83,7 +83,7 @@ static inline void a_crash()
 		: : : "memory");
 }
 
-#if __ARM_ARCH >= 5
+#if __ARM_ARCH >= 5 && (!__thumb__ || __thumb2__)
 
 #define a_clz_32 a_clz_32
 static inline int a_clz_32(uint32_t x)
diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h.in
index 667963c7..d62bd7bd 100644
--- a/arch/arm/bits/alltypes.h.in
+++ b/arch/arm/bits/alltypes.h.in
@@ -1,9 +1,15 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if __ARMEB__
+#define __BYTE_ORDER 4321
+#else
+#define __BYTE_ORDER 1234
+#endif
+
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
 TYPEDEF unsigned wchar_t;
@@ -13,14 +19,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/arm/bits/endian.h b/arch/arm/bits/endian.h
deleted file mode 100644
index 5953724a..00000000
--- a/arch/arm/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if __ARMEB__
-#define __BYTE_ORDER __BIG_ENDIAN
-#else
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#endif
diff --git a/arch/arm/bits/ipcstat.h b/arch/arm/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/arm/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/arm/bits/limits.h b/arch/arm/bits/limits.h
deleted file mode 100644
index fbc6d238..00000000
--- a/arch/arm/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/s390x/bits/msg.h b/arch/arm/bits/msg.h
index 2e23ca27..7bbbb2bf 100644
--- a/arch/s390x/bits/msg.h
+++ b/arch/arm/bits/msg.h
@@ -1,12 +1,18 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
-	time_t msg_stime;
-	time_t msg_rtime;
-	time_t msg_ctime;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
 	msglen_t msg_qbytes;
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/arm/bits/posix.h b/arch/arm/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/arm/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/arm/bits/reg.h b/arch/arm/bits/reg.h
deleted file mode 100644
index 0c7bffca..00000000
--- a/arch/arm/bits/reg.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
-/* FIXME */
diff --git a/arch/arm/bits/sem.h b/arch/arm/bits/sem.h
new file mode 100644
index 00000000..544e3d2a
--- /dev/null
+++ b/arch/arm/bits/sem.h
@@ -0,0 +1,18 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+#else
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	unsigned short sem_nsems;
+#endif
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/arm/bits/shm.h b/arch/arm/bits/shm.h
new file mode 100644
index 00000000..725fb469
--- /dev/null
+++ b/arch/arm/bits/shm.h
@@ -0,0 +1,31 @@
+#define SHMLBA 4096
+
+struct shmid_ds {
+	struct ipc_perm shm_perm;
+	size_t shm_segsz;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
+	pid_t shm_cpid;
+	pid_t shm_lpid;
+	unsigned long shm_nattch;
+	unsigned long __pad1;
+	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
+};
+
+struct shminfo {
+	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
+};
+
+struct shm_info {
+	int __used_ids;
+	unsigned long shm_tot, shm_rss, shm_swp;
+	unsigned long __swap_attempts, __swap_successes;
+};
diff --git a/arch/arm/bits/stat.h b/arch/arm/bits/stat.h
index 22b19bbf..5d7828cf 100644
--- a/arch/arm/bits/stat.h
+++ b/arch/arm/bits/stat.h
@@ -14,8 +14,12 @@ struct stat {
 	off_t st_size;
 	blksize_t st_blksize;
 	blkcnt_t st_blocks;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
+	ino_t st_ino;
 	struct timespec st_atim;
 	struct timespec st_mtim;
 	struct timespec st_ctim;
-	ino_t st_ino;
 };
diff --git a/arch/arm/bits/stdint.h b/arch/arm/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/arm/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/arm/bits/syscall.h.in b/arch/arm/bits/syscall.h.in
index 13a3b66c..157b304d 100644
--- a/arch/arm/bits/syscall.h.in
+++ b/arch/arm/bits/syscall.h.in
@@ -55,8 +55,8 @@
 #define __NR_sethostname	74
 #define __NR_setrlimit	75
 #define __NR_getrusage	77
-#define __NR_gettimeofday	78
-#define __NR_settimeofday	79
+#define __NR_gettimeofday_time32	78
+#define __NR_settimeofday_time32	79
 #define __NR_getgroups	80
 #define __NR_setgroups	81
 #define __NR_symlink	83
@@ -211,14 +211,14 @@
 #define __NR_remap_file_pages	253
 #define __NR_set_tid_address	256
 #define __NR_timer_create	257
-#define __NR_timer_settime	258
-#define __NR_timer_gettime	259
+#define __NR_timer_settime32	258
+#define __NR_timer_gettime32	259
 #define __NR_timer_getoverrun	260
 #define __NR_timer_delete	261
-#define __NR_clock_settime	262
-#define __NR_clock_gettime	263
-#define __NR_clock_getres	264
-#define __NR_clock_nanosleep	265
+#define __NR_clock_settime32	262
+#define __NR_clock_gettime32	263
+#define __NR_clock_getres_time32	264
+#define __NR_clock_nanosleep_time32	265
 #define __NR_statfs64	266
 #define __NR_fstatfs64	267
 #define __NR_tgkill	268
@@ -308,8 +308,8 @@
 #define __NR_timerfd_create	350
 #define __NR_eventfd	351
 #define __NR_fallocate	352
-#define __NR_timerfd_settime	353
-#define __NR_timerfd_gettime	354
+#define __NR_timerfd_settime32	353
+#define __NR_timerfd_gettime32	354
 #define __NR_signalfd4	355
 #define __NR_eventfd2	356
 #define __NR_epoll_create1	357
@@ -354,6 +354,56 @@
 #define __NR_pkey_free	396
 #define __NR_statx	397
 #define __NR_rseq	398
+#define __NR_io_pgetevents	399
+#define __NR_migrate_pages	400
+#define __NR_kexec_file_load	401
+#define __NR_clock_gettime64	403
+#define __NR_clock_settime64	404
+#define __NR_clock_adjtime64	405
+#define __NR_clock_getres_time64	406
+#define __NR_clock_nanosleep_time64	407
+#define __NR_timer_gettime64	408
+#define __NR_timer_settime64	409
+#define __NR_timerfd_gettime64	410
+#define __NR_timerfd_settime64	411
+#define __NR_utimensat_time64	412
+#define __NR_pselect6_time64	413
+#define __NR_ppoll_time64	414
+#define __NR_io_pgetevents_time64	416
+#define __NR_recvmmsg_time64	417
+#define __NR_mq_timedsend_time64	418
+#define __NR_mq_timedreceive_time64	419
+#define __NR_semtimedop_time64	420
+#define __NR_rt_sigtimedwait_time64	421
+#define __NR_futex_time64	422
+#define __NR_sched_rr_get_interval_time64	423
+#define __NR_pidfd_send_signal	424
+#define __NR_io_uring_setup	425
+#define __NR_io_uring_enter	426
+#define __NR_io_uring_register	427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
 #define __ARM_NR_breakpoint	0x0f0001
 #define __ARM_NR_cacheflush	0x0f0002
diff --git a/arch/arm/kstat.h b/arch/arm/kstat.h
new file mode 100644
index 00000000..af449c95
--- /dev/null
+++ b/arch/arm/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	int __st_dev_padding;
+	long __st_ino_truncated;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	int __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	ino_t st_ino;
+};
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index e689ea21..157e2eae 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -1,11 +1,11 @@
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 
-static inline pthread_t __pthread_self()
+static inline uintptr_t __get_tp()
 {
-	char *p;
-	__asm__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(p) );
-	return (void *)(p-sizeof(struct pthread));
+	uintptr_t tp;
+	__asm__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(tp) );
+	return tp;
 }
 
 #else
@@ -16,18 +16,17 @@ static inline pthread_t __pthread_self()
 #define BLX "blx"
 #endif
 
-static inline pthread_t __pthread_self()
+static inline uintptr_t __get_tp()
 {
 	extern hidden uintptr_t __a_gettp_ptr;
-	register uintptr_t p __asm__("r0");
-	__asm__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
-	return (void *)(p-sizeof(struct pthread));
+	register uintptr_t tp __asm__("r0");
+	__asm__ ( BLX " %1" : "=r"(tp) : "r"(__a_gettp_ptr) : "cc", "lr" );
+	return tp;
 }
 
 #endif
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 8
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC arm_pc
diff --git a/arch/arm/reloc.h b/arch/arm/reloc.h
index 2c2e7f58..d98eb8af 100644
--- a/arch/arm/reloc.h
+++ b/arch/arm/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define ENDIAN_SUFFIX "eb"
 #else
@@ -28,7 +26,7 @@
 #define REL_TPOFF       R_ARM_TLS_TPOFF32
 #define REL_TLSDESC     R_ARM_TLS_DESC
 
-#define TLSDESC_BACKWARDS
+#define TLSDESC_BACKWARDS 1
 
 #define CRTJMP(pc,sp) __asm__ __volatile__( \
 	"mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/arm/syscall_arch.h b/arch/arm/syscall_arch.h
index 53fb155c..624e992e 100644
--- a/arch/arm/syscall_arch.h
+++ b/arch/arm/syscall_arch.h
@@ -98,10 +98,13 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 	__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
 }
 
-#define VDSO_USEFUL
-#define VDSO_CGT_SYM "__vdso_clock_gettime"
-#define VDSO_CGT_VER "LINUX_2.6"
-
 #define SYSCALL_FADVISE_6_ARG
 
 #define SYSCALL_IPC_BROKEN_MODE
+
+#define VDSO_USEFUL
+#define VDSO_CGT32_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_VER "LINUX_2.6"
+#define VDSO_CGT_SYM "__vdso_clock_gettime64"
+#define VDSO_CGT_VER "LINUX_2.6"
+#define VDSO_CGT_WORKAROUND 1
diff --git a/arch/generic/bits/dirent.h b/arch/generic/bits/dirent.h
new file mode 100644
index 00000000..c845fe82
--- /dev/null
+++ b/arch/generic/bits/dirent.h
@@ -0,0 +1,11 @@
+#define _DIRENT_HAVE_D_RECLEN
+#define _DIRENT_HAVE_D_OFF
+#define _DIRENT_HAVE_D_TYPE
+
+struct dirent {
+	ino_t d_ino;
+	off_t d_off;
+	unsigned short d_reclen;
+	unsigned char d_type;
+	char d_name[256];
+};
diff --git a/arch/generic/bits/fcntl.h b/arch/generic/bits/fcntl.h
index ae233cc0..730a98cf 100644
--- a/arch/generic/bits/fcntl.h
+++ b/arch/generic/bits/fcntl.h
@@ -30,9 +30,15 @@
 #define F_SETSIG 10
 #define F_GETSIG 11
 
+#if __LONG_MAX == 0x7fffffffL
 #define F_GETLK 12
 #define F_SETLK 13
 #define F_SETLKW 14
+#else
+#define F_GETLK 5
+#define F_SETLK 6
+#define F_SETLKW 7
+#endif
 
 #define F_SETOWN_EX 15
 #define F_GETOWN_EX 16
diff --git a/arch/generic/bits/ioctl.h b/arch/generic/bits/ioctl.h
index 42a8f1a2..60ae8b85 100644
--- a/arch/generic/bits/ioctl.h
+++ b/arch/generic/bits/ioctl.h
@@ -64,6 +64,8 @@
 #define TIOCGPTLCK	0x80045439
 #define TIOCGEXCL	0x80045440
 #define TIOCGPTPEER	0x5441
+#define TIOCGISO7816	0x80285442
+#define TIOCSISO7816	0xc0285443
 
 #define FIONCLEX	0x5450
 #define FIOCLEX		0x5451
@@ -82,24 +84,6 @@
 #define TIOCGICOUNT	0x545D
 #define FIOQSIZE	0x5460
 
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-#define TIOCPKT_IOCTL		64
-
-#define TIOCSER_TEMT    0x01
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
 #define TIOCM_LE        0x001
 #define TIOCM_DTR       0x002
 #define TIOCM_RTS       0x004
@@ -115,92 +99,17 @@ struct winsize {
 #define TIOCM_OUT2      0x4000
 #define TIOCM_LOOP      0x8000
 
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOSETOWN       0x8901
 #define SIOCSPGRP       0x8902
 #define FIOGETOWN       0x8903
 #define SIOCGPGRP       0x8904
 #define SIOCATMARK      0x8905
+#if __LONG_MAX == 0x7fffffff
+#define SIOCGSTAMP      _IOR(0x89, 6, char[16])
+#define SIOCGSTAMPNS    _IOR(0x89, 7, char[16])
+#else
 #define SIOCGSTAMP      0x8906
 #define SIOCGSTAMPNS    0x8907
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE		0x89F0
-#define SIOCPROTOPRIVATE	0x89E0
+#endif
 
 #include <bits/ioctl_fix.h>
diff --git a/arch/generic/bits/ipc.h b/arch/generic/bits/ipc.h
index 779c42fd..40d6f3a2 100644
--- a/arch/generic/bits/ipc.h
+++ b/arch/generic/bits/ipc.h
@@ -9,5 +9,3 @@ struct ipc_perm {
 	long __pad1;
 	long __pad2;
 };
-
-#define IPC_64 0x100
diff --git a/arch/generic/bits/ipcstat.h b/arch/generic/bits/ipcstat.h
new file mode 100644
index 00000000..0018ad1e
--- /dev/null
+++ b/arch/generic/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 2
diff --git a/src/internal/syscall.c b/arch/generic/bits/limits.h
index e69de29b..e69de29b 100644
--- a/src/internal/syscall.c
+++ b/arch/generic/bits/limits.h
diff --git a/arch/generic/bits/msg.h b/arch/generic/bits/msg.h
index bc8436c4..2e23ca27 100644
--- a/arch/generic/bits/msg.h
+++ b/arch/generic/bits/msg.h
@@ -1,11 +1,8 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
 	time_t msg_stime;
-	int __unused1;
 	time_t msg_rtime;
-	int __unused2;
 	time_t msg_ctime;
-	int __unused3;
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
 	msglen_t msg_qbytes;
diff --git a/arch/generic/bits/reg.h b/arch/generic/bits/reg.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/arch/generic/bits/reg.h
diff --git a/arch/generic/bits/sem.h b/arch/generic/bits/sem.h
index c629b81e..5184eb59 100644
--- a/arch/generic/bits/sem.h
+++ b/arch/generic/bits/sem.h
@@ -1,16 +1,14 @@
 struct semid_ds {
 	struct ipc_perm sem_perm;
 	time_t sem_otime;
-	time_t __unused1;
 	time_t sem_ctime;
-	time_t __unused2;
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 	unsigned short sem_nsems;
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
 #else
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
 	unsigned short sem_nsems;
 #endif
-	time_t __unused3;
-	time_t __unused4;
+	long __unused3;
+	long __unused4;
 };
diff --git a/arch/generic/bits/shm.h b/arch/generic/bits/shm.h
index 45d1d157..8d193781 100644
--- a/arch/generic/bits/shm.h
+++ b/arch/generic/bits/shm.h
@@ -4,11 +4,8 @@ struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
 	time_t shm_atime;
-	int __unused1;
 	time_t shm_dtime;
-	int __unused2;
 	time_t shm_ctime;
-	int __unused3;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
@@ -25,4 +22,3 @@ struct shm_info {
 	unsigned long shm_tot, shm_rss, shm_swp;
 	unsigned long __swap_attempts, __swap_successes;
 };
-
diff --git a/arch/generic/bits/socket.h b/arch/generic/bits/socket.h
index 1f73b995..e69de29b 100644
--- a/arch/generic/bits/socket.h
+++ b/arch/generic/bits/socket.h
@@ -1,15 +0,0 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int msg_iovlen;
-	void *msg_control;
-	socklen_t msg_controllen;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	socklen_t cmsg_len;
-	int cmsg_level;
-	int cmsg_type;
-};
diff --git a/arch/aarch64/bits/stat.h b/arch/generic/bits/stat.h
index b7f4221b..f6d9e864 100644
--- a/arch/aarch64/bits/stat.h
+++ b/arch/generic/bits/stat.h
@@ -6,7 +6,7 @@ struct stat {
 	uid_t st_uid;
 	gid_t st_gid;
 	dev_t st_rdev;
-	unsigned long __pad;
+	unsigned long long __pad;
 	off_t st_size;
 	blksize_t st_blksize;
 	int __pad2;
diff --git a/arch/i386/bits/stdint.h b/arch/generic/bits/stdint.h
index d1b27121..86489187 100644
--- a/arch/i386/bits/stdint.h
+++ b/arch/generic/bits/stdint.h
@@ -12,9 +12,18 @@ typedef uint32_t uint_fast32_t;
 #define UINT_FAST16_MAX UINT32_MAX
 #define UINT_FAST32_MAX UINT32_MAX
 
+#if __LONG_MAX == 0x7fffffffL
 #define INTPTR_MIN      INT32_MIN
 #define INTPTR_MAX      INT32_MAX
 #define UINTPTR_MAX     UINT32_MAX
 #define PTRDIFF_MIN     INT32_MIN
 #define PTRDIFF_MAX     INT32_MAX
 #define SIZE_MAX        UINT32_MAX
+#else
+#define INTPTR_MIN      INT64_MIN
+#define INTPTR_MAX      INT64_MAX
+#define UINTPTR_MAX     UINT64_MAX
+#define PTRDIFF_MIN     INT64_MIN
+#define PTRDIFF_MAX     INT64_MAX
+#define SIZE_MAX        UINT64_MAX
+#endif
diff --git a/arch/generic/fp_arch.h b/arch/generic/fp_arch.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/arch/generic/fp_arch.h
diff --git a/arch/i386/arch.mak b/arch/i386/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/i386/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/i386/bits/alltypes.h.in b/arch/i386/bits/alltypes.h.in
index 1a8432d3..6feb03a6 100644
--- a/arch/i386/bits/alltypes.h.in
+++ b/arch/i386/bits/alltypes.h.in
@@ -1,14 +1,10 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-#if __GNUC__ >= 3
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
-#else
-TYPEDEF struct __va_list * va_list;
-TYPEDEF struct __va_list * __isoc_va_list;
-#endif
+#define __BYTE_ORDER 1234
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
 #ifdef __WCHAR_TYPE__
@@ -33,14 +29,3 @@ TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __l
 #else
 TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
 #endif
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/i386/bits/endian.h b/arch/i386/bits/endian.h
deleted file mode 100644
index 172c338f..00000000
--- a/arch/i386/bits/endian.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __BYTE_ORDER __LITTLE_ENDIAN
diff --git a/arch/i386/bits/ipcstat.h b/arch/i386/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/i386/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/i386/bits/limits.h b/arch/i386/bits/limits.h
index c340ceb2..07743b6f 100644
--- a/arch/i386/bits/limits.h
+++ b/arch/i386/bits/limits.h
@@ -1,8 +1 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define PAGESIZE 4096
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/i386/bits/msg.h b/arch/i386/bits/msg.h
new file mode 100644
index 00000000..7bbbb2bf
--- /dev/null
+++ b/arch/i386/bits/msg.h
@@ -0,0 +1,18 @@
+struct msqid_ds {
+	struct ipc_perm msg_perm;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
+	unsigned long msg_cbytes;
+	msgqnum_t msg_qnum;
+	msglen_t msg_qbytes;
+	pid_t msg_lspid;
+	pid_t msg_lrpid;
+	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
+};
diff --git a/arch/i386/bits/posix.h b/arch/i386/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/i386/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/i386/bits/reg.h b/arch/i386/bits/reg.h
index 8bc2582d..7dfe8250 100644
--- a/arch/i386/bits/reg.h
+++ b/arch/i386/bits/reg.h
@@ -1,5 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
 #define EBX 0
 #define ECX 1
 #define EDX 2
diff --git a/arch/i386/bits/sem.h b/arch/i386/bits/sem.h
new file mode 100644
index 00000000..65661542
--- /dev/null
+++ b/arch/i386/bits/sem.h
@@ -0,0 +1,13 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/aarch64/bits/shm.h b/arch/i386/bits/shm.h
index 8d193781..725fb469 100644
--- a/arch/aarch64/bits/shm.h
+++ b/arch/i386/bits/shm.h
@@ -3,14 +3,21 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	time_t shm_dtime;
-	time_t shm_ctime;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
 	unsigned long __pad1;
 	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
diff --git a/arch/i386/bits/stat.h b/arch/i386/bits/stat.h
index 22b19bbf..5d7828cf 100644
--- a/arch/i386/bits/stat.h
+++ b/arch/i386/bits/stat.h
@@ -14,8 +14,12 @@ struct stat {
 	off_t st_size;
 	blksize_t st_blksize;
 	blkcnt_t st_blocks;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
+	ino_t st_ino;
 	struct timespec st_atim;
 	struct timespec st_mtim;
 	struct timespec st_ctim;
-	ino_t st_ino;
 };
diff --git a/arch/i386/bits/syscall.h.in b/arch/i386/bits/syscall.h.in
index 47f4ae03..55e91cc4 100644
--- a/arch/i386/bits/syscall.h.in
+++ b/arch/i386/bits/syscall.h.in
@@ -76,8 +76,8 @@
 #define __NR_setrlimit		 75
 #define __NR_getrlimit		 76   /* Back compatible 2Gig limited rlimit */
 #define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
+#define __NR_gettimeofday_time32	 78
+#define __NR_settimeofday_time32	 79
 #define __NR_getgroups		 80
 #define __NR_setgroups		 81
 #define __NR_select		 82
@@ -257,14 +257,14 @@
 #define __NR_remap_file_pages	257
 #define __NR_set_tid_address	258
 #define __NR_timer_create	259
-#define __NR_timer_settime	(__NR_timer_create+1)
-#define __NR_timer_gettime	(__NR_timer_create+2)
+#define __NR_timer_settime32	(__NR_timer_create+1)
+#define __NR_timer_gettime32	(__NR_timer_create+2)
 #define __NR_timer_getoverrun	(__NR_timer_create+3)
 #define __NR_timer_delete	(__NR_timer_create+4)
-#define __NR_clock_settime	(__NR_timer_create+5)
-#define __NR_clock_gettime	(__NR_timer_create+6)
-#define __NR_clock_getres	(__NR_timer_create+7)
-#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_clock_settime32	(__NR_timer_create+5)
+#define __NR_clock_gettime32	(__NR_timer_create+6)
+#define __NR_clock_getres_time32	(__NR_timer_create+7)
+#define __NR_clock_nanosleep_time32	(__NR_timer_create+8)
 #define __NR_statfs64		268
 #define __NR_fstatfs64		269
 #define __NR_tgkill		270
@@ -322,8 +322,8 @@
 #define __NR_timerfd_create	322
 #define __NR_eventfd		323
 #define __NR_fallocate		324
-#define __NR_timerfd_settime	325
-#define __NR_timerfd_gettime	326
+#define __NR_timerfd_settime32	325
+#define __NR_timerfd_gettime32	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
 #define __NR_epoll_create1	329
@@ -384,4 +384,62 @@
 #define __NR_arch_prctl		384
 #define __NR_io_pgetevents	385
 #define __NR_rseq		386
+#define __NR_semget		393
+#define __NR_semctl		394
+#define __NR_shmget		395
+#define __NR_shmctl		396
+#define __NR_shmat		397
+#define __NR_shmdt		398
+#define __NR_msgget		399
+#define __NR_msgsnd		400
+#define __NR_msgrcv		401
+#define __NR_msgctl		402
+#define __NR_clock_gettime64	403
+#define __NR_clock_settime64	404
+#define __NR_clock_adjtime64	405
+#define __NR_clock_getres_time64 406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64	408
+#define __NR_timer_settime64	409
+#define __NR_timerfd_gettime64	410
+#define __NR_timerfd_settime64	411
+#define __NR_utimensat_time64	412
+#define __NR_pselect6_time64	413
+#define __NR_ppoll_time64	414
+#define __NR_io_pgetevents_time64 416
+#define __NR_recvmmsg_time64	417
+#define __NR_mq_timedsend_time64 418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64	420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64	422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal	424
+#define __NR_io_uring_setup	425
+#define __NR_io_uring_enter	426
+#define __NR_io_uring_register	427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_memfd_secret	447
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/i386/crt_arch.h b/arch/i386/crt_arch.h
index 43c8477a..1a80fce3 100644
--- a/arch/i386/crt_arch.h
+++ b/arch/i386/crt_arch.h
@@ -3,6 +3,7 @@ __asm__(
 ".weak _DYNAMIC \n"
 ".hidden _DYNAMIC \n"
 ".global " START "\n"
+".type " START ",%function \n"
 START ":\n"
 "	xor %ebp,%ebp \n"
 "	mov %esp,%eax \n"
diff --git a/arch/i386/kstat.h b/arch/i386/kstat.h
new file mode 100644
index 00000000..af449c95
--- /dev/null
+++ b/arch/i386/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	int __st_dev_padding;
+	long __st_ino_truncated;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	int __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	ino_t st_ino;
+};
diff --git a/arch/i386/pthread_arch.h b/arch/i386/pthread_arch.h
index 6f600b9e..a639c382 100644
--- a/arch/i386/pthread_arch.h
+++ b/arch/i386/pthread_arch.h
@@ -1,10 +1,8 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	struct pthread *self;
-	__asm__ ("movl %%gs:0,%0" : "=r" (self) );
-	return self;
+	uintptr_t tp;
+	__asm__ ("movl %%gs:0,%0" : "=r" (tp) );
+	return tp;
 }
 
-#define TP_ADJ(p) (p)
-
 #define MC_PC gregs[REG_EIP]
diff --git a/arch/i386/syscall_arch.h b/arch/i386/syscall_arch.h
index 4c9d874a..f92b7aa9 100644
--- a/arch/i386/syscall_arch.h
+++ b/arch/i386/syscall_arch.h
@@ -3,57 +3,87 @@
 ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
 #define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
 
+#if SYSCALL_NO_TLS
+#define SYSCALL_INSNS "int $128"
+#else
+#define SYSCALL_INSNS "call *%%gs:16"
+#endif
+
+#define SYSCALL_INSNS_12 "xchg %%ebx,%%edx ; " SYSCALL_INSNS " ; xchg %%ebx,%%edx"
+#define SYSCALL_INSNS_34 "xchg %%ebx,%%edi ; " SYSCALL_INSNS " ; xchg %%ebx,%%edi"
+
 static inline long __syscall0(long n)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n) : "memory");
+	__asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n) : "memory");
 	return __ret;
 }
 
 static inline long __syscall1(long n, long a1)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
+	__asm__ __volatile__ (SYSCALL_INSNS_12 : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
 	return __ret;
 }
 
 static inline long __syscall2(long n, long a1, long a2)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
+	__asm__ __volatile__ (SYSCALL_INSNS_12 : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
 	return __ret;
 }
 
 static inline long __syscall3(long n, long a1, long a2, long a3)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+	__asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3) : "memory");
+#else
+	__asm__ __volatile__ (SYSCALL_INSNS_34 : "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3) : "memory");
+#endif
 	return __ret;
 }
 
 static inline long __syscall4(long n, long a1, long a2, long a3, long a4)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+	__asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
+#else
+	__asm__ __volatile__ (SYSCALL_INSNS_34 : "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
+#endif
 	return __ret;
 }
 
 static inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("push %6 ; .hidden __vsyscall ; call __vsyscall ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(a5) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+	__asm__ __volatile__ (SYSCALL_INSNS
+		: "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+#else
+	__asm__ __volatile__ ("pushl %2 ; push %%ebx ; mov 4(%%esp),%%ebx ; " SYSCALL_INSNS " ; pop %%ebx ; add $4,%%esp"
+		: "=a"(__ret) : "a"(n), "g"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+#endif
 	return __ret;
 }
 
 static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("push %6 ; .hidden __vsyscall6 ; call __vsyscall6 ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(0+(long[]){a5, a6}) : "memory");
+#if !defined(__PIC__) || !defined(BROKEN_EBX_ASM)
+	__asm__ __volatile__ ("pushl %7 ; push %%ebp ; mov 4(%%esp),%%ebp ; " SYSCALL_INSNS " ; pop %%ebp ; add $4,%%esp"
+		: "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5), "g"(a6) : "memory");
+#else
+	unsigned long a1a6[2] = { a1, a6 };
+	__asm__ __volatile__ ("pushl %1 ; push %%ebx ; push %%ebp ; mov 8(%%esp),%%ebx ; mov 4(%%ebx),%%ebp ; mov (%%ebx),%%ebx ; " SYSCALL_INSNS " ; pop %%ebp ; pop %%ebx ; add $4,%%esp"
+		: "=a"(__ret) : "g"(&a1a6), "a"(n), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+#endif
 	return __ret;
 }
 
 #define VDSO_USEFUL
-#define VDSO_CGT_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_VER "LINUX_2.6"
+#define VDSO_CGT_SYM "__vdso_clock_gettime64"
 #define VDSO_CGT_VER "LINUX_2.6"
-
-#define SYSCALL_USE_SOCKETCALL
diff --git a/arch/loongarch64/atomic_arch.h b/arch/loongarch64/atomic_arch.h
new file mode 100644
index 00000000..2225d027
--- /dev/null
+++ b/arch/loongarch64/atomic_arch.h
@@ -0,0 +1,53 @@
+#define a_ll a_ll
+static inline int a_ll(volatile int *p)
+{
+	int v;
+	__asm__ __volatile__ (
+		"ll.w %0, %1"
+		: "=r"(v)
+		: "ZC"(*p));
+	return v;
+}
+
+#define a_sc a_sc
+static inline int a_sc(volatile int *p, int v)
+{
+	int r;
+	__asm__ __volatile__ (
+		"sc.w %0, %1"
+		: "=r"(r), "=ZC"(*p)
+		: "0"(v) : "memory");
+	return r;
+}
+
+#define a_ll_p a_ll_p
+static inline void *a_ll_p(volatile void *p)
+{
+	void *v;
+	__asm__ __volatile__ (
+		"ll.d %0, %1"
+		: "=r"(v)
+		: "ZC"(*(void *volatile *)p));
+	return v;
+}
+
+#define a_sc_p a_sc_p
+static inline int a_sc_p(volatile void *p, void *v)
+{
+	long r;
+	__asm__ __volatile__ (
+		"sc.d %0, %1"
+		: "=r"(r), "=ZC"(*(void *volatile *)p)
+		: "0"(v)
+		: "memory");
+	return r;
+}
+
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+	__asm__ __volatile__ ("dbar 0" : : : "memory");
+}
+
+#define a_pre_llsc  a_barrier
+#define a_post_llsc a_barrier
diff --git a/arch/loongarch64/bits/alltypes.h.in b/arch/loongarch64/bits/alltypes.h.in
new file mode 100644
index 00000000..d1807aca
--- /dev/null
+++ b/arch/loongarch64/bits/alltypes.h.in
@@ -0,0 +1,18 @@
+#define _Addr long
+#define _Int64 long
+#define _Reg long
+
+#define __BYTE_ORDER 1234
+#define __LONG_MAX 0x7fffffffffffffffL
+
+#ifndef __cplusplus
+TYPEDEF int wchar_t;
+#endif
+
+TYPEDEF float float_t;
+TYPEDEF double double_t;
+
+TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
+
+TYPEDEF unsigned nlink_t;
+TYPEDEF int blksize_t;
diff --git a/arch/loongarch64/bits/fenv.h b/arch/loongarch64/bits/fenv.h
new file mode 100644
index 00000000..264cafb5
--- /dev/null
+++ b/arch/loongarch64/bits/fenv.h
@@ -0,0 +1,20 @@
+#define FE_INEXACT    0x010000
+#define FE_UNDERFLOW  0x020000
+#define FE_OVERFLOW   0x040000
+#define FE_DIVBYZERO  0x080000
+#define FE_INVALID    0x100000
+
+#define FE_ALL_EXCEPT 0x1F0000
+
+#define FE_TONEAREST  0x000
+#define FE_TOWARDZERO 0x100
+#define FE_UPWARD     0x200
+#define FE_DOWNWARD   0x300
+
+typedef unsigned fexcept_t;
+
+typedef struct {
+	unsigned __cw;
+} fenv_t;
+
+#define FE_DFL_ENV ((const fenv_t *) -1)
diff --git a/arch/loongarch64/bits/float.h b/arch/loongarch64/bits/float.h
new file mode 100644
index 00000000..719c7908
--- /dev/null
+++ b/arch/loongarch64/bits/float.h
@@ -0,0 +1,16 @@
+#define FLT_EVAL_METHOD 0
+
+#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
+#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
+#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
+#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
+
+#define LDBL_MANT_DIG 113
+#define LDBL_MIN_EXP (-16381)
+#define LDBL_MAX_EXP 16384
+
+#define LDBL_DIG 33
+#define LDBL_MIN_10_EXP (-4931)
+#define LDBL_MAX_10_EXP 4932
+
+#define DECIMAL_DIG 36
diff --git a/arch/loongarch64/bits/hwcap.h b/arch/loongarch64/bits/hwcap.h
new file mode 100644
index 00000000..b8184f69
--- /dev/null
+++ b/arch/loongarch64/bits/hwcap.h
@@ -0,0 +1,14 @@
+#define HWCAP_LOONGARCH_CPUCFG          (1 << 0)
+#define HWCAP_LOONGARCH_LAM             (1 << 1)
+#define HWCAP_LOONGARCH_UAL             (1 << 2)
+#define HWCAP_LOONGARCH_FPU             (1 << 3)
+#define HWCAP_LOONGARCH_LSX             (1 << 4)
+#define HWCAP_LOONGARCH_LASX            (1 << 5)
+#define HWCAP_LOONGARCH_CRC32           (1 << 6)
+#define HWCAP_LOONGARCH_COMPLEX         (1 << 7)
+#define HWCAP_LOONGARCH_CRYPTO          (1 << 8)
+#define HWCAP_LOONGARCH_LVZ             (1 << 9)
+#define HWCAP_LOONGARCH_LBT_X86         (1 << 10)
+#define HWCAP_LOONGARCH_LBT_ARM         (1 << 11)
+#define HWCAP_LOONGARCH_LBT_MIPS        (1 << 12)
+#define HWCAP_LOONGARCH_PTW             (1 << 13)
diff --git a/arch/loongarch64/bits/setjmp.h b/arch/loongarch64/bits/setjmp.h
new file mode 100644
index 00000000..3b15e87b
--- /dev/null
+++ b/arch/loongarch64/bits/setjmp.h
@@ -0,0 +1 @@
+typedef unsigned long __jmp_buf[23];
diff --git a/arch/loongarch64/bits/signal.h b/arch/loongarch64/bits/signal.h
new file mode 100644
index 00000000..5a9ed8c9
--- /dev/null
+++ b/arch/loongarch64/bits/signal.h
@@ -0,0 +1,101 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define MINSIGSTKSZ 4096
+#define SIGSTKSZ 16384
+#endif
+
+#if defined(_GNU_SOURCE)
+#define LARCH_NGREG 32
+#define LARCH_REG_RA 1
+#define LARCH_REG_SP 3
+#define LARCH_REG_S0 23
+#define LARCH_REG_S1 24
+#define LARCH_REG_A0 4
+#define LARCH_REG_S2 25
+#define LARCH_REG_NARGS 8
+#endif
+
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+typedef unsigned long greg_t, gregset_t[32];
+
+struct sigcontext {
+	unsigned long sc_pc;
+	unsigned long sc_regs[32];
+	unsigned sc_flags;
+	unsigned long sc_extcontext[] __attribute__((__aligned__(16)));
+};
+#endif
+
+typedef struct {
+	unsigned long __pc;
+	unsigned long __gregs[32];
+	unsigned __flags;
+	unsigned long __extcontext[] __attribute__((__aligned__(16)));
+} mcontext_t;
+
+struct sigaltstack {
+	void *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+};
+
+typedef struct __ucontext
+{
+	unsigned long uc_flags;
+	struct __ucontext *uc_link;
+	stack_t uc_stack;
+	sigset_t uc_sigmask;
+	long __uc_pad;
+	mcontext_t uc_mcontext;
+} ucontext_t;
+
+#define __uc_flags uc_flags
+
+#define SA_NOCLDSTOP 1
+#define SA_NOCLDWAIT 2
+#define SA_SIGINFO   4
+#define SA_ONSTACK   0x08000000
+#define SA_RESTART   0x10000000
+#define SA_NODEFER   0x40000000
+#define SA_RESETHAND 0x80000000
+
+#endif
+
+#define SIGHUP     1
+#define SIGINT     2
+#define SIGQUIT    3
+#define SIGILL     4
+#define SIGTRAP    5
+#define SIGABRT    6
+#define SIGIOT     SIGABRT
+#define SIGBUS     7
+#define SIGFPE     8
+#define SIGKILL    9
+#define SIGUSR1   10
+#define SIGSEGV   11
+#define SIGUSR2   12
+#define SIGPIPE   13
+#define SIGALRM   14
+#define SIGTERM   15
+#define SIGSTKFLT 16
+#define SIGCHLD   17
+#define SIGCONT   18
+#define SIGSTOP   19
+#define SIGTSTP   20
+#define SIGTTIN   21
+#define SIGTTOU   22
+#define SIGURG    23
+#define SIGXCPU   24
+#define SIGXFSZ   25
+#define SIGVTALRM 26
+#define SIGPROF   27
+#define SIGWINCH  28
+#define SIGIO     29
+#define SIGPOLL   SIGIO
+#define SIGPWR    30
+#define SIGSYS    31
+#define SIGUNUSED SIGSYS
+
+#define _NSIG 65
diff --git a/arch/loongarch64/bits/syscall.h.in b/arch/loongarch64/bits/syscall.h.in
new file mode 100644
index 00000000..2afb4ea1
--- /dev/null
+++ b/arch/loongarch64/bits/syscall.h.in
@@ -0,0 +1,316 @@
+#define __NR_io_setup                   0
+#define __NR_io_destroy                 1
+#define __NR_io_submit                  2
+#define __NR_io_cancel                  3
+#define __NR_io_getevents               4
+#define __NR_setxattr                   5
+#define __NR_lsetxattr                  6
+#define __NR_fsetxattr                  7
+#define __NR_getxattr                   8
+#define __NR_lgetxattr                  9
+#define __NR_fgetxattr                  10
+#define __NR_listxattr                  11
+#define __NR_llistxattr                 12
+#define __NR_flistxattr                 13
+#define __NR_removexattr                14
+#define __NR_lremovexattr               15
+#define __NR_fremovexattr               16
+#define __NR_getcwd                     17
+#define __NR_lookup_dcookie             18
+#define __NR_eventfd2                   19
+#define __NR_epoll_create1              20
+#define __NR_epoll_ctl                  21
+#define __NR_epoll_pwait                22
+#define __NR_dup                        23
+#define __NR_dup3                       24
+#define __NR3264_fcntl                  25
+#define __NR_inotify_init1              26
+#define __NR_inotify_add_watch          27
+#define __NR_inotify_rm_watch           28
+#define __NR_ioctl                      29
+#define __NR_ioprio_set                 30
+#define __NR_ioprio_get                 31
+#define __NR_flock                      32
+#define __NR_mknodat                    33
+#define __NR_mkdirat                    34
+#define __NR_unlinkat                   35
+#define __NR_symlinkat                  36
+#define __NR_linkat                     37
+#define __NR_umount2                    39
+#define __NR_mount                      40
+#define __NR_pivot_root                 41
+#define __NR_nfsservctl                 42
+#define __NR3264_statfs                 43
+#define __NR3264_fstatfs                44
+#define __NR3264_truncate               45
+#define __NR3264_ftruncate              46
+#define __NR_fallocate                  47
+#define __NR_faccessat                  48
+#define __NR_chdir                      49
+#define __NR_fchdir                     50
+#define __NR_chroot                     51
+#define __NR_fchmod                     52
+#define __NR_fchmodat                   53
+#define __NR_fchownat                   54
+#define __NR_fchown                     55
+#define __NR_openat                     56
+#define __NR_close                      57
+#define __NR_vhangup                    58
+#define __NR_pipe2                      59
+#define __NR_quotactl                   60
+#define __NR_getdents64                 61
+#define __NR3264_lseek                  62
+#define __NR_read                       63
+#define __NR_write                      64
+#define __NR_readv                      65
+#define __NR_writev                     66
+#define __NR_pread64                    67
+#define __NR_pwrite64                   68
+#define __NR_preadv                     69
+#define __NR_pwritev                    70
+#define __NR3264_sendfile               71
+#define __NR_pselect6                   72
+#define __NR_ppoll                      73
+#define __NR_signalfd4                  74
+#define __NR_vmsplice                   75
+#define __NR_splice                     76
+#define __NR_tee                        77
+#define __NR_readlinkat                 78
+#define __NR_sync                       81
+#define __NR_fsync                      82
+#define __NR_fdatasync                  83
+#define __NR_sync_file_range            84
+#define __NR_timerfd_create             85
+#define __NR_timerfd_settime            86
+#define __NR_timerfd_gettime            87
+#define __NR_utimensat                  88
+#define __NR_acct                       89
+#define __NR_capget                     90
+#define __NR_capset                     91
+#define __NR_personality                92
+#define __NR_exit                       93
+#define __NR_exit_group                 94
+#define __NR_waitid                     95
+#define __NR_set_tid_address            96
+#define __NR_unshare                    97
+#define __NR_futex                      98
+#define __NR_set_robust_list            99
+#define __NR_get_robust_list            100
+#define __NR_nanosleep                  101
+#define __NR_getitimer                  102
+#define __NR_setitimer                  103
+#define __NR_kexec_load                 104
+#define __NR_init_module                105
+#define __NR_delete_module              106
+#define __NR_timer_create               107
+#define __NR_timer_gettime              108
+#define __NR_timer_getoverrun           109
+#define __NR_timer_settime              110
+#define __NR_timer_delete               111
+#define __NR_clock_settime              112
+#define __NR_clock_gettime              113
+#define __NR_clock_getres               114
+#define __NR_clock_nanosleep            115
+#define __NR_syslog                     116
+#define __NR_ptrace                     117
+#define __NR_sched_setparam             118
+#define __NR_sched_setscheduler         119
+#define __NR_sched_getscheduler         120
+#define __NR_sched_getparam             121
+#define __NR_sched_setaffinity          122
+#define __NR_sched_getaffinity          123
+#define __NR_sched_yield                124
+#define __NR_sched_get_priority_max     125
+#define __NR_sched_get_priority_min     126
+#define __NR_sched_rr_get_interval      127
+#define __NR_restart_syscall            128
+#define __NR_kill                       129
+#define __NR_tkill                      130
+#define __NR_tgkill                     131
+#define __NR_sigaltstack                132
+#define __NR_rt_sigsuspend              133
+#define __NR_rt_sigaction               134
+#define __NR_rt_sigprocmask             135
+#define __NR_rt_sigpending              136
+#define __NR_rt_sigtimedwait            137
+#define __NR_rt_sigqueueinfo            138
+#define __NR_rt_sigreturn               139
+#define __NR_setpriority                140
+#define __NR_getpriority                141
+#define __NR_reboot                     142
+#define __NR_setregid                   143
+#define __NR_setgid                     144
+#define __NR_setreuid                   145
+#define __NR_setuid                     146
+#define __NR_setresuid                  147
+#define __NR_getresuid                  148
+#define __NR_setresgid                  149
+#define __NR_getresgid                  150
+#define __NR_setfsuid                   151
+#define __NR_setfsgid                   152
+#define __NR_times                      153
+#define __NR_setpgid                    154
+#define __NR_getpgid                    155
+#define __NR_getsid                     156
+#define __NR_setsid                     157
+#define __NR_getgroups                  158
+#define __NR_setgroups                  159
+#define __NR_uname                      160
+#define __NR_sethostname                161
+#define __NR_setdomainname              162
+#define __NR_getrusage                  165
+#define __NR_umask                      166
+#define __NR_prctl                      167
+#define __NR_getcpu                     168
+#define __NR_gettimeofday               169
+#define __NR_settimeofday               170
+#define __NR_adjtimex                   171
+#define __NR_getpid                     172
+#define __NR_getppid                    173
+#define __NR_getuid                     174
+#define __NR_geteuid                    175
+#define __NR_getgid                     176
+#define __NR_getegid                    177
+#define __NR_gettid                     178
+#define __NR_sysinfo                    179
+#define __NR_mq_open                    180
+#define __NR_mq_unlink                  181
+#define __NR_mq_timedsend               182
+#define __NR_mq_timedreceive            183
+#define __NR_mq_notify                  184
+#define __NR_mq_getsetattr              185
+#define __NR_msgget                     186
+#define __NR_msgctl                     187
+#define __NR_msgrcv                     188
+#define __NR_msgsnd                     189
+#define __NR_semget                     190
+#define __NR_semctl                     191
+#define __NR_semtimedop                 192
+#define __NR_semop                      193
+#define __NR_shmget                     194
+#define __NR_shmctl                     195
+#define __NR_shmat                      196
+#define __NR_shmdt                      197
+#define __NR_socket                     198
+#define __NR_socketpair                 199
+#define __NR_bind                       200
+#define __NR_listen                     201
+#define __NR_accept                     202
+#define __NR_connect                    203
+#define __NR_getsockname                204
+#define __NR_getpeername                205
+#define __NR_sendto                     206
+#define __NR_recvfrom                   207
+#define __NR_setsockopt                 208
+#define __NR_getsockopt                 209
+#define __NR_shutdown                   210
+#define __NR_sendmsg                    211
+#define __NR_recvmsg                    212
+#define __NR_readahead                  213
+#define __NR_brk                        214
+#define __NR_munmap                     215
+#define __NR_mremap                     216
+#define __NR_add_key                    217
+#define __NR_request_key                218
+#define __NR_keyctl                     219
+#define __NR_clone                      220
+#define __NR_execve                     221
+#define __NR3264_mmap                   222
+#define __NR3264_fadvise64              223
+#define __NR_swapon                     224
+#define __NR_swapoff                    225
+#define __NR_mprotect                   226
+#define __NR_msync                      227
+#define __NR_mlock                      228
+#define __NR_munlock                    229
+#define __NR_mlockall                   230
+#define __NR_munlockall                 231
+#define __NR_mincore                    232
+#define __NR_madvise                    233
+#define __NR_remap_file_pages           234
+#define __NR_mbind                      235
+#define __NR_get_mempolicy              236
+#define __NR_set_mempolicy              237
+#define __NR_migrate_pages              238
+#define __NR_move_pages                 239
+#define __NR_rt_tgsigqueueinfo          240
+#define __NR_perf_event_open            241
+#define __NR_accept4                    242
+#define __NR_recvmmsg                   243
+#define __NR_arch_specific_syscall      244
+#define __NR_wait4                      260
+#define __NR_prlimit64                  261
+#define __NR_fanotify_init              262
+#define __NR_fanotify_mark              263
+#define __NR_name_to_handle_at          264
+#define __NR_open_by_handle_at          265
+#define __NR_clock_adjtime              266
+#define __NR_syncfs                     267
+#define __NR_setns                      268
+#define __NR_sendmmsg                   269
+#define __NR_process_vm_readv           270
+#define __NR_process_vm_writev          271
+#define __NR_kcmp                       272
+#define __NR_finit_module               273
+#define __NR_sched_setattr              274
+#define __NR_sched_getattr              275
+#define __NR_renameat2                  276
+#define __NR_seccomp                    277
+#define __NR_getrandom                  278
+#define __NR_memfd_create               279
+#define __NR_bpf                        280
+#define __NR_execveat                   281
+#define __NR_userfaultfd                282
+#define __NR_membarrier                 283
+#define __NR_mlock2                     284
+#define __NR_copy_file_range            285
+#define __NR_preadv2                    286
+#define __NR_pwritev2                   287
+#define __NR_pkey_mprotect              288
+#define __NR_pkey_alloc                 289
+#define __NR_pkey_free                  290
+#define __NR_statx                      291
+#define __NR_io_pgetevents              292
+#define __NR_rseq                       293
+#define __NR_kexec_file_load            294
+#define __NR_pidfd_send_signal          424
+#define __NR_io_uring_setup             425
+#define __NR_io_uring_enter             426
+#define __NR_io_uring_register          427
+#define __NR_open_tree                  428
+#define __NR_move_mount                 429
+#define __NR_fsopen                     430
+#define __NR_fsconfig                   431
+#define __NR_fsmount                    432
+#define __NR_fspick                     433
+#define __NR_pidfd_open                 434
+#define __NR_clone3                     435
+#define __NR_close_range                436
+#define __NR_openat2                    437
+#define __NR_pidfd_getfd                438
+#define __NR_faccessat2                 439
+#define __NR_process_madvise            440
+#define __NR_epoll_pwait2               441
+#define __NR_mount_setattr              442
+#define __NR_quotactl_fd                443
+#define __NR_landlock_create_ruleset    444
+#define __NR_landlock_add_rule          445
+#define __NR_landlock_restrict_self     446
+#define __NR_process_mrelease           448
+#define __NR_futex_waitv                449
+#define __NR_set_mempolicy_home_node    450
+#define __NR_cachestat                  451
+#define __NR_fchmodat2                  452
+#define __NR_map_shadow_stack           453
+#define __NR_futex_wake                 454
+#define __NR_futex_wait                 455
+#define __NR_futex_requeue              456
+#define __NR_fcntl                      __NR3264_fcntl
+#define __NR_statfs                     __NR3264_statfs
+#define __NR_fstatfs                    __NR3264_fstatfs
+#define __NR_truncate                   __NR3264_truncate
+#define __NR_ftruncate                  __NR3264_ftruncate
+#define __NR_lseek                      __NR3264_lseek
+#define __NR_sendfile                   __NR3264_sendfile
+#define __NR_mmap                       __NR3264_mmap
+#define __NR_fadvise64                  __NR3264_fadvise64
diff --git a/arch/loongarch64/bits/user.h b/arch/loongarch64/bits/user.h
new file mode 100644
index 00000000..fd9b7b22
--- /dev/null
+++ b/arch/loongarch64/bits/user.h
@@ -0,0 +1,24 @@
+#define ELF_NGREG    45
+#define ELF_NFPREG   34
+
+struct user_regs_struct {
+	unsigned long regs[32];
+	unsigned long orig_a0;
+	unsigned long csr_era;
+	unsigned long csr_badv;
+	unsigned long reserved[10];
+};
+
+struct user_fp_struct {
+	unsigned long fpr[32];
+	unsigned long fcc;
+	unsigned int fcsr;
+};
+
+typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG];
+
+typedef union {
+	double d;
+	float f;
+} elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
diff --git a/arch/loongarch64/crt_arch.h b/arch/loongarch64/crt_arch.h
new file mode 100644
index 00000000..e0760d9e
--- /dev/null
+++ b/arch/loongarch64/crt_arch.h
@@ -0,0 +1,13 @@
+__asm__(
+".text \n"
+".global " START "\n"
+".type   " START ", @function\n"
+START ":\n"
+"	move $fp, $zero\n"
+"	move $a0, $sp\n"
+".weak _DYNAMIC\n"
+".hidden _DYNAMIC\n"
+"	la.local $a1, _DYNAMIC\n"
+"	bstrins.d $sp, $zero, 3, 0\n"
+"	b " START "_c\n"
+);
diff --git a/arch/loongarch64/pthread_arch.h b/arch/loongarch64/pthread_arch.h
new file mode 100644
index 00000000..365f6ca8
--- /dev/null
+++ b/arch/loongarch64/pthread_arch.h
@@ -0,0 +1,11 @@
+static inline uintptr_t __get_tp()
+{
+	register uintptr_t tp __asm__("tp");
+	__asm__ ("" : "=r" (tp) );
+	return tp;
+}
+
+#define TLS_ABOVE_TP
+#define GAP_ABOVE_TP   0
+#define DTP_OFFSET     0
+#define MC_PC          __pc
diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h
new file mode 100644
index 00000000..a4db6a9c
--- /dev/null
+++ b/arch/loongarch64/reloc.h
@@ -0,0 +1,30 @@
+#ifdef __loongarch_soft_float
+#define FP_SUFFIX "-sf"
+#elif defined __loongarch_single_float
+#define FP_SUFFIX "-sp"
+#else
+#define FP_SUFFIX ""
+#endif
+
+#define LDSO_ARCH "loongarch64" FP_SUFFIX
+
+#define TPOFF_K         0
+
+#define REL_PLT         R_LARCH_JUMP_SLOT
+#define REL_COPY        R_LARCH_COPY
+#define REL_DTPMOD      R_LARCH_TLS_DTPMOD64
+#define REL_DTPOFF      R_LARCH_TLS_DTPREL64
+#define REL_TPOFF       R_LARCH_TLS_TPREL64
+#define REL_RELATIVE    R_LARCH_RELATIVE
+#define REL_SYMBOLIC    R_LARCH_64
+#define REL_TLSDESC     R_LARCH_TLS_DESC64
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"move $sp, %1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" )
+
+#define GETFUNCSYM(fp, sym, got) __asm__ ( \
+	".hidden " #sym "\n" \
+	".align 8 \n" \
+	"	la.local $t1, "#sym" \n" \
+	"	move %0, $t1 \n" \
+	: "=r"(*(fp)) : : "memory" )
diff --git a/arch/loongarch64/syscall_arch.h b/arch/loongarch64/syscall_arch.h
new file mode 100644
index 00000000..4d5e1885
--- /dev/null
+++ b/arch/loongarch64/syscall_arch.h
@@ -0,0 +1,137 @@
+#define __SYSCALL_LL_E(x) (x)
+#define __SYSCALL_LL_O(x) (x)
+
+#define SYSCALL_CLOBBERLIST \
+	"$t0", "$t1", "$t2", "$t3", \
+	"$t4", "$t5", "$t6", "$t7", "$t8", "memory"
+
+static inline long __syscall0(long n)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0");
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "=r"(a0)
+		: "r"(a7)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall1(long n, long a)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+		: "r"(a7)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall2(long n, long a, long b)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+	register long a1 __asm__("$a1") = b;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+	        : "r"(a7), "r"(a1)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall3(long n, long a, long b, long c)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+	register long a1 __asm__("$a1") = b;
+	register long a2 __asm__("$a2") = c;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+	        : "r"(a7), "r"(a1), "r"(a2)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall4(long n, long a, long b, long c, long d)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+	register long a1 __asm__("$a1") = b;
+	register long a2 __asm__("$a2") = c;
+	register long a3 __asm__("$a3") = d;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+	register long a1 __asm__("$a1") = b;
+	register long a2 __asm__("$a2") = c;
+	register long a3 __asm__("$a3") = d;
+	register long a4 __asm__("$a4") = e;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+	register long a1 __asm__("$a1") = b;
+	register long a2 __asm__("$a2") = c;
+	register long a3 __asm__("$a3") = d;
+	register long a4 __asm__("$a4") = e;
+	register long a5 __asm__("$a5") = f;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g)
+{
+	register long a7 __asm__("$a7") = n;
+	register long a0 __asm__("$a0") = a;
+	register long a1 __asm__("$a1") = b;
+	register long a2 __asm__("$a2") = c;
+	register long a3 __asm__("$a3") = d;
+	register long a4 __asm__("$a4") = e;
+	register long a5 __asm__("$a5") = f;
+	register long a6 __asm__("$a6") = g;
+
+	__asm__ __volatile__ (
+		"syscall 0"
+		: "+r"(a0)
+	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5), "r"(a6)
+		: SYSCALL_CLOBBERLIST);
+	return a0;
+}
+
+#define VDSO_USEFUL
+#define VDSO_CGT_SYM "__vdso_clock_gettime"
+#define VDSO_CGT_VER "LINUX_5.10"
+
+#define IPC_64  0
diff --git a/arch/m68k/arch.mak b/arch/m68k/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/m68k/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/m68k/bits/alltypes.h.in b/arch/m68k/bits/alltypes.h.in
index a4a8141f..f5646909 100644
--- a/arch/m68k/bits/alltypes.h.in
+++ b/arch/m68k/bits/alltypes.h.in
@@ -1,13 +1,18 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#define __BYTE_ORDER 4321
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
+#ifdef __WCHAR_TYPE__
+TYPEDEF __WCHAR_TYPE__ wchar_t;
+#else
 TYPEDEF long wchar_t;
 #endif
+#endif
 
 #if __mcffpu__
 TYPEDEF float float_t;
@@ -18,14 +23,3 @@ TYPEDEF long double double_t;
 #endif
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/m68k/bits/endian.h b/arch/m68k/bits/endian.h
deleted file mode 100644
index ef074b77..00000000
--- a/arch/m68k/bits/endian.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/arch/m68k/bits/ipcstat.h b/arch/m68k/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/m68k/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/m68k/bits/limits.h b/arch/m68k/bits/limits.h
deleted file mode 100644
index fbc6d238..00000000
--- a/arch/m68k/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/m68k/bits/msg.h b/arch/m68k/bits/msg.h
new file mode 100644
index 00000000..7bbbb2bf
--- /dev/null
+++ b/arch/m68k/bits/msg.h
@@ -0,0 +1,18 @@
+struct msqid_ds {
+	struct ipc_perm msg_perm;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
+	unsigned long msg_cbytes;
+	msgqnum_t msg_qnum;
+	msglen_t msg_qbytes;
+	pid_t msg_lspid;
+	pid_t msg_lrpid;
+	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
+};
diff --git a/arch/m68k/bits/poll.h b/arch/m68k/bits/poll.h
new file mode 100644
index 00000000..00063f41
--- /dev/null
+++ b/arch/m68k/bits/poll.h
@@ -0,0 +1,2 @@
+#define POLLWRNORM POLLOUT
+#define POLLWRBAND 256
diff --git a/arch/m68k/bits/posix.h b/arch/m68k/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/m68k/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/m68k/bits/reg.h b/arch/m68k/bits/reg.h
index 99201f70..fedc4f9f 100644
--- a/arch/m68k/bits/reg.h
+++ b/arch/m68k/bits/reg.h
@@ -1,5 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
 #define PT_D1 0
 #define PT_D2 1
 #define PT_D3 2
diff --git a/arch/m68k/bits/sem.h b/arch/m68k/bits/sem.h
new file mode 100644
index 00000000..d88338e6
--- /dev/null
+++ b/arch/m68k/bits/sem.h
@@ -0,0 +1,13 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	unsigned short sem_nsems;
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/m68k/bits/shm.h b/arch/m68k/bits/shm.h
new file mode 100644
index 00000000..725fb469
--- /dev/null
+++ b/arch/m68k/bits/shm.h
@@ -0,0 +1,31 @@
+#define SHMLBA 4096
+
+struct shmid_ds {
+	struct ipc_perm shm_perm;
+	size_t shm_segsz;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
+	pid_t shm_cpid;
+	pid_t shm_lpid;
+	unsigned long shm_nattch;
+	unsigned long __pad1;
+	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
+};
+
+struct shminfo {
+	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
+};
+
+struct shm_info {
+	int __used_ids;
+	unsigned long shm_tot, shm_rss, shm_swp;
+	unsigned long __swap_attempts, __swap_successes;
+};
diff --git a/arch/m68k/bits/stat.h b/arch/m68k/bits/stat.h
index 0f7b66a1..f8768147 100644
--- a/arch/m68k/bits/stat.h
+++ b/arch/m68k/bits/stat.h
@@ -14,8 +14,12 @@ struct stat {
 	off_t st_size;
 	blksize_t st_blksize;
 	blkcnt_t st_blocks;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
+	ino_t st_ino;
 	struct timespec st_atim;
 	struct timespec st_mtim;
 	struct timespec st_ctim;
-	ino_t st_ino;
 };
diff --git a/arch/m68k/bits/stdint.h b/arch/m68k/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/m68k/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/m68k/bits/syscall.h.in b/arch/m68k/bits/syscall.h.in
index 89cf114c..5cd84602 100644
--- a/arch/m68k/bits/syscall.h.in
+++ b/arch/m68k/bits/syscall.h.in
@@ -67,8 +67,8 @@
 #define __NR_setrlimit		 75
 #define __NR_getrlimit		 76
 #define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
+#define __NR_gettimeofday_time32	 78
+#define __NR_settimeofday_time32	 79
 #define __NR_getgroups		 80
 #define __NR_setgroups		 81
 #define __NR_select		 82
@@ -235,14 +235,14 @@
 #define __NR_remap_file_pages	252
 #define __NR_set_tid_address	253
 #define __NR_timer_create	254
-#define __NR_timer_settime	255
-#define __NR_timer_gettime	256
+#define __NR_timer_settime32	255
+#define __NR_timer_gettime32	256
 #define __NR_timer_getoverrun	257
 #define __NR_timer_delete	258
-#define __NR_clock_settime	259
-#define __NR_clock_gettime	260
-#define __NR_clock_getres	261
-#define __NR_clock_nanosleep	262
+#define __NR_clock_settime32	259
+#define __NR_clock_gettime32	260
+#define __NR_clock_getres_time32	261
+#define __NR_clock_nanosleep_time32	262
 #define __NR_statfs64		263
 #define __NR_fstatfs64		264
 #define __NR_tgkill		265
@@ -300,8 +300,8 @@
 #define __NR_timerfd_create	318
 #define __NR_eventfd		319
 #define __NR_fallocate		320
-#define __NR_timerfd_settime	321
-#define __NR_timerfd_gettime	322
+#define __NR_timerfd_settime32	321
+#define __NR_timerfd_gettime32	322
 #define __NR_signalfd4		323
 #define __NR_eventfd2		324
 #define __NR_epoll_create1	325
@@ -359,3 +359,65 @@
 #define __NR_preadv2		377
 #define __NR_pwritev2		378
 #define __NR_statx		379
+#define __NR_seccomp		380
+#define __NR_pkey_mprotect	381
+#define __NR_pkey_alloc		382
+#define __NR_pkey_free		383
+#define __NR_rseq		384
+#define __NR_semget		393
+#define __NR_semctl		394
+#define __NR_shmget		395
+#define __NR_shmctl		396
+#define __NR_shmat		397
+#define __NR_shmdt		398
+#define __NR_msgget		399
+#define __NR_msgsnd		400
+#define __NR_msgrcv		401
+#define __NR_msgctl		402
+#define __NR_clock_gettime64	403
+#define __NR_clock_settime64	404
+#define __NR_clock_adjtime64	405
+#define __NR_clock_getres_time64 406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64	408
+#define __NR_timer_settime64	409
+#define __NR_timerfd_gettime64	410
+#define __NR_timerfd_settime64	411
+#define __NR_utimensat_time64	412
+#define __NR_pselect6_time64	413
+#define __NR_ppoll_time64	414
+#define __NR_io_pgetevents_time64 416
+#define __NR_recvmmsg_time64	417
+#define __NR_mq_timedsend_time64 418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64	420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64	422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal	424
+#define __NR_io_uring_setup	425
+#define __NR_io_uring_enter	426
+#define __NR_io_uring_register	427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
diff --git a/arch/m68k/bits/user.h b/arch/m68k/bits/user.h
index 9a4ca128..6a443919 100644
--- a/arch/m68k/bits/user.h
+++ b/arch/m68k/bits/user.h
@@ -27,6 +27,11 @@ struct user {
 	char u_comm[32];
 };
 
+#define ELF_NGREG 20
+typedef unsigned long elf_greg_t;
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_m68kfp_struct elf_fpregset_t;
+
 #define NBPG			4096
 #define UPAGES			1
 #define HOST_TEXT_START_ADDR	(u.start_code)
diff --git a/arch/m68k/kstat.h b/arch/m68k/kstat.h
new file mode 100644
index 00000000..ac13e272
--- /dev/null
+++ b/arch/m68k/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	short __st_dev_padding;
+	long __st_ino_truncated;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	short __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	ino_t st_ino;
+};
diff --git a/arch/m68k/pthread_arch.h b/arch/m68k/pthread_arch.h
index 02d5b8a0..5bea4e1a 100644
--- a/arch/m68k/pthread_arch.h
+++ b/arch/m68k/pthread_arch.h
@@ -1,13 +1,12 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	uintptr_t tp = __syscall(SYS_get_thread_area);
-	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
+	return __syscall(SYS_get_thread_area);
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
+#define TP_OFFSET 0x7000
 #define DTP_OFFSET 0x8000
 
 #define MC_PC gregs[R_PC]
diff --git a/arch/m68k/syscall_arch.h b/arch/m68k/syscall_arch.h
index af79c306..6a9d0ae8 100644
--- a/arch/m68k/syscall_arch.h
+++ b/arch/m68k/syscall_arch.h
@@ -87,5 +87,4 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 	return d0;
 }
 
-#define SYSCALL_USE_SOCKETCALL
 #define SYSCALL_IPC_BROKEN_MODE
diff --git a/arch/microblaze/arch.mak b/arch/microblaze/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/microblaze/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/microblaze/bits/alltypes.h.in b/arch/microblaze/bits/alltypes.h.in
index 66ca18ad..9a4ce29d 100644
--- a/arch/microblaze/bits/alltypes.h.in
+++ b/arch/microblaze/bits/alltypes.h.in
@@ -1,9 +1,15 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if __MICROBLAZEEL__
+#define __BYTE_ORDER 1234
+#else
+#define __BYTE_ORDER 4321
+#endif
+
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
@@ -13,14 +19,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/microblaze/bits/endian.h b/arch/microblaze/bits/endian.h
deleted file mode 100644
index d82a92ac..00000000
--- a/arch/microblaze/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if __MICROBLAZEEL__
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#else
-#define __BYTE_ORDER __BIG_ENDIAN
-#endif
diff --git a/arch/microblaze/bits/ipcstat.h b/arch/microblaze/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/microblaze/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/microblaze/bits/limits.h b/arch/microblaze/bits/limits.h
deleted file mode 100644
index fbc6d238..00000000
--- a/arch/microblaze/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/microblaze/bits/msg.h b/arch/microblaze/bits/msg.h
new file mode 100644
index 00000000..7bbbb2bf
--- /dev/null
+++ b/arch/microblaze/bits/msg.h
@@ -0,0 +1,18 @@
+struct msqid_ds {
+	struct ipc_perm msg_perm;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
+	unsigned long msg_cbytes;
+	msgqnum_t msg_qnum;
+	msglen_t msg_qbytes;
+	pid_t msg_lspid;
+	pid_t msg_lrpid;
+	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
+};
diff --git a/arch/microblaze/bits/posix.h b/arch/microblaze/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/microblaze/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/microblaze/bits/reg.h b/arch/microblaze/bits/reg.h
deleted file mode 100644
index 0c7bffca..00000000
--- a/arch/microblaze/bits/reg.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
-/* FIXME */
diff --git a/arch/microblaze/bits/sem.h b/arch/microblaze/bits/sem.h
new file mode 100644
index 00000000..544e3d2a
--- /dev/null
+++ b/arch/microblaze/bits/sem.h
@@ -0,0 +1,18 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+#else
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	unsigned short sem_nsems;
+#endif
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/x86_64/bits/shm.h b/arch/microblaze/bits/shm.h
index 6652d659..725fb469 100644
--- a/arch/x86_64/bits/shm.h
+++ b/arch/microblaze/bits/shm.h
@@ -3,14 +3,21 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	time_t shm_dtime;
-	time_t shm_ctime;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
 	unsigned long __pad1;
 	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
@@ -22,4 +29,3 @@ struct shm_info {
 	unsigned long shm_tot, shm_rss, shm_swp;
 	unsigned long __swap_attempts, __swap_successes;
 };
-
diff --git a/arch/microblaze/bits/signal.h b/arch/microblaze/bits/signal.h
index 490f83bf..f25b7c6a 100644
--- a/arch/microblaze/bits/signal.h
+++ b/arch/microblaze/bits/signal.h
@@ -46,7 +46,6 @@ typedef struct __ucontext {
 #define SA_RESTART    0x10000000
 #define SA_NODEFER    0x40000000
 #define SA_RESETHAND  0x80000000
-#define SA_RESTORER   0x04000000
 
 #endif
 
diff --git a/arch/microblaze/bits/stat.h b/arch/microblaze/bits/stat.h
index ce6a6bd0..8a4d509a 100644
--- a/arch/microblaze/bits/stat.h
+++ b/arch/microblaze/bits/stat.h
@@ -14,8 +14,12 @@ struct stat {
 	blksize_t st_blksize;
 	int __st_blksize_padding;
 	blkcnt_t st_blocks;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
+	unsigned __unused[2];
 	struct timespec st_atim;
 	struct timespec st_mtim;
 	struct timespec st_ctim;
-	unsigned __unused[2];
 };
diff --git a/arch/microblaze/bits/stdint.h b/arch/microblaze/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/microblaze/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/microblaze/bits/syscall.h.in b/arch/microblaze/bits/syscall.h.in
index f0583961..40860e6d 100644
--- a/arch/microblaze/bits/syscall.h.in
+++ b/arch/microblaze/bits/syscall.h.in
@@ -76,8 +76,8 @@
 #define __NR_setrlimit 75
 #define __NR_getrlimit 76
 #define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
+#define __NR_gettimeofday_time32 78
+#define __NR_settimeofday_time32 79
 #define __NR_getgroups 80
 #define __NR_setgroups 81
 #define __NR_select 82
@@ -255,14 +255,14 @@
 #define __NR_remap_file_pages 257
 #define __NR_set_tid_address 258
 #define __NR_timer_create 259
-#define __NR_timer_settime 260
-#define __NR_timer_gettime 261
+#define __NR_timer_settime32 260
+#define __NR_timer_gettime32 261
 #define __NR_timer_getoverrun 262
 #define __NR_timer_delete 263
-#define __NR_clock_settime 264
-#define __NR_clock_gettime 265
-#define __NR_clock_getres 266
-#define __NR_clock_nanosleep 267
+#define __NR_clock_settime32 264
+#define __NR_clock_gettime32 265
+#define __NR_clock_getres_time32 266
+#define __NR_clock_nanosleep_time32 267
 #define __NR_statfs64 268
 #define __NR_fstatfs64 269
 #define __NR_tgkill 270
@@ -320,8 +320,8 @@
 #define __NR_eventfd 323
 #define __NR_fallocate 324
 #define __NR_semtimedop 325
-#define __NR_timerfd_settime 326
-#define __NR_timerfd_gettime 327
+#define __NR_timerfd_settime32 326
+#define __NR_timerfd_gettime32 327
 #define __NR_semctl 328
 #define __NR_semget 329
 #define __NR_semop 330
@@ -395,4 +395,51 @@
 #define __NR_statx 398
 #define __NR_io_pgetevents 399
 #define __NR_rseq 400
+#define __NR_clock_gettime64 403
+#define __NR_clock_settime64 404
+#define __NR_clock_adjtime64 405
+#define __NR_clock_getres_time64 406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64 408
+#define __NR_timer_settime64 409
+#define __NR_timerfd_gettime64 410
+#define __NR_timerfd_settime64 411
+#define __NR_utimensat_time64 412
+#define __NR_pselect6_time64 413
+#define __NR_ppoll_time64 414
+#define __NR_io_pgetevents_time64 416
+#define __NR_recvmmsg_time64 417
+#define __NR_mq_timedsend_time64 418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64 420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64 422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/microblaze/kstat.h b/arch/microblaze/kstat.h
new file mode 100644
index 00000000..c1449579
--- /dev/null
+++ b/arch/microblaze/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	long long __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	int __st_blksize_padding;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	unsigned __unused[2];
+};
diff --git a/arch/microblaze/pthread_arch.h b/arch/microblaze/pthread_arch.h
index f6ba8de9..ff26624e 100644
--- a/arch/microblaze/pthread_arch.h
+++ b/arch/microblaze/pthread_arch.h
@@ -1,10 +1,8 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	struct pthread *self;
-	__asm__ ("ori %0, r21, 0" : "=r" (self) );
-	return self;
+	uintptr_t tp;
+	__asm__ ("ori %0, r21, 0" : "=r" (tp) );
+	return tp;
 }
 
-#define TP_ADJ(p) (p)
-
 #define MC_PC regs.pc
diff --git a/arch/microblaze/reloc.h b/arch/microblaze/reloc.h
index 0a030c7e..6302c6ee 100644
--- a/arch/microblaze/reloc.h
+++ b/arch/microblaze/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define ENDIAN_SUFFIX "el"
 #else
diff --git a/arch/microblaze/syscall_arch.h b/arch/microblaze/syscall_arch.h
index 6cf631ad..61d8248e 100644
--- a/arch/microblaze/syscall_arch.h
+++ b/arch/microblaze/syscall_arch.h
@@ -1,9 +1,7 @@
 #define __SYSCALL_LL_E(x) \
 ((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
 ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
-#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
-
-#ifndef __clang__
+#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
 
 static __inline long __syscall0(long n)
 {
@@ -96,11 +94,6 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 	return r3;
 }
 
-#else
-
-#undef SYSCALL_NO_INLINE
-#define SYSCALL_NO_INLINE
-
-#endif
-
 #define SYSCALL_IPC_BROKEN_MODE
+
+#undef SYS_socketcall
diff --git a/arch/mips/arch.mak b/arch/mips/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/mips/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/mips/bits/alltypes.h.in b/arch/mips/bits/alltypes.h.in
index 66ca18ad..ff934a4c 100644
--- a/arch/mips/bits/alltypes.h.in
+++ b/arch/mips/bits/alltypes.h.in
@@ -1,9 +1,15 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if _MIPSEL || __MIPSEL || __MIPSEL__
+#define __BYTE_ORDER 1234
+#else
+#define __BYTE_ORDER 4321
+#endif
+
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
@@ -13,14 +19,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/mips/bits/endian.h b/arch/mips/bits/endian.h
deleted file mode 100644
index 5399dcb5..00000000
--- a/arch/mips/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if _MIPSEL || __MIPSEL || __MIPSEL__
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#else
-#define __BYTE_ORDER __BIG_ENDIAN
-#endif
diff --git a/arch/mips/bits/hwcap.h b/arch/mips/bits/hwcap.h
index 13e86fe7..7986deb7 100644
--- a/arch/mips/bits/hwcap.h
+++ b/arch/mips/bits/hwcap.h
@@ -1,3 +1,14 @@
 #define HWCAP_MIPS_R6		(1 << 0)
 #define HWCAP_MIPS_MSA		(1 << 1)
 #define HWCAP_MIPS_CRC32	(1 << 2)
+#define HWCAP_MIPS_MIPS16	(1 << 3)
+#define HWCAP_MIPS_MDMX		(1 << 4)
+#define HWCAP_MIPS_MIPS3D	(1 << 5)
+#define HWCAP_MIPS_SMARTMIPS	(1 << 6)
+#define HWCAP_MIPS_DSP		(1 << 7)
+#define HWCAP_MIPS_DSP2		(1 << 8)
+#define HWCAP_MIPS_DSP3		(1 << 9)
+#define HWCAP_MIPS_MIPS16E2	(1 << 10)
+#define HWCAP_LOONGSON_MMI	(1 << 11)
+#define HWCAP_LOONGSON_EXT	(1 << 12)
+#define HWCAP_LOONGSON_EXT2	(1 << 13)
diff --git a/arch/mips/bits/ioctl.h b/arch/mips/bits/ioctl.h
index b8f77cb5..e20bf19e 100644
--- a/arch/mips/bits/ioctl.h
+++ b/arch/mips/bits/ioctl.h
@@ -90,24 +90,6 @@
 #define TIOCMIWAIT	0x5491
 #define TIOCGICOUNT	0x5492
 
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-#define TIOCPKT_IOCTL		64
-
-#define TIOCSER_TEMT    0x01
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
 #define TIOCM_LE	0x001
 #define TIOCM_DTR	0x002
 #define TIOCM_RTS	0x004
@@ -123,90 +105,10 @@ struct winsize {
 #define TIOCM_OUT2	0x4000
 #define TIOCM_LOOP	0x8000
 
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOGETOWN       _IOR('f', 123, int)
 #define FIOSETOWN       _IOW('f', 124, int)
 #define SIOCATMARK      _IOR('s', 7, int)
 #define SIOCSPGRP       _IOW('s', 8, pid_t)
 #define SIOCGPGRP       _IOR('s', 9, pid_t)
-#define SIOCGSTAMP      0x8906
-#define SIOCGSTAMPNS    0x8907
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE		0x89F0
-#define SIOCPROTOPRIVATE	0x89E0
+#define SIOCGSTAMP      _IOR(0x89, 6, char[16])
+#define SIOCGSTAMPNS    _IOR(0x89, 7, char[16])
diff --git a/arch/mips/bits/ipcstat.h b/arch/mips/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/mips/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/mips/bits/limits.h b/arch/mips/bits/limits.h
deleted file mode 100644
index fbc6d238..00000000
--- a/arch/mips/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/mips/bits/msg.h b/arch/mips/bits/msg.h
index f28aece8..c734dbb5 100644
--- a/arch/mips/bits/msg.h
+++ b/arch/mips/bits/msg.h
@@ -1,19 +1,19 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
 #if _MIPSEL || __MIPSEL || __MIPSEL__
-	time_t msg_stime;
-	int __unused1;
-	time_t msg_rtime;
-	int __unused2;
-	time_t msg_ctime;
-	int __unused3;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
 #else
-	int __unused1;
-	time_t msg_stime;
-	int __unused2;
-	time_t msg_rtime;
-	int __unused3;
-	time_t msg_ctime;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_ctime_hi;
+	unsigned long __msg_ctime_lo;
 #endif
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
@@ -21,4 +21,7 @@ struct msqid_ds {
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/mips/bits/posix.h b/arch/mips/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/mips/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/mips/bits/reg.h b/arch/mips/bits/reg.h
index 0c370987..2611b632 100644
--- a/arch/mips/bits/reg.h
+++ b/arch/mips/bits/reg.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
-
 #define EF_R0 6
 #define EF_R1 7
 #define EF_R2 8
diff --git a/arch/mips/bits/sem.h b/arch/mips/bits/sem.h
index e46ced95..fe6f0948 100644
--- a/arch/mips/bits/sem.h
+++ b/arch/mips/bits/sem.h
@@ -1,14 +1,16 @@
 struct semid_ds {
 	struct ipc_perm sem_perm;
-	time_t sem_otime;
-	time_t sem_ctime;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_ctime_lo;
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 	unsigned short sem_nsems;
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
 #else
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
 	unsigned short sem_nsems;
 #endif
-	time_t __unused3;
-	time_t __unused4;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_hi;
+	time_t sem_otime;
+	time_t sem_ctime;
 };
diff --git a/arch/mips/bits/shm.h b/arch/mips/bits/shm.h
index 6652d659..ab8c642d 100644
--- a/arch/mips/bits/shm.h
+++ b/arch/mips/bits/shm.h
@@ -3,14 +3,19 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	time_t shm_dtime;
-	time_t shm_ctime;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_ctime_lo;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
-	unsigned long __pad1;
-	unsigned long __pad2;
+	unsigned short __shm_atime_hi;
+	unsigned short __shm_dtime_hi;
+	unsigned short __shm_ctime_hi;
+	unsigned short __pad1;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
@@ -22,4 +27,3 @@ struct shm_info {
 	unsigned long shm_tot, shm_rss, shm_swp;
 	unsigned long __swap_attempts, __swap_successes;
 };
-
diff --git a/arch/mips/bits/signal.h b/arch/mips/bits/signal.h
index 1a84de59..a3b3857a 100644
--- a/arch/mips/bits/signal.h
+++ b/arch/mips/bits/signal.h
@@ -19,14 +19,18 @@ typedef struct {
 } fpregset_t;
 struct sigcontext {
 	unsigned sc_regmask, sc_status;
-	unsigned long long sc_pc, sc_regs[32], sc_fpregs[32];
+	unsigned long long sc_pc;
+	gregset_t sc_regs;
+	fpregset_t sc_fpregs;
 	unsigned sc_ownedfp, sc_fpc_csr, sc_fpc_eir, sc_used_math, sc_dsp;
 	unsigned long long sc_mdhi, sc_mdlo;
 	unsigned long sc_hi1, sc_lo1, sc_hi2, sc_lo2, sc_hi3, sc_lo3;
 };
 typedef struct {
 	unsigned regmask, status;
-	unsigned long long pc, gregs[32], fpregs[32];
+	unsigned long long pc;
+	gregset_t gregs;
+	fpregset_t fpregs;
 	unsigned ownedfp, fpc_csr, fpc_eir, used_math, dsp;
 	unsigned long long mdhi, mdlo;
 	unsigned long hi1, lo1, hi2, lo2, hi3, lo3;
@@ -62,7 +66,6 @@ typedef struct __ucontext {
 #define SA_RESTART    0x10000000
 #define SA_NODEFER    0x40000000
 #define SA_RESETHAND  0x80000000
-#define SA_RESTORER   0x04000000
 
 #undef SIG_BLOCK
 #undef SIG_UNBLOCK
@@ -89,7 +92,7 @@ typedef struct __ucontext {
 #define SIGTRAP   5
 #define SIGABRT   6
 #define SIGIOT    SIGABRT
-#define SIGSTKFLT 7
+#define SIGEMT    7
 #define SIGFPE    8
 #define SIGKILL   9
 #define SIGBUS    10
diff --git a/arch/mips/bits/socket.h b/arch/mips/bits/socket.h
index b82c7d34..02fbb88b 100644
--- a/arch/mips/bits/socket.h
+++ b/arch/mips/bits/socket.h
@@ -1,19 +1,3 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int msg_iovlen;
-	void *msg_control;
-	socklen_t msg_controllen;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	socklen_t cmsg_len;
-	int cmsg_level;
-	int cmsg_type;
-};
-
 #define SOCK_STREAM    2
 #define SOCK_DGRAM     1
 
@@ -32,8 +16,6 @@ struct cmsghdr {
 #define SO_RCVBUF       0x1002
 #define SO_SNDLOWAT     0x1003
 #define SO_RCVLOWAT     0x1004
-#define SO_RCVTIMEO     0x1006
-#define SO_SNDTIMEO     0x1005
 #define SO_ERROR        0x1007
 #define SO_TYPE         0x1008
 #define SO_ACCEPTCONN   0x1009
diff --git a/arch/mips/bits/stat.h b/arch/mips/bits/stat.h
index 3291a636..48d4ac80 100644
--- a/arch/mips/bits/stat.h
+++ b/arch/mips/bits/stat.h
@@ -12,11 +12,15 @@ struct stat {
 	dev_t st_rdev;
 	long __st_padding2[2];
 	off_t st_size;
-	struct timespec st_atim;
-	struct timespec st_mtim;
-	struct timespec st_ctim;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
 	blksize_t st_blksize;
 	long __st_padding3;
 	blkcnt_t st_blocks;
-        long __st_padding4[14];
+	struct timespec st_atim;
+	struct timespec st_mtim;
+	struct timespec st_ctim;
+	long __st_padding4[2];
 };
diff --git a/arch/mips/bits/stdint.h b/arch/mips/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/mips/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/mips/bits/syscall.h.in b/arch/mips/bits/syscall.h.in
index be8c3207..55e35742 100644
--- a/arch/mips/bits/syscall.h.in
+++ b/arch/mips/bits/syscall.h.in
@@ -76,8 +76,8 @@
 #define __NR_setrlimit               4075
 #define __NR_getrlimit               4076
 #define __NR_getrusage               4077
-#define __NR_gettimeofday            4078
-#define __NR_settimeofday            4079
+#define __NR_gettimeofday_time32            4078
+#define __NR_settimeofday_time32            4079
 #define __NR_getgroups               4080
 #define __NR_setgroups               4081
 #define __NR_reserved82              4082
@@ -256,14 +256,14 @@
 #define __NR_statfs64                4255
 #define __NR_fstatfs64               4256
 #define __NR_timer_create            4257
-#define __NR_timer_settime           4258
-#define __NR_timer_gettime           4259
+#define __NR_timer_settime32           4258
+#define __NR_timer_gettime32           4259
 #define __NR_timer_getoverrun        4260
 #define __NR_timer_delete            4261
-#define __NR_clock_settime           4262
-#define __NR_clock_gettime           4263
-#define __NR_clock_getres            4264
-#define __NR_clock_nanosleep         4265
+#define __NR_clock_settime32           4262
+#define __NR_clock_gettime32           4263
+#define __NR_clock_getres_time32            4264
+#define __NR_clock_nanosleep_time32         4265
 #define __NR_tgkill                  4266
 #define __NR_utimes                  4267
 #define __NR_mbind                   4268
@@ -319,8 +319,8 @@
 #define __NR_eventfd                 4319
 #define __NR_fallocate               4320
 #define __NR_timerfd_create          4321
-#define __NR_timerfd_gettime         4322
-#define __NR_timerfd_settime         4323
+#define __NR_timerfd_gettime32         4322
+#define __NR_timerfd_settime32         4323
 #define __NR_signalfd4               4324
 #define __NR_eventfd2                4325
 #define __NR_epoll_create1           4326
@@ -366,4 +366,61 @@
 #define __NR_statx                   4366
 #define __NR_rseq                    4367
 #define __NR_io_pgetevents           4368
+#define __NR_semget                  4393
+#define __NR_semctl                  4394
+#define __NR_shmget                  4395
+#define __NR_shmctl                  4396
+#define __NR_shmat                   4397
+#define __NR_shmdt                   4398
+#define __NR_msgget                  4399
+#define __NR_msgsnd                  4400
+#define __NR_msgrcv                  4401
+#define __NR_msgctl                  4402
+#define __NR_clock_gettime64         4403
+#define __NR_clock_settime64         4404
+#define __NR_clock_adjtime64         4405
+#define __NR_clock_getres_time64     4406
+#define __NR_clock_nanosleep_time64  4407
+#define __NR_timer_gettime64         4408
+#define __NR_timer_settime64         4409
+#define __NR_timerfd_gettime64       4410
+#define __NR_timerfd_settime64       4411
+#define __NR_utimensat_time64        4412
+#define __NR_pselect6_time64         4413
+#define __NR_ppoll_time64            4414
+#define __NR_io_pgetevents_time64    4416
+#define __NR_recvmmsg_time64         4417
+#define __NR_mq_timedsend_time64     4418
+#define __NR_mq_timedreceive_time64  4419
+#define __NR_semtimedop_time64       4420
+#define __NR_rt_sigtimedwait_time64  4421
+#define __NR_futex_time64            4422
+#define __NR_sched_rr_get_interval_time64 4423
+#define __NR_pidfd_send_signal       4424
+#define __NR_io_uring_setup          4425
+#define __NR_io_uring_enter          4426
+#define __NR_io_uring_register       4427
+#define __NR_open_tree		4428
+#define __NR_move_mount		4429
+#define __NR_fsopen		4430
+#define __NR_fsconfig		4431
+#define __NR_fsmount		4432
+#define __NR_fspick		4433
+#define __NR_pidfd_open		4434
+#define __NR_clone3		4435
+#define __NR_close_range	4436
+#define __NR_openat2		4437
+#define __NR_pidfd_getfd	4438
+#define __NR_faccessat2		4439
+#define __NR_process_madvise	4440
+#define __NR_epoll_pwait2	4441
+#define __NR_mount_setattr	4442
+#define __NR_landlock_create_ruleset	4444
+#define __NR_landlock_add_rule	4445
+#define __NR_landlock_restrict_self	4446
+#define __NR_process_mrelease	4448
+#define __NR_futex_waitv	4449
+#define __NR_set_mempolicy_home_node	4450
+#define __NR_cachestat		4451
+#define __NR_fchmodat2		4452
 
diff --git a/arch/mips/bits/termios.h b/arch/mips/bits/termios.h
index f7b9dd2e..9d571f78 100644
--- a/arch/mips/bits/termios.h
+++ b/arch/mips/bits/termios.h
@@ -165,5 +165,5 @@ struct termios {
 #define EXTPROC 0200000
 
 #define XTABS  0014000
-#define TIOCSER_TEMT 0x01
+#define TIOCSER_TEMT 1
 #endif
diff --git a/arch/mips/ksigaction.h b/arch/mips/ksigaction.h
index 63fdfab0..485abf75 100644
--- a/arch/mips/ksigaction.h
+++ b/arch/mips/ksigaction.h
@@ -4,10 +4,7 @@ struct k_sigaction {
 	unsigned flags;
 	void (*handler)(int);
 	unsigned long mask[4];
-	/* The following field is past the end of the structure the
-	 * kernel will read or write, and exists only to avoid having
-	 * mips-specific preprocessor conditionals in sigaction.c. */
-	void (*restorer)();
+	void *unused;
 };
 
 hidden void __restore(), __restore_rt();
diff --git a/arch/mips/kstat.h b/arch/mips/kstat.h
new file mode 100644
index 00000000..5e637eab
--- /dev/null
+++ b/arch/mips/kstat.h
@@ -0,0 +1,22 @@
+struct kstat {
+	unsigned st_dev;
+	long __st_padding1[3];
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	unsigned st_rdev;
+	long __st_padding2[3];
+	off_t st_size;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	blksize_t st_blksize;
+	long __st_padding3;
+	blkcnt_t st_blocks;
+        long __st_padding4[14];
+};
diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h
index 1e7839ea..376b7741 100644
--- a/arch/mips/pthread_arch.h
+++ b/arch/mips/pthread_arch.h
@@ -1,19 +1,18 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
+	register uintptr_t tp __asm__("$3");
 #if __mips_isa_rev < 2
-	register char *tp __asm__("$3");
 	__asm__ (".word 0x7c03e83b" : "=r" (tp) );
 #else
-	char *tp;
 	__asm__ ("rdhwr %0, $29" : "=r" (tp) );
 #endif
-	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
+#define TP_OFFSET 0x7000
 #define DTP_OFFSET 0x8000
 
 #define MC_PC pc
diff --git a/arch/mips/reloc.h b/arch/mips/reloc.h
index b3d59a45..f4023b16 100644
--- a/arch/mips/reloc.h
+++ b/arch/mips/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __mips_isa_rev >= 6
 #define ISA_SUFFIX "r6"
 #else
@@ -31,6 +29,7 @@
 
 #define NEED_MIPS_GOT_RELOCS 1
 #define DT_DEBUG_INDIRECT DT_MIPS_RLD_MAP
+#define DT_DEBUG_INDIRECT_REL DT_MIPS_RLD_MAP_REL
 #define ARCH_SYM_REJECT_UND(s) (!((s)->st_other & STO_MIPS_PLT))
 
 #define CRTJMP(pc,sp) __asm__ __volatile__( \
diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
index 01de67b8..5b7c38de 100644
--- a/arch/mips/syscall_arch.h
+++ b/arch/mips/syscall_arch.h
@@ -3,20 +3,16 @@
 ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
 #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
 
-hidden long (__syscall)(long, ...);
-
 #define SYSCALL_RLIM_INFINITY (-1UL/2)
 
-#if _MIPSEL || __MIPSEL || __MIPSEL__
-#define __stat_fix(st) ((st),(void)0)
+#if __mips_isa_rev >= 6
+#define SYSCALL_CLOBBERLIST \
+	"$1", "$3", "$11", "$12", "$13", \
+	"$14", "$15", "$24", "$25", "memory"
 #else
-#include <sys/stat.h>
-static inline void __stat_fix(long p)
-{
-	struct stat *st = (struct stat *)p;
-	st->st_dev >>= 32;
-	st->st_rdev >>= 32;
-}
+#define SYSCALL_CLOBBERLIST \
+	"$1", "$3", "$11", "$12", "$13", \
+	"$14", "$15", "$24", "$25", "hi", "lo", "memory"
 #endif
 
 static inline long __syscall0(long n)
@@ -25,10 +21,10 @@ static inline long __syscall0(long n)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	return r7 ? -r2 : r2;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2)
+		: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall1(long n, long a)
@@ -38,11 +34,10 @@ static inline long __syscall1(long n, long a)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	return r7 ? -r2 : r2;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4)
+		: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall2(long n, long a, long b)
@@ -53,14 +48,10 @@ static inline long __syscall2(long n, long a, long b)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	if (r7) return -r2;
-	long ret = r2;
-	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
-	return ret;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5)
+		: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall3(long n, long a, long b, long c)
@@ -72,14 +63,10 @@ static inline long __syscall3(long n, long a, long b, long c)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5), "r"(r6)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	if (r7) return -r2;
-	long ret = r2;
-	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
-	return ret;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall4(long n, long a, long b, long c, long d)
@@ -91,35 +78,76 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5), "r"(r6)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	if (r7) return -r2;
-	long ret = r2;
-	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
-	if (n == SYS_fstatat64) __stat_fix(c);
-	return ret;
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall5(long n, long a, long b, long c, long d, long e)
 {
-	long r2 = (__syscall)(n, a, b, c, d, e);
-	if (r2 > -4096UL) return r2;
-	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
-	if (n == SYS_fstatat64) __stat_fix(c);
-	return r2;
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r2 __asm__("$2");
+	__asm__ __volatile__ (
+		"subu $sp,$sp,32 ; sw $8,16($sp) ; "
+		"addu $2,$0,%3 ; syscall ;"
+		"addu $sp,$sp,32"
+		: "=&r"(r2), "+r"(r7), "+r"(r8)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST, "$9", "$10");
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
 {
-	long r2 = (__syscall)(n, a, b, c, d, e, f);
-	if (r2 > -4096UL) return r2;
-	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
-	if (n == SYS_fstatat64) __stat_fix(c);
-	return r2;
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r9 __asm__("$9") = f;
+	register long r2 __asm__("$2");
+	__asm__ __volatile__ (
+		"subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; "
+		"addu $2,$0,%4 ; syscall ;"
+		"addu $sp,$sp,32"
+		: "=&r"(r2), "+r"(r7), "+r"(r8), "+r"(r9)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST, "$10");
+	return r7 && r2>0 ? -r2 : r2;
+}
+
+static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g)
+{
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r9 __asm__("$9") = f;
+	register long r10 __asm__("$10") = g;
+	register long r2 __asm__("$2");
+	__asm__ __volatile__ (
+		"subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; sw $10,24($sp) ; "
+		"addu $2,$0,%5 ; syscall ;"
+		"addu $sp,$sp,32"
+		: "=&r"(r2), "+r"(r7), "+r"(r8), "+r"(r9), "+r"(r10)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 #define VDSO_USEFUL
-#define VDSO_CGT_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_VER "LINUX_2.6"
+#define VDSO_CGT_SYM "__vdso_clock_gettime64"
 #define VDSO_CGT_VER "LINUX_2.6"
+
+#define SO_SNDTIMEO_OLD 0x1005
+#define SO_RCVTIMEO_OLD 0x1006
+
+#undef SYS_socketcall
diff --git a/arch/mips64/bits/alltypes.h.in b/arch/mips64/bits/alltypes.h.in
index 2b2e34a8..fcd61ee8 100644
--- a/arch/mips64/bits/alltypes.h.in
+++ b/arch/mips64/bits/alltypes.h.in
@@ -2,8 +2,13 @@
 #define _Int64 long
 #define _Reg long
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if _MIPSEL || __MIPSEL || __MIPSEL__
+#define __BYTE_ORDER 1234
+#else
+#define __BYTE_ORDER 4321
+#endif
+
+#define __LONG_MAX 0x7fffffffffffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
@@ -14,15 +19,4 @@ TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
 
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
 TYPEDEF unsigned nlink_t;
-
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; void *__p[7]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/arch/mips64/bits/endian.h b/arch/mips64/bits/endian.h
deleted file mode 100644
index 5399dcb5..00000000
--- a/arch/mips64/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if _MIPSEL || __MIPSEL || __MIPSEL__
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#else
-#define __BYTE_ORDER __BIG_ENDIAN
-#endif
diff --git a/arch/mips64/bits/fcntl.h b/arch/mips64/bits/fcntl.h
index 3bcec15e..5da1eef8 100644
--- a/arch/mips64/bits/fcntl.h
+++ b/arch/mips64/bits/fcntl.h
@@ -13,7 +13,7 @@
 
 #define O_ASYNC      010000
 #define O_DIRECT    0100000
-#define O_LARGEFILE       0
+#define O_LARGEFILE  020000
 #define O_NOATIME  01000000
 #define O_PATH    010000000
 #define O_TMPFILE 020200000
diff --git a/arch/mips64/bits/ioctl.h b/arch/mips64/bits/ioctl.h
index b8f77cb5..e277c3f0 100644
--- a/arch/mips64/bits/ioctl.h
+++ b/arch/mips64/bits/ioctl.h
@@ -90,24 +90,6 @@
 #define TIOCMIWAIT	0x5491
 #define TIOCGICOUNT	0x5492
 
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-#define TIOCPKT_IOCTL		64
-
-#define TIOCSER_TEMT    0x01
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
 #define TIOCM_LE	0x001
 #define TIOCM_DTR	0x002
 #define TIOCM_RTS	0x004
@@ -123,23 +105,6 @@ struct winsize {
 #define TIOCM_OUT2	0x4000
 #define TIOCM_LOOP	0x8000
 
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOGETOWN       _IOR('f', 123, int)
 #define FIOSETOWN       _IOW('f', 124, int)
 #define SIOCATMARK      _IOR('s', 7, int)
@@ -147,66 +112,3 @@ struct winsize {
 #define SIOCGPGRP       _IOR('s', 9, pid_t)
 #define SIOCGSTAMP      0x8906
 #define SIOCGSTAMPNS    0x8907
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE		0x89F0
-#define SIOCPROTOPRIVATE	0x89E0
diff --git a/arch/mips64/bits/ipc.h b/arch/mips64/bits/ipc.h
index 43a8314e..df227168 100644
--- a/arch/mips64/bits/ipc.h
+++ b/arch/mips64/bits/ipc.h
@@ -10,5 +10,3 @@ struct ipc_perm {
 	unsigned long __unused1;
 	unsigned long __unused2;
 };
-
-#define IPC_64 0x100
diff --git a/arch/mips64/bits/limits.h b/arch/mips64/bits/limits.h
deleted file mode 100644
index 58698c62..00000000
--- a/arch/mips64/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 64
-#endif
-
-#define LONG_MAX 0x7fffffffffffffffL
-#define LLONG_MAX 0x7fffffffffffffffLL
diff --git a/arch/mips64/bits/msg.h b/arch/mips64/bits/msg.h
deleted file mode 100644
index 641e1703..00000000
--- a/arch/mips64/bits/msg.h
+++ /dev/null
@@ -1,13 +0,0 @@
-struct msqid_ds {
-	struct ipc_perm msg_perm;
-	time_t msg_stime;
-	time_t msg_rtime;
-	time_t msg_ctime;
-	unsigned long msg_cbytes;
-	msgqnum_t msg_qnum;
-	msglen_t msg_qbytes;
-	pid_t msg_lspid;
-	pid_t msg_lrpid;
-	unsigned long __pad1;
-	unsigned long __pad2;
-};
diff --git a/arch/mips64/bits/posix.h b/arch/mips64/bits/posix.h
deleted file mode 100644
index acf42944..00000000
--- a/arch/mips64/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_LP64_OFFBIG 1
-#define _POSIX_V7_LP64_OFFBIG 1
diff --git a/arch/mips64/bits/reg.h b/arch/mips64/bits/reg.h
index a3f63acc..16178dd3 100644
--- a/arch/mips64/bits/reg.h
+++ b/arch/mips64/bits/reg.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
-
 #define EF_R0 0
 #define EF_R1 1
 #define EF_R2 2
diff --git a/arch/mips64/bits/sem.h b/arch/mips64/bits/sem.h
deleted file mode 100644
index e46ced95..00000000
--- a/arch/mips64/bits/sem.h
+++ /dev/null
@@ -1,14 +0,0 @@
-struct semid_ds {
-	struct ipc_perm sem_perm;
-	time_t sem_otime;
-	time_t sem_ctime;
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-	unsigned short sem_nsems;
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
-#else
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
-	unsigned short sem_nsems;
-#endif
-	time_t __unused3;
-	time_t __unused4;
-};
diff --git a/arch/mips64/bits/signal.h b/arch/mips64/bits/signal.h
index c31ad07e..ffec7fd0 100644
--- a/arch/mips64/bits/signal.h
+++ b/arch/mips64/bits/signal.h
@@ -85,7 +85,6 @@ typedef struct __ucontext {
 #define SA_RESTART    0x10000000
 #define SA_NODEFER    0x40000000
 #define SA_RESETHAND  0x80000000
-#define SA_RESTORER   0x04000000
 
 #undef SIG_BLOCK
 #undef SIG_UNBLOCK
@@ -112,7 +111,7 @@ typedef struct __ucontext {
 #define SIGTRAP   5
 #define SIGABRT   6
 #define SIGIOT    SIGABRT
-#define SIGSTKFLT 7
+#define SIGEMT    7
 #define SIGFPE    8
 #define SIGKILL   9
 #define SIGBUS    10
diff --git a/arch/mips64/bits/socket.h b/arch/mips64/bits/socket.h
index 5aff0d91..519b9c8e 100644
--- a/arch/mips64/bits/socket.h
+++ b/arch/mips64/bits/socket.h
@@ -1,37 +1,3 @@
-#include <endian.h>
-
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad1, msg_iovlen;
-#else
-	int msg_iovlen, __pad1;
-#endif
-	void *msg_control;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad2;
-	socklen_t msg_controllen;
-#else
-	socklen_t msg_controllen;
-	int __pad2;
-#endif
-	int msg_flags;
-};
-
-struct cmsghdr {
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad1;
-	socklen_t cmsg_len;
-#else
-	socklen_t cmsg_len;
-	int __pad1;
-#endif
-	int cmsg_level;
-	int cmsg_type;
-};
-
 #define SOCK_STREAM    2
 #define SOCK_DGRAM     1
 #define SOL_SOCKET     65535
diff --git a/arch/mips64/bits/stat.h b/arch/mips64/bits/stat.h
index b46617f1..b620e142 100644
--- a/arch/mips64/bits/stat.h
+++ b/arch/mips64/bits/stat.h
@@ -1,6 +1,3 @@
-#include <string.h>
-#include <bits/alltypes.h>
-
 struct stat {
 	dev_t st_dev;
 	int __pad1[3];
diff --git a/arch/mips64/bits/stdint.h b/arch/mips64/bits/stdint.h
deleted file mode 100644
index 1bb147f2..00000000
--- a/arch/mips64/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT64_MIN
-#define INTPTR_MAX      INT64_MAX
-#define UINTPTR_MAX     UINT64_MAX
-#define PTRDIFF_MIN     INT64_MIN
-#define PTRDIFF_MAX     INT64_MAX
-#define SIZE_MAX        UINT64_MAX
diff --git a/arch/mips64/bits/syscall.h.in b/arch/mips64/bits/syscall.h.in
index f814aa48..50cec45a 100644
--- a/arch/mips64/bits/syscall.h.in
+++ b/arch/mips64/bits/syscall.h.in
@@ -324,6 +324,33 @@
 #define __NR_pkey_alloc			5324
 #define __NR_pkey_free			5325
 #define __NR_statx			5326
-#define __NR_rseq			4327
-#define __NR_io_pgetevents		4328
+#define __NR_rseq			5327
+#define __NR_io_pgetevents		5328
+#define __NR_pidfd_send_signal		5424
+#define __NR_io_uring_setup		5425
+#define __NR_io_uring_enter		5426
+#define __NR_io_uring_register		5427
+#define __NR_open_tree		5428
+#define __NR_move_mount		5429
+#define __NR_fsopen		5430
+#define __NR_fsconfig		5431
+#define __NR_fsmount		5432
+#define __NR_fspick		5433
+#define __NR_pidfd_open		5434
+#define __NR_clone3		5435
+#define __NR_close_range	5436
+#define __NR_openat2		5437
+#define __NR_pidfd_getfd	5438
+#define __NR_faccessat2		5439
+#define __NR_process_madvise	5440
+#define __NR_epoll_pwait2	5441
+#define __NR_mount_setattr	5442
+#define __NR_landlock_create_ruleset	5444
+#define __NR_landlock_add_rule	5445
+#define __NR_landlock_restrict_self	5446
+#define __NR_process_mrelease	5448
+#define __NR_futex_waitv	5449
+#define __NR_set_mempolicy_home_node	5450
+#define __NR_cachestat		5451
+#define __NR_fchmodat2		5452
 
diff --git a/arch/mips64/bits/termios.h b/arch/mips64/bits/termios.h
index f7b9dd2e..9d571f78 100644
--- a/arch/mips64/bits/termios.h
+++ b/arch/mips64/bits/termios.h
@@ -165,5 +165,5 @@ struct termios {
 #define EXTPROC 0200000
 
 #define XTABS  0014000
-#define TIOCSER_TEMT 0x01
+#define TIOCSER_TEMT 1
 #endif
diff --git a/arch/mips64/ksigaction.h b/arch/mips64/ksigaction.h
index c16e4731..b4d0fa5f 100644
--- a/arch/mips64/ksigaction.h
+++ b/arch/mips64/ksigaction.h
@@ -4,7 +4,7 @@ struct k_sigaction {
 	unsigned flags;
 	void (*handler)(int);
 	unsigned long mask[2];
-	void (*restorer)();
+	void *unused;
 };
 
 hidden void __restore(), __restore_rt();
diff --git a/arch/mips64/kstat.h b/arch/mips64/kstat.h
new file mode 100644
index 00000000..9a4468b4
--- /dev/null
+++ b/arch/mips64/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	unsigned st_dev;
+	int __pad1[3];
+	ino_t st_ino;
+	mode_t st_mode;
+	unsigned st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	unsigned st_rdev;
+	int __pad2[3];
+	off_t st_size;
+	int st_atime_sec;
+	int st_atime_nsec;
+	int st_mtime_sec;
+	int st_mtime_nsec;
+	int st_ctime_sec;
+	int st_ctime_nsec;
+	unsigned st_blksize;
+	unsigned __pad3;
+	blkcnt_t st_blocks;
+};
diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
index 1e7839ea..c45347ab 100644
--- a/arch/mips64/pthread_arch.h
+++ b/arch/mips64/pthread_arch.h
@@ -1,19 +1,19 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
 #if __mips_isa_rev < 2
-	register char *tp __asm__("$3");
+	register uintptr_t tp __asm__("$3");
 	__asm__ (".word 0x7c03e83b" : "=r" (tp) );
 #else
-	char *tp;
+	uintptr_t tp;
 	__asm__ ("rdhwr %0, $29" : "=r" (tp) );
 #endif
-	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
+#define TP_OFFSET 0x7000
 #define DTP_OFFSET 0x8000
 
 #define MC_PC pc
diff --git a/arch/mips64/reloc.h b/arch/mips64/reloc.h
index bbd9bd9d..145d8b0b 100644
--- a/arch/mips64/reloc.h
+++ b/arch/mips64/reloc.h
@@ -1,9 +1,3 @@
-#ifndef __RELOC_H__
-#define __RELOC_H__
-
-#define _GNU_SOURCE
-#include <endian.h>
-
 #if __mips_isa_rev >= 6
 #define ISA_SUFFIX "r6"
 #else
@@ -33,6 +27,8 @@
 #define REL_DTPOFF      R_MIPS_TLS_DTPREL64
 #define REL_TPOFF       R_MIPS_TLS_TPREL64
 
+#include <endian.h>
+
 #undef R_TYPE
 #undef R_SYM
 #undef R_INFO
@@ -42,6 +38,7 @@
 
 #define NEED_MIPS_GOT_RELOCS 1
 #define DT_DEBUG_INDIRECT DT_MIPS_RLD_MAP
+#define DT_DEBUG_INDIRECT_REL DT_MIPS_RLD_MAP_REL
 #define ARCH_SYM_REJECT_UND(s) (!((s)->st_other & STO_MIPS_PLT))
 
 #define CRTJMP(pc,sp) __asm__ __volatile__( \
@@ -62,5 +59,3 @@
 	"	daddu %0, %0, $ra \n" \
 	".set pop \n" \
 	: "=r"(*(fp)) : : "memory", "ra" )
-
-#endif
diff --git a/arch/mips64/syscall_arch.h b/arch/mips64/syscall_arch.h
index 5eabdf46..ae6532fc 100644
--- a/arch/mips64/syscall_arch.h
+++ b/arch/mips64/syscall_arch.h
@@ -1,53 +1,17 @@
 #define __SYSCALL_LL_E(x) (x)
 #define __SYSCALL_LL_O(x) (x)
 
-__attribute__((visibility("hidden")))
-long (__syscall)(long, ...);
-
 #define SYSCALL_RLIM_INFINITY (-1UL/2)
 
-#include <sys/stat.h>
-struct kernel_stat {
-	unsigned int st_dev;
-	unsigned int __pad1[3];
-	unsigned long long st_ino;
-	unsigned int st_mode;
-	unsigned int st_nlink;
-	int st_uid;
-	int st_gid;
-	unsigned int st_rdev;
-	unsigned int __pad2[3];
-	long long st_size;
-	unsigned int st_atime_sec;
-	unsigned int st_atime_nsec;
-	unsigned int st_mtime_sec;
-	unsigned int st_mtime_nsec;
-	unsigned int st_ctime_sec;
-	unsigned int st_ctime_nsec;
-	unsigned int st_blksize;
-	unsigned int __pad3;
-	unsigned long long st_blocks;
-};
-
-static void __stat_fix(struct kernel_stat *kst, struct stat *st)
-{
-	st->st_dev = kst->st_dev;
-	st->st_ino = kst->st_ino;
-	st->st_mode = kst->st_mode;
-	st->st_nlink = kst->st_nlink;
-	st->st_uid = kst->st_uid;
-	st->st_gid = kst->st_gid;
-	st->st_rdev = kst->st_rdev;
-	st->st_size = kst->st_size;
-	st->st_atim.tv_sec = kst->st_atime_sec;
-	st->st_atim.tv_nsec = kst->st_atime_nsec;
-	st->st_mtim.tv_sec = kst->st_mtime_sec;
-	st->st_mtim.tv_nsec = kst->st_mtime_nsec;
-	st->st_ctim.tv_sec = kst->st_ctime_sec;
-	st->st_ctim.tv_nsec = kst->st_ctime_nsec;
-	st->st_blksize = kst->st_blksize;
-	st->st_blocks = kst->st_blocks;
-}
+#if __mips_isa_rev >= 6
+#define SYSCALL_CLOBBERLIST \
+	"$1", "$3", "$10", "$11", "$12", "$13", \
+	"$14", "$15", "$24", "$25", "memory"
+#else
+#define SYSCALL_CLOBBERLIST \
+	"$1", "$3", "$10", "$11", "$12", "$13", \
+	"$14", "$15", "$24", "$25", "hi", "lo", "memory"
+#endif
 
 static inline long __syscall0(long n)
 {
@@ -55,10 +19,10 @@ static inline long __syscall0(long n)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"daddu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	return r7 ? -r2 : r2;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall1(long n, long a)
@@ -68,158 +32,97 @@ static inline long __syscall1(long n, long a)
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
 		"daddu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	return r7 ? -r2 : r2;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall2(long n, long a, long b)
 {
-	struct kernel_stat kst;
-	long ret;
-	register long r4 __asm__("$4");
-	register long r5 __asm__("$5");
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
 	register long r7 __asm__("$7");
 	register long r2 __asm__("$2");
 
-	r5 = b;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		r5 = (long) &kst;
-
-	r4 = a;
 	__asm__ __volatile__ (
 		"daddu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-
-	if (r7) return -r2;
-	ret = r2;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		__stat_fix(&kst, (struct stat *)b);
-
-	return ret;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall3(long n, long a, long b, long c)
 {
-	struct kernel_stat kst;
-	long ret;
-	register long r4 __asm__("$4");
-	register long r5 __asm__("$5");
-	register long r6 __asm__("$6");
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
 	register long r7 __asm__("$7");
 	register long r2 __asm__("$2");
 
-	r5 = b;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		r5 = (long) &kst;
-
-	r4 = a;
-	r6 = c;
 	__asm__ __volatile__ (
 		"daddu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5), "r"(r6)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-
-	if (r7) return -r2;
-	ret = r2;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		__stat_fix(&kst, (struct stat *)b);
-
-	return ret;
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall4(long n, long a, long b, long c, long d)
 {
-	struct kernel_stat kst;
-	long ret;
-	register long r4 __asm__("$4");
-	register long r5 __asm__("$5");
-	register long r6 __asm__("$6");
-	register long r7 __asm__("$7");
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
 	register long r2 __asm__("$2");
 
-	r4 = a;
-	r5 = b;
-	r6 = c;
-	r7 = d;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		r5 = (long) &kst;
-	if (n == SYS_newfstatat)
-		r6 = (long) &kst;
-
 	__asm__ __volatile__ (
 		"daddu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5), "r"(r6)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-
-	if (r7) return -r2;
-	ret = r2;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		__stat_fix(&kst, (struct stat *)b);
-	if (n == SYS_newfstatat)
-		__stat_fix(&kst, (struct stat *)c);
-
-	return ret;
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall5(long n, long a, long b, long c, long d, long e)
 {
-	long r2;
-	long old_b = b;
-	long old_c = c;
-	struct kernel_stat kst;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		b = (long) &kst;
-	if (n == SYS_newfstatat)
-		c = (long) &kst;
-
-	r2 = (__syscall)(n, a, b, c, d, e);
-	if (r2 > -4096UL) return r2;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		__stat_fix(&kst, (struct stat *)old_b);
-	if (n == SYS_newfstatat)
-		__stat_fix(&kst, (struct stat *)old_c);
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r2 __asm__("$2");
 
-	return r2;
+	__asm__ __volatile__ (
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
 {
-	long r2;
-	long old_b = b;
-	long old_c = c;
-	struct kernel_stat kst;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		b = (long) &kst;
-	if (n == SYS_newfstatat)
-		c = (long) &kst;
-
-	r2 = (__syscall)(n, a, b, c, d, e, f);
-	if (r2 > -4096UL) return r2;
-
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
-		__stat_fix(&kst, (struct stat *)old_b);
-	if (n == SYS_newfstatat)
-		__stat_fix(&kst, (struct stat *)old_c);
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r9 __asm__("$9") = f;
+	register long r2 __asm__("$2");
 
-	return r2;
+	__asm__ __volatile__ (
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 #define VDSO_USEFUL
 #define VDSO_CGT_SYM "__vdso_clock_gettime"
 #define VDSO_CGT_VER "LINUX_2.6"
+
+#define SO_SNDTIMEO_OLD 0x1005
+#define SO_RCVTIMEO_OLD 0x1006
diff --git a/arch/mipsn32/arch.mak b/arch/mipsn32/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/mipsn32/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/mipsn32/bits/alltypes.h.in b/arch/mipsn32/bits/alltypes.h.in
index 66ca18ad..ff934a4c 100644
--- a/arch/mipsn32/bits/alltypes.h.in
+++ b/arch/mipsn32/bits/alltypes.h.in
@@ -1,9 +1,15 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if _MIPSEL || __MIPSEL || __MIPSEL__
+#define __BYTE_ORDER 1234
+#else
+#define __BYTE_ORDER 4321
+#endif
+
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
@@ -13,14 +19,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/mipsn32/bits/endian.h b/arch/mipsn32/bits/endian.h
deleted file mode 100644
index 5399dcb5..00000000
--- a/arch/mipsn32/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if _MIPSEL || __MIPSEL || __MIPSEL__
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#else
-#define __BYTE_ORDER __BIG_ENDIAN
-#endif
diff --git a/arch/mipsn32/bits/ioctl.h b/arch/mipsn32/bits/ioctl.h
index b8f77cb5..e20bf19e 100644
--- a/arch/mipsn32/bits/ioctl.h
+++ b/arch/mipsn32/bits/ioctl.h
@@ -90,24 +90,6 @@
 #define TIOCMIWAIT	0x5491
 #define TIOCGICOUNT	0x5492
 
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-#define TIOCPKT_IOCTL		64
-
-#define TIOCSER_TEMT    0x01
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
 #define TIOCM_LE	0x001
 #define TIOCM_DTR	0x002
 #define TIOCM_RTS	0x004
@@ -123,90 +105,10 @@ struct winsize {
 #define TIOCM_OUT2	0x4000
 #define TIOCM_LOOP	0x8000
 
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOGETOWN       _IOR('f', 123, int)
 #define FIOSETOWN       _IOW('f', 124, int)
 #define SIOCATMARK      _IOR('s', 7, int)
 #define SIOCSPGRP       _IOW('s', 8, pid_t)
 #define SIOCGPGRP       _IOR('s', 9, pid_t)
-#define SIOCGSTAMP      0x8906
-#define SIOCGSTAMPNS    0x8907
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE		0x89F0
-#define SIOCPROTOPRIVATE	0x89E0
+#define SIOCGSTAMP      _IOR(0x89, 6, char[16])
+#define SIOCGSTAMPNS    _IOR(0x89, 7, char[16])
diff --git a/arch/mipsn32/bits/ipcstat.h b/arch/mipsn32/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/mipsn32/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/mipsn32/bits/limits.h b/arch/mipsn32/bits/limits.h
deleted file mode 100644
index fbc6d238..00000000
--- a/arch/mipsn32/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/mipsn32/bits/msg.h b/arch/mipsn32/bits/msg.h
index f28aece8..c734dbb5 100644
--- a/arch/mipsn32/bits/msg.h
+++ b/arch/mipsn32/bits/msg.h
@@ -1,19 +1,19 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
 #if _MIPSEL || __MIPSEL || __MIPSEL__
-	time_t msg_stime;
-	int __unused1;
-	time_t msg_rtime;
-	int __unused2;
-	time_t msg_ctime;
-	int __unused3;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
 #else
-	int __unused1;
-	time_t msg_stime;
-	int __unused2;
-	time_t msg_rtime;
-	int __unused3;
-	time_t msg_ctime;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_ctime_hi;
+	unsigned long __msg_ctime_lo;
 #endif
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
@@ -21,4 +21,7 @@ struct msqid_ds {
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/mipsn32/bits/posix.h b/arch/mipsn32/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/mipsn32/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/mipsn32/bits/reg.h b/arch/mipsn32/bits/reg.h
index a3f63acc..16178dd3 100644
--- a/arch/mipsn32/bits/reg.h
+++ b/arch/mipsn32/bits/reg.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
-
 #define EF_R0 0
 #define EF_R1 1
 #define EF_R2 2
diff --git a/arch/mipsn32/bits/sem.h b/arch/mipsn32/bits/sem.h
index e46ced95..fe6f0948 100644
--- a/arch/mipsn32/bits/sem.h
+++ b/arch/mipsn32/bits/sem.h
@@ -1,14 +1,16 @@
 struct semid_ds {
 	struct ipc_perm sem_perm;
-	time_t sem_otime;
-	time_t sem_ctime;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_ctime_lo;
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 	unsigned short sem_nsems;
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
 #else
-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
 	unsigned short sem_nsems;
 #endif
-	time_t __unused3;
-	time_t __unused4;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_hi;
+	time_t sem_otime;
+	time_t sem_ctime;
 };
diff --git a/arch/mipsn32/bits/shm.h b/arch/mipsn32/bits/shm.h
index 8d193781..ab8c642d 100644
--- a/arch/mipsn32/bits/shm.h
+++ b/arch/mipsn32/bits/shm.h
@@ -3,14 +3,19 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	time_t shm_dtime;
-	time_t shm_ctime;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_ctime_lo;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
-	unsigned long __pad1;
-	unsigned long __pad2;
+	unsigned short __shm_atime_hi;
+	unsigned short __shm_dtime_hi;
+	unsigned short __shm_ctime_hi;
+	unsigned short __pad1;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
diff --git a/arch/mipsn32/bits/signal.h b/arch/mipsn32/bits/signal.h
index c31ad07e..ffec7fd0 100644
--- a/arch/mipsn32/bits/signal.h
+++ b/arch/mipsn32/bits/signal.h
@@ -85,7 +85,6 @@ typedef struct __ucontext {
 #define SA_RESTART    0x10000000
 #define SA_NODEFER    0x40000000
 #define SA_RESETHAND  0x80000000
-#define SA_RESTORER   0x04000000
 
 #undef SIG_BLOCK
 #undef SIG_UNBLOCK
@@ -112,7 +111,7 @@ typedef struct __ucontext {
 #define SIGTRAP   5
 #define SIGABRT   6
 #define SIGIOT    SIGABRT
-#define SIGSTKFLT 7
+#define SIGEMT    7
 #define SIGFPE    8
 #define SIGKILL   9
 #define SIGBUS    10
diff --git a/arch/mipsn32/bits/socket.h b/arch/mipsn32/bits/socket.h
index b82c7d34..02fbb88b 100644
--- a/arch/mipsn32/bits/socket.h
+++ b/arch/mipsn32/bits/socket.h
@@ -1,19 +1,3 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int msg_iovlen;
-	void *msg_control;
-	socklen_t msg_controllen;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	socklen_t cmsg_len;
-	int cmsg_level;
-	int cmsg_type;
-};
-
 #define SOCK_STREAM    2
 #define SOCK_DGRAM     1
 
@@ -32,8 +16,6 @@ struct cmsghdr {
 #define SO_RCVBUF       0x1002
 #define SO_SNDLOWAT     0x1003
 #define SO_RCVLOWAT     0x1004
-#define SO_RCVTIMEO     0x1006
-#define SO_SNDTIMEO     0x1005
 #define SO_ERROR        0x1007
 #define SO_TYPE         0x1008
 #define SO_ACCEPTCONN   0x1009
diff --git a/arch/mipsn32/bits/stat.h b/arch/mipsn32/bits/stat.h
index f4d1df83..6e2f2808 100644
--- a/arch/mipsn32/bits/stat.h
+++ b/arch/mipsn32/bits/stat.h
@@ -1,6 +1,3 @@
-#include <string.h>
-#include <bits/alltypes.h>
-
 struct stat {
 	dev_t st_dev;
 	long __pad1[2];
@@ -12,11 +9,15 @@ struct stat {
 	dev_t st_rdev;
 	long __pad2[2];
 	off_t st_size;
-	struct timespec st_atim;
-	struct timespec st_mtim;
-	struct timespec st_ctim;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
 	blksize_t st_blksize;
 	long __pad3;
 	blkcnt_t st_blocks;
-	long __pad4[14];
+	struct timespec st_atim;
+	struct timespec st_mtim;
+	struct timespec st_ctim;
+	long __pad4[2];
 };
diff --git a/arch/mipsn32/bits/stdint.h b/arch/mipsn32/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/mipsn32/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/mipsn32/bits/syscall.h.in b/arch/mipsn32/bits/syscall.h.in
index c1726180..9a4bd301 100644
--- a/arch/mipsn32/bits/syscall.h.in
+++ b/arch/mipsn32/bits/syscall.h.in
@@ -92,7 +92,7 @@
 #define __NR_fchown			6091
 #define __NR_lchown			6092
 #define __NR_umask			6093
-#define __NR_gettimeofday		6094
+#define __NR_gettimeofday_time32		6094
 #define __NR_getrlimit			6095
 #define __NR_getrusage			6096
 #define __NR_sysinfo			6097
@@ -157,7 +157,7 @@
 #define __NR_chroot			6156
 #define __NR_sync			6157
 #define __NR_acct			6158
-#define __NR_settimeofday		6159
+#define __NR_settimeofday_time32		6159
 #define __NR_mount			6160
 #define __NR_umount2			6161
 #define __NR_swapon			6162
@@ -219,14 +219,14 @@
 #define __NR_fstatfs64			6218
 #define __NR_sendfile64			6219
 #define __NR_timer_create		6220
-#define __NR_timer_settime		6221
-#define __NR_timer_gettime		6222
+#define __NR_timer_settime32		6221
+#define __NR_timer_gettime32		6222
 #define __NR_timer_getoverrun		6223
 #define __NR_timer_delete		6224
-#define __NR_clock_settime		6225
-#define __NR_clock_gettime		6226
-#define __NR_clock_getres		6227
-#define __NR_clock_nanosleep		6228
+#define __NR_clock_settime32		6225
+#define __NR_clock_gettime32		6226
+#define __NR_clock_getres_time32		6227
+#define __NR_clock_nanosleep_time32		6228
 #define __NR_tgkill			6229
 #define __NR_utimes			6230
 #define __NR_mbind			6231
@@ -282,8 +282,8 @@
 #define __NR_eventfd			6282
 #define __NR_fallocate			6283
 #define __NR_timerfd_create		6284
-#define __NR_timerfd_gettime		6285
-#define __NR_timerfd_settime		6286
+#define __NR_timerfd_gettime32		6285
+#define __NR_timerfd_settime32		6286
 #define __NR_signalfd4			6287
 #define __NR_eventfd2			6288
 #define __NR_epoll_create1		6289
@@ -330,4 +330,51 @@
 #define __NR_statx			6330
 #define __NR_rseq			6331
 #define __NR_io_pgetevents		6332
+#define __NR_clock_gettime64		6403
+#define __NR_clock_settime64		6404
+#define __NR_clock_adjtime64		6405
+#define __NR_clock_getres_time64	6406
+#define __NR_clock_nanosleep_time64	6407
+#define __NR_timer_gettime64		6408
+#define __NR_timer_settime64		6409
+#define __NR_timerfd_gettime64		6410
+#define __NR_timerfd_settime64		6411
+#define __NR_utimensat_time64		6412
+#define __NR_pselect6_time64		6413
+#define __NR_ppoll_time64		6414
+#define __NR_io_pgetevents_time64	6416
+#define __NR_recvmmsg_time64		6417
+#define __NR_mq_timedsend_time64	6418
+#define __NR_mq_timedreceive_time64	6419
+#define __NR_semtimedop_time64		6420
+#define __NR_rt_sigtimedwait_time64	6421
+#define __NR_futex_time64		6422
+#define __NR_sched_rr_get_interval_time64 6423
+#define __NR_pidfd_send_signal		6424
+#define __NR_io_uring_setup		6425
+#define __NR_io_uring_enter		6426
+#define __NR_io_uring_register		6427
+#define __NR_open_tree		6428
+#define __NR_move_mount		6429
+#define __NR_fsopen		6430
+#define __NR_fsconfig		6431
+#define __NR_fsmount		6432
+#define __NR_fspick		6433
+#define __NR_pidfd_open		6434
+#define __NR_clone3		6435
+#define __NR_close_range	6436
+#define __NR_openat2		6437
+#define __NR_pidfd_getfd	6438
+#define __NR_faccessat2		6439
+#define __NR_process_madvise	6440
+#define __NR_epoll_pwait2	6441
+#define __NR_mount_setattr	6442
+#define __NR_landlock_create_ruleset	6444
+#define __NR_landlock_add_rule	6445
+#define __NR_landlock_restrict_self	6446
+#define __NR_process_mrelease	6448
+#define __NR_futex_waitv	6449
+#define __NR_set_mempolicy_home_node	6450
+#define __NR_cachestat		6451
+#define __NR_fchmodat2		6452
 
diff --git a/arch/mipsn32/bits/termios.h b/arch/mipsn32/bits/termios.h
index f7b9dd2e..9d571f78 100644
--- a/arch/mipsn32/bits/termios.h
+++ b/arch/mipsn32/bits/termios.h
@@ -165,5 +165,5 @@ struct termios {
 #define EXTPROC 0200000
 
 #define XTABS  0014000
-#define TIOCSER_TEMT 0x01
+#define TIOCSER_TEMT 1
 #endif
diff --git a/arch/mipsn32/ksigaction.h b/arch/mipsn32/ksigaction.h
index b565f1fc..485abf75 100644
--- a/arch/mipsn32/ksigaction.h
+++ b/arch/mipsn32/ksigaction.h
@@ -4,7 +4,7 @@ struct k_sigaction {
 	unsigned flags;
 	void (*handler)(int);
 	unsigned long mask[4];
-	void (*restorer)();
+	void *unused;
 };
 
 hidden void __restore(), __restore_rt();
diff --git a/arch/mipsn32/kstat.h b/arch/mipsn32/kstat.h
new file mode 100644
index 00000000..3841559c
--- /dev/null
+++ b/arch/mipsn32/kstat.h
@@ -0,0 +1,22 @@
+struct kstat {
+	unsigned st_dev;
+	long __pad1[3];
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	unsigned st_rdev;
+	long __pad2[3];
+	off_t st_size;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	blksize_t st_blksize;
+	long __pad3;
+	blkcnt_t st_blocks;
+	long __pad4[14];
+};
diff --git a/arch/mipsn32/pthread_arch.h b/arch/mipsn32/pthread_arch.h
index 1e7839ea..c45347ab 100644
--- a/arch/mipsn32/pthread_arch.h
+++ b/arch/mipsn32/pthread_arch.h
@@ -1,19 +1,19 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
 #if __mips_isa_rev < 2
-	register char *tp __asm__("$3");
+	register uintptr_t tp __asm__("$3");
 	__asm__ (".word 0x7c03e83b" : "=r" (tp) );
 #else
-	char *tp;
+	uintptr_t tp;
 	__asm__ ("rdhwr %0, $29" : "=r" (tp) );
 #endif
-	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
+#define TP_OFFSET 0x7000
 #define DTP_OFFSET 0x8000
 
 #define MC_PC pc
diff --git a/arch/mipsn32/reloc.h b/arch/mipsn32/reloc.h
index 728aaab2..bf00bd6a 100644
--- a/arch/mipsn32/reloc.h
+++ b/arch/mipsn32/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __mips_isa_rev >= 6
 #define ISA_SUFFIX "r6"
 #else
@@ -31,6 +29,7 @@
 
 #define NEED_MIPS_GOT_RELOCS 1
 #define DT_DEBUG_INDIRECT DT_MIPS_RLD_MAP
+#define DT_DEBUG_INDIRECT_REL DT_MIPS_RLD_MAP_REL
 #define ARCH_SYM_REJECT_UND(s) (!((s)->st_other & STO_MIPS_PLT))
 
 #define CRTJMP(pc,sp) __asm__ __volatile__( \
diff --git a/arch/mipsn32/syscall_arch.h b/arch/mipsn32/syscall_arch.h
index f6a1fbae..c681905d 100644
--- a/arch/mipsn32/syscall_arch.h
+++ b/arch/mipsn32/syscall_arch.h
@@ -1,20 +1,16 @@
 #define __SYSCALL_LL_E(x) (x)
 #define __SYSCALL_LL_O(x) (x)
 
-hidden long (__syscall)(long, ...);
-
 #define SYSCALL_RLIM_INFINITY (-1UL/2)
 
-#if _MIPSEL || __MIPSEL || __MIPSEL__
-#define __stat_fix(st) ((st),(void)0)
+#if __mips_isa_rev >= 6
+#define SYSCALL_CLOBBERLIST \
+	"$1", "$3", "$10", "$11", "$12", "$13", \
+	"$14", "$15", "$24", "$25", "memory"
 #else
-#include <sys/stat.h>
-static inline void __stat_fix(long p)
-{
-	struct stat *st = (struct stat *)p;
-	st->st_dev >>= 32;
-	st->st_rdev >>= 32;
-}
+#define SYSCALL_CLOBBERLIST \
+	"$1", "$3", "$10", "$11", "$12", "$13", \
+	"$14", "$15", "$24", "$25", "hi", "lo", "memory"
 #endif
 
 static inline long __syscall0(long n)
@@ -22,11 +18,11 @@ static inline long __syscall0(long n)
 	register long r7 __asm__("$7");
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
-		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	return r7 ? -r2 : r2;
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall1(long n, long a)
@@ -35,12 +31,11 @@ static inline long __syscall1(long n, long a)
 	register long r7 __asm__("$7");
 	register long r2 __asm__("$2");
 	__asm__ __volatile__ (
-		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	return r7 ? -r2 : r2;
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall2(long n, long a, long b)
@@ -49,16 +44,13 @@ static inline long __syscall2(long n, long a, long b)
 	register long r5 __asm__("$5") = b;
 	register long r7 __asm__("$7");
 	register long r2 __asm__("$2");
+
 	__asm__ __volatile__ (
-		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	if (r7) return -r2;
-	long ret = r2;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
-	return ret;
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall3(long n, long a, long b, long c)
@@ -68,16 +60,13 @@ static inline long __syscall3(long n, long a, long b, long c)
 	register long r6 __asm__("$6") = c;
 	register long r7 __asm__("$7");
 	register long r2 __asm__("$2");
+
 	__asm__ __volatile__ (
-		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5), "r"(r6)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	if (r7) return -r2;
-	long ret = r2;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
-	return ret;
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "=r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall4(long n, long a, long b, long c, long d)
@@ -87,37 +76,55 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
 	register long r6 __asm__("$6") = c;
 	register long r7 __asm__("$7") = d;
 	register long r2 __asm__("$2");
+
 	__asm__ __volatile__ (
-		"addu $2,$0,%2 ; syscall"
-		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
-		  "r"(r4), "r"(r5), "r"(r6)
-		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
-		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
-	if (r7) return -r2;
-	long ret = r2;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
-	if (n == SYS_newfstatat) __stat_fix(c);
-	return ret;
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall5(long n, long a, long b, long c, long d, long e)
 {
-	long r2 = (__syscall)(n, a, b, c, d, e);
-	if (r2 > -4096UL) return r2;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
-	if (n == SYS_newfstatat) __stat_fix(c);
-	return r2;
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r2 __asm__("$2");
+
+	__asm__ __volatile__ (
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
 {
-	long r2 = (__syscall)(n, a, b, c, d, e, f);
-	if (r2 > -4096UL) return r2;
-	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) __stat_fix(b);
-	if (n == SYS_newfstatat) __stat_fix(c);
-	return r2;
+	register long r4 __asm__("$4") = a;
+	register long r5 __asm__("$5") = b;
+	register long r6 __asm__("$6") = c;
+	register long r7 __asm__("$7") = d;
+	register long r8 __asm__("$8") = e;
+	register long r9 __asm__("$9") = f;
+	register long r2 __asm__("$2");
+
+	__asm__ __volatile__ (
+		"daddu $2,$0,%2 ; syscall"
+		: "=&r"(r2), "+r"(r7)
+		: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9)
+		: SYSCALL_CLOBBERLIST);
+	return r7 && r2>0 ? -r2 : r2;
 }
 
 #define VDSO_USEFUL
-#define VDSO_CGT_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_SYM "__vdso_clock_gettime"
+#define VDSO_CGT32_VER "LINUX_2.6"
+#define VDSO_CGT_SYM "__vdso_clock_gettime64"
 #define VDSO_CGT_VER "LINUX_2.6"
+
+#define SO_SNDTIMEO_OLD 0x1005
+#define SO_RCVTIMEO_OLD 0x1006
diff --git a/arch/or1k/arch.mak b/arch/or1k/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/or1k/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/or1k/bits/alltypes.h.in b/arch/or1k/bits/alltypes.h.in
index 667963c7..7d3e291a 100644
--- a/arch/or1k/bits/alltypes.h.in
+++ b/arch/or1k/bits/alltypes.h.in
@@ -1,9 +1,10 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#define __BYTE_ORDER 4321
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
 TYPEDEF unsigned wchar_t;
@@ -13,14 +14,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/or1k/bits/endian.h b/arch/or1k/bits/endian.h
deleted file mode 100644
index ef074b77..00000000
--- a/arch/or1k/bits/endian.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/arch/or1k/bits/ipc.h b/arch/or1k/bits/ipc.h
deleted file mode 100644
index 3d894e30..00000000
--- a/arch/or1k/bits/ipc.h
+++ /dev/null
@@ -1,13 +0,0 @@
-struct ipc_perm {
-	key_t __ipc_perm_key;
-	uid_t uid;
-	gid_t gid;
-	uid_t cuid;
-	gid_t cgid;
-	mode_t mode;
-	int __ipc_perm_seq;
-	long __pad1;
-	long __pad2;
-};
-
-#define IPC_64 0
diff --git a/arch/or1k/bits/ipcstat.h b/arch/or1k/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/or1k/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/or1k/bits/limits.h b/arch/or1k/bits/limits.h
index 3a811c99..fac47aad 100644
--- a/arch/or1k/bits/limits.h
+++ b/arch/or1k/bits/limits.h
@@ -1,8 +1 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define PAGESIZE 8192
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/or1k/bits/msg.h b/arch/or1k/bits/msg.h
index bc8436c4..7bbbb2bf 100644
--- a/arch/or1k/bits/msg.h
+++ b/arch/or1k/bits/msg.h
@@ -1,15 +1,18 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
-	time_t msg_stime;
-	int __unused1;
-	time_t msg_rtime;
-	int __unused2;
-	time_t msg_ctime;
-	int __unused3;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
 	msglen_t msg_qbytes;
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/or1k/bits/posix.h b/arch/or1k/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/or1k/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/or1k/bits/reg.h b/arch/or1k/bits/reg.h
deleted file mode 100644
index 0c7bffca..00000000
--- a/arch/or1k/bits/reg.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
-/* FIXME */
diff --git a/arch/or1k/bits/sem.h b/arch/or1k/bits/sem.h
new file mode 100644
index 00000000..d88338e6
--- /dev/null
+++ b/arch/or1k/bits/sem.h
@@ -0,0 +1,13 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	unsigned short sem_nsems;
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/s390x/bits/shm.h b/arch/or1k/bits/shm.h
index 6652d659..725fb469 100644
--- a/arch/s390x/bits/shm.h
+++ b/arch/or1k/bits/shm.h
@@ -3,14 +3,21 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	time_t shm_dtime;
-	time_t shm_ctime;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
 	unsigned long __pad1;
 	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
@@ -22,4 +29,3 @@ struct shm_info {
 	unsigned long shm_tot, shm_rss, shm_swp;
 	unsigned long __swap_attempts, __swap_successes;
 };
-
diff --git a/arch/or1k/bits/signal.h b/arch/or1k/bits/signal.h
index be576d1d..c45be676 100644
--- a/arch/or1k/bits/signal.h
+++ b/arch/or1k/bits/signal.h
@@ -43,7 +43,6 @@ typedef struct __ucontext {
 #define SA_RESTART    0x10000000
 #define SA_NODEFER    0x40000000
 #define SA_RESETHAND  0x80000000
-#define SA_RESTORER   0x04000000
 
 #endif
 
diff --git a/arch/or1k/bits/stat.h b/arch/or1k/bits/stat.h
index ce6a6bd0..cde3fd02 100644
--- a/arch/or1k/bits/stat.h
+++ b/arch/or1k/bits/stat.h
@@ -14,8 +14,12 @@ struct stat {
 	blksize_t st_blksize;
 	int __st_blksize_padding;
 	blkcnt_t st_blocks;
-	struct timespec st_atim;
-	struct timespec st_mtim;
-	struct timespec st_ctim;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
 	unsigned __unused[2];
+ 	struct timespec st_atim;
+ 	struct timespec st_mtim;
+ 	struct timespec st_ctim;
 };
diff --git a/arch/or1k/bits/stdint.h b/arch/or1k/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/or1k/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/or1k/bits/syscall.h.in b/arch/or1k/bits/syscall.h.in
index 76ba2c6e..00812bf8 100644
--- a/arch/or1k/bits/syscall.h.in
+++ b/arch/or1k/bits/syscall.h.in
@@ -85,8 +85,8 @@
 #define __NR_fdatasync 83
 #define __NR_sync_file_range 84
 #define __NR_timerfd_create 85
-#define __NR_timerfd_settime 86
-#define __NR_timerfd_gettime 87
+#define __NR_timerfd_settime32 86
+#define __NR_timerfd_gettime32 87
 #define __NR_utimensat 88
 #define __NR_acct 89
 #define __NR_capget 90
@@ -107,14 +107,14 @@
 #define __NR_init_module 105
 #define __NR_delete_module 106
 #define __NR_timer_create 107
-#define __NR_timer_gettime 108
+#define __NR_timer_gettime32 108
 #define __NR_timer_getoverrun 109
-#define __NR_timer_settime 110
+#define __NR_timer_settime32 110
 #define __NR_timer_delete 111
-#define __NR_clock_settime 112
-#define __NR_clock_gettime 113
-#define __NR_clock_getres 114
-#define __NR_clock_nanosleep 115
+#define __NR_clock_settime32 112
+#define __NR_clock_gettime32 113
+#define __NR_clock_getres_time32 114
+#define __NR_clock_nanosleep_time32 115
 #define __NR_syslog 116
 #define __NR_ptrace 117
 #define __NR_sched_setparam 118
@@ -168,8 +168,8 @@
 #define __NR_umask 166
 #define __NR_prctl 167
 #define __NR_getcpu 168
-#define __NR_gettimeofday 169
-#define __NR_settimeofday 170
+#define __NR_gettimeofday_time32 169
+#define __NR_settimeofday_time32 170
 #define __NR_adjtimex 171
 #define __NR_getpid 172
 #define __NR_getppid 173
@@ -277,4 +277,53 @@
 #define __NR_pkey_free 290
 #define __NR_statx 291
 #define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_clock_gettime64 403
+#define __NR_clock_settime64 404
+#define __NR_clock_adjtime64 405
+#define __NR_clock_getres_time64 406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64 408
+#define __NR_timer_settime64 409
+#define __NR_timerfd_gettime64 410
+#define __NR_timerfd_settime64 411
+#define __NR_utimensat_time64 412
+#define __NR_pselect6_time64 413
+#define __NR_ppoll_time64 414
+#define __NR_io_pgetevents_time64 416
+#define __NR_recvmmsg_time64 417
+#define __NR_mq_timedsend_time64 418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64 420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64 422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/or1k/kstat.h b/arch/or1k/kstat.h
new file mode 100644
index 00000000..c1449579
--- /dev/null
+++ b/arch/or1k/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	long long __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	int __st_blksize_padding;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	unsigned __unused[2];
+};
diff --git a/arch/or1k/pthread_arch.h b/arch/or1k/pthread_arch.h
index 1b806f89..f75ea7e4 100644
--- a/arch/or1k/pthread_arch.h
+++ b/arch/or1k/pthread_arch.h
@@ -1,18 +1,16 @@
-/* or1k use variant I, but with the twist that tp points to the end of TCB */
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
 #ifdef __clang__
-	char *tp;
+	uintptr_t tp;
 	__asm__ ("l.ori %0, r10, 0" : "=r" (tp) );
 #else
-	register char *tp __asm__("r10");
+	register uintptr_t tp __asm__("r10");
 	__asm__ ("" : "=r" (tp) );
 #endif
-	return (struct pthread *) (tp - sizeof(struct pthread));
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC regs.pc
diff --git a/arch/or1k/syscall_arch.h b/arch/or1k/syscall_arch.h
index caff7ece..21738ce0 100644
--- a/arch/or1k/syscall_arch.h
+++ b/arch/or1k/syscall_arch.h
@@ -5,8 +5,6 @@
 
 #define SYSCALL_MMAP2_UNIT 8192ULL
 
-#ifndef __clang__
-
 static __inline long __syscall0(long n)
 {
 	register unsigned long r11 __asm__("r11") = n;
@@ -114,9 +112,4 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 	return r11;
 }
 
-#else
-
-#undef SYSCALL_NO_INLINE
-#define SYSCALL_NO_INLINE
-
-#endif
+#define IPC_64 0
diff --git a/arch/powerpc/arch.mak b/arch/powerpc/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/powerpc/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/powerpc/bits/alltypes.h.in b/arch/powerpc/bits/alltypes.h.in
index 37f27d6f..b48df6a6 100644
--- a/arch/powerpc/bits/alltypes.h.in
+++ b/arch/powerpc/bits/alltypes.h.in
@@ -1,26 +1,20 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#define __BYTE_ORDER 4321
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
+#ifdef __WCHAR_TYPE__
+TYPEDEF __WCHAR_TYPE__ wchar_t;
+#else
 TYPEDEF long wchar_t;
 #endif
+#endif
 
 TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/powerpc/bits/endian.h b/arch/powerpc/bits/endian.h
deleted file mode 100644
index 4442abf4..00000000
--- a/arch/powerpc/bits/endian.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifdef __BIG_ENDIAN__
-  #if __BIG_ENDIAN__
-    #define __BYTE_ORDER __BIG_ENDIAN
-  #endif
-#endif /* __BIG_ENDIAN__ */
-
-#ifdef __LITTLE_ENDIAN__
-  #if __LITTLE_ENDIAN__
-    #define __BYTE_ORDER __LITTLE_ENDIAN
-  #endif
-#endif /* __LITTLE_ENDIAN__ */
-
-#ifndef __BYTE_ORDER
-  #define __BYTE_ORDER __BIG_ENDIAN
-#endif
diff --git a/arch/powerpc/bits/fenv.h b/arch/powerpc/bits/fenv.h
index c5a3e5c5..5b15c69a 100644
--- a/arch/powerpc/bits/fenv.h
+++ b/arch/powerpc/bits/fenv.h
@@ -1,4 +1,4 @@
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
 #define FE_ALL_EXCEPT 0
 #define FE_TONEAREST  0
 #else
diff --git a/arch/powerpc/bits/hwcap.h b/arch/powerpc/bits/hwcap.h
index 803de9b5..12981623 100644
--- a/arch/powerpc/bits/hwcap.h
+++ b/arch/powerpc/bits/hwcap.h
@@ -41,3 +41,5 @@
 #define PPC_FEATURE2_DARN		0x00200000
 #define PPC_FEATURE2_SCV		0x00100000
 #define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000
+#define PPC_FEATURE2_ARCH_3_1		0x00040000
+#define PPC_FEATURE2_MMA		0x00020000
diff --git a/arch/powerpc/bits/ioctl.h b/arch/powerpc/bits/ioctl.h
index 47586234..ac9bfd20 100644
--- a/arch/powerpc/bits/ioctl.h
+++ b/arch/powerpc/bits/ioctl.h
@@ -78,14 +78,6 @@
 #define TIOCGSERIAL	0x541E
 #define TIOCSSERIAL	0x541F
 #define TIOCPKT	0x5420
-#define TIOCPKT_DATA		0
-#define TIOCPKT_FLUSHREAD	1
-#define TIOCPKT_FLUSHWRITE	2
-#define TIOCPKT_STOP		4
-#define TIOCPKT_START		8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-#define TIOCPKT_IOCTL		64
 
 #define TIOCNOTTY	0x5422
 #define TIOCSETD	0x5423
@@ -113,105 +105,16 @@
 #define TIOCSLCKTRMIOS	0x5457
 #define TIOCSERGSTRUCT	0x5458
 #define TIOCSERGETLSR	0x5459
-#define TIOCSER_TEMT	0x01
 #define TIOCSERGETMULTI	0x545A
 #define TIOCSERSETMULTI	0x545B
 
 #define TIOCMIWAIT	0x545C
 #define TIOCGICOUNT	0x545D
 
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOSETOWN       0x8901
 #define SIOCSPGRP       0x8902
 #define FIOGETOWN       0x8903
 #define SIOCGPGRP       0x8904
 #define SIOCATMARK      0x8905
-#define SIOCGSTAMP      0x8906
-#define SIOCGSTAMPNS    0x8907
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE		0x89F0
-#define SIOCPROTOPRIVATE	0x89E0
+#define SIOCGSTAMP      _IOR(0x89, 6, char[16])
+#define SIOCGSTAMPNS    _IOR(0x89, 7, char[16])
diff --git a/arch/powerpc/bits/ipc.h b/arch/powerpc/bits/ipc.h
index 3f2ede07..a388d56b 100644
--- a/arch/powerpc/bits/ipc.h
+++ b/arch/powerpc/bits/ipc.h
@@ -10,6 +10,3 @@ struct ipc_perm {
 	long long __pad2;
 	long long __pad3;
 };
-
-#define IPC_64 0x100
-
diff --git a/arch/powerpc/bits/ipcstat.h b/arch/powerpc/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/powerpc/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/powerpc/bits/limits.h b/arch/powerpc/bits/limits.h
deleted file mode 100644
index fbc6d238..00000000
--- a/arch/powerpc/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/powerpc/bits/mman.h b/arch/powerpc/bits/mman.h
index b3a675a8..95ec4358 100644
--- a/arch/powerpc/bits/mman.h
+++ b/arch/powerpc/bits/mman.h
@@ -4,7 +4,6 @@
 #define MAP_NORESERVE   0x40
 #undef MAP_LOCKED
 #define MAP_LOCKED	0x80
-#undef MAP_SYNC
 
 #undef MCL_CURRENT
 #define MCL_CURRENT     0x2000
diff --git a/arch/powerpc/bits/msg.h b/arch/powerpc/bits/msg.h
index 171c11a3..9fb15dcc 100644
--- a/arch/powerpc/bits/msg.h
+++ b/arch/powerpc/bits/msg.h
@@ -1,15 +1,18 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
-	int __unused1;
-	time_t msg_stime;
-	int __unused2;
-	time_t msg_rtime;
-	int __unused3;
-	time_t msg_ctime;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_ctime_hi;
+	unsigned long __msg_ctime_lo;
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
 	msglen_t msg_qbytes;
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/powerpc/bits/posix.h b/arch/powerpc/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/powerpc/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/powerpc/bits/ptrace.h b/arch/powerpc/bits/ptrace.h
index 75086ca0..303a0735 100644
--- a/arch/powerpc/bits/ptrace.h
+++ b/arch/powerpc/bits/ptrace.h
@@ -8,6 +8,8 @@
 #define PTRACE_SET_DEBUGREG	0x1a
 #define PTRACE_GETVSRREGS	0x1b
 #define PTRACE_SETVSRREGS	0x1c
+#define PTRACE_SYSEMU		0x1d
+#define PTRACE_SYSEMU_SINGLESTEP	0x1e
 #define PTRACE_SINGLEBLOCK	0x100
 
 #define PT_GETVRREGS PTRACE_GETVRREGS
diff --git a/arch/powerpc/bits/reg.h b/arch/powerpc/bits/reg.h
deleted file mode 100644
index 0c7bffca..00000000
--- a/arch/powerpc/bits/reg.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
-/* FIXME */
diff --git a/arch/powerpc/bits/sem.h b/arch/powerpc/bits/sem.h
index bc2d6d1f..28be4845 100644
--- a/arch/powerpc/bits/sem.h
+++ b/arch/powerpc/bits/sem.h
@@ -1,10 +1,12 @@
 struct semid_ds {
 	struct ipc_perm sem_perm;
-	int __unused1;
-	time_t sem_otime;
-	int  __unused2;
-	time_t sem_ctime;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_ctime_hi;
+	unsigned long __sem_ctime_lo;
 	unsigned short __sem_nsems_pad, sem_nsems;
 	long __unused3;
 	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
 };
diff --git a/arch/powerpc/bits/shm.h b/arch/powerpc/bits/shm.h
index 40e5e8be..7f1ca17e 100644
--- a/arch/powerpc/bits/shm.h
+++ b/arch/powerpc/bits/shm.h
@@ -2,19 +2,21 @@
 
 struct shmid_ds {
 	struct ipc_perm shm_perm;
-	int __unused1;
-	time_t shm_atime;
-	int __unused2;
-	time_t shm_dtime;
-	int __unused3;
-	time_t shm_ctime;
-	int __unused4;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_ctime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __pad1;
 	size_t shm_segsz;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
-	unsigned long __pad1;
 	unsigned long __pad2;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
@@ -26,4 +28,3 @@ struct shm_info {
 	unsigned long shm_tot, shm_rss, shm_swp;
 	unsigned long __swap_attempts, __swap_successes;
 };
-
diff --git a/arch/powerpc/bits/signal.h b/arch/powerpc/bits/signal.h
index 06efb11c..c1bf3caf 100644
--- a/arch/powerpc/bits/signal.h
+++ b/arch/powerpc/bits/signal.h
@@ -28,7 +28,7 @@ struct sigcontext {
 	int signal;
 	unsigned long handler;
 	unsigned long oldmask;
-	void *regs;
+	struct pt_regs *regs;
 };
 
 typedef struct {
diff --git a/arch/powerpc/bits/socket.h b/arch/powerpc/bits/socket.h
index a94b8bdb..b19ed42b 100644
--- a/arch/powerpc/bits/socket.h
+++ b/arch/powerpc/bits/socket.h
@@ -1,19 +1,3 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int msg_iovlen;
-	void *msg_control;
-	socklen_t msg_controllen;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	socklen_t cmsg_len;
-	int cmsg_level;
-	int cmsg_type;
-};
-
 #define SO_DEBUG        1
 #define SO_REUSEADDR    2
 #define SO_TYPE         3
@@ -31,8 +15,6 @@ struct cmsghdr {
 #define SO_REUSEPORT    15
 #define SO_RCVLOWAT     16
 #define SO_SNDLOWAT     17
-#define SO_RCVTIMEO     18
-#define SO_SNDTIMEO     19
 #define SO_PASSCRED     20
 #define SO_PEERCRED     21
 #define SO_ACCEPTCONN   30
diff --git a/arch/powerpc/bits/stat.h b/arch/powerpc/bits/stat.h
index dcb896fd..585d98e9 100644
--- a/arch/powerpc/bits/stat.h
+++ b/arch/powerpc/bits/stat.h
@@ -13,8 +13,12 @@ struct stat {
 	off_t st_size;
 	blksize_t st_blksize;
 	blkcnt_t st_blocks;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
+	unsigned __unused[2];
 	struct timespec st_atim;
 	struct timespec st_mtim;
 	struct timespec st_ctim;
-	unsigned __unused[2];
 };
diff --git a/arch/powerpc/bits/stdint.h b/arch/powerpc/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/powerpc/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/powerpc/bits/syscall.h.in b/arch/powerpc/bits/syscall.h.in
index 54e155f2..ea95f3ed 100644
--- a/arch/powerpc/bits/syscall.h.in
+++ b/arch/powerpc/bits/syscall.h.in
@@ -76,8 +76,8 @@
 #define __NR_setrlimit               75
 #define __NR_getrlimit               76
 #define __NR_getrusage               77
-#define __NR_gettimeofday            78
-#define __NR_settimeofday            79
+#define __NR_gettimeofday_time32            78
+#define __NR_settimeofday_time32            79
 #define __NR_getgroups               80
 #define __NR_setgroups               81
 #define __NR_select                  82
@@ -238,14 +238,14 @@
 #define __NR_epoll_wait             238
 #define __NR_remap_file_pages       239
 #define __NR_timer_create           240
-#define __NR_timer_settime          241
-#define __NR_timer_gettime          242
+#define __NR_timer_settime32          241
+#define __NR_timer_gettime32          242
 #define __NR_timer_getoverrun       243
 #define __NR_timer_delete           244
-#define __NR_clock_settime          245
-#define __NR_clock_gettime          246
-#define __NR_clock_getres           247
-#define __NR_clock_nanosleep        248
+#define __NR_clock_settime32          245
+#define __NR_clock_gettime32          246
+#define __NR_clock_getres_time32           247
+#define __NR_clock_nanosleep_time32        248
 #define __NR_swapcontext            249
 #define __NR_tgkill                 250
 #define __NR_utimes                 251
@@ -307,8 +307,8 @@
 #define __NR_sync_file_range2	308
 #define __NR_fallocate		309
 #define __NR_subpage_prot		310
-#define __NR_timerfd_settime	311
-#define __NR_timerfd_gettime	312
+#define __NR_timerfd_settime32	311
+#define __NR_timerfd_gettime32	312
 #define __NR_signalfd4		313
 #define __NR_eventfd2		314
 #define __NR_epoll_create1	315
@@ -373,4 +373,61 @@
 #define __NR_pkey_mprotect         386
 #define __NR_rseq                  387
 #define __NR_io_pgetevents         388
+#define __NR_semget                393
+#define __NR_semctl                394
+#define __NR_shmget                395
+#define __NR_shmctl                396
+#define __NR_shmat                 397
+#define __NR_shmdt                 398
+#define __NR_msgget                399
+#define __NR_msgsnd                400
+#define __NR_msgrcv                401
+#define __NR_msgctl                402
+#define __NR_clock_gettime64       403
+#define __NR_clock_settime64       404
+#define __NR_clock_adjtime64       405
+#define __NR_clock_getres_time64   406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64       408
+#define __NR_timer_settime64       409
+#define __NR_timerfd_gettime64     410
+#define __NR_timerfd_settime64     411
+#define __NR_utimensat_time64      412
+#define __NR_pselect6_time64       413
+#define __NR_ppoll_time64          414
+#define __NR_io_pgetevents_time64  416
+#define __NR_recvmmsg_time64       417
+#define __NR_mq_timedsend_time64   418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64     420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64          422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal     424
+#define __NR_io_uring_setup        425
+#define __NR_io_uring_enter        426
+#define __NR_io_uring_register     427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/powerpc/bits/termios.h b/arch/powerpc/bits/termios.h
index e3f22e86..da1f406b 100644
--- a/arch/powerpc/bits/termios.h
+++ b/arch/powerpc/bits/termios.h
@@ -167,5 +167,5 @@ struct termios {
 #define EXTPROC 0x10000000
 
 #define XTABS   00006000
-#define TIOCSER_TEMT 0x01
+#define TIOCSER_TEMT 1
 #endif
diff --git a/arch/powerpc/bits/user.h b/arch/powerpc/bits/user.h
index 6cc8aaf7..7f528746 100644
--- a/arch/powerpc/bits/user.h
+++ b/arch/powerpc/bits/user.h
@@ -1,10 +1,8 @@
-struct pt_regs {
-	unsigned long gpr[32], nip, msr, orig_gpr3, ctr, link, xer, ccr, mq;
-	unsigned long trap, dar, dsisr, result;
-};
-
 struct user {
-	struct pt_regs regs;
+	struct {
+		unsigned long gpr[32], nip, msr, orig_gpr3, ctr, link, xer, ccr, mq;
+		unsigned long trap, dar, dsisr, result;
+	} regs;
 	unsigned long u_tsize, u_dsize, u_ssize;
 	unsigned long start_code, start_data, start_stack;
 	long signal;
diff --git a/arch/powerpc/kstat.h b/arch/powerpc/kstat.h
new file mode 100644
index 00000000..5a611e7b
--- /dev/null
+++ b/arch/powerpc/kstat.h
@@ -0,0 +1,20 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	short __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	unsigned __unused[2];
+};
diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h
index ae0f28d6..42e88b07 100644
--- a/arch/powerpc/pthread_arch.h
+++ b/arch/powerpc/pthread_arch.h
@@ -1,18 +1,16 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	register char *tp __asm__("r2");
+	register uintptr_t tp __asm__("r2");
 	__asm__ ("" : "=r" (tp) );
-	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
+	return tp;
 }
                         
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
+#define TP_OFFSET 0x7000
 #define DTP_OFFSET 0x8000
 
 // the kernel calls the ip "nip", it's the first saved value after the 32
 // GPRs.
 #define MC_PC gregs[32]
-
-#define CANARY canary_at_end
diff --git a/arch/powerpc/reloc.h b/arch/powerpc/reloc.h
index 1b4cab36..fdfbf827 100644
--- a/arch/powerpc/reloc.h
+++ b/arch/powerpc/reloc.h
@@ -1,4 +1,4 @@
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
 #define FP_SUFFIX "-sf"
 #else
 #define FP_SUFFIX ""
@@ -9,6 +9,7 @@
 #define TPOFF_K (-0x7000)
 
 #define REL_SYMBOLIC    R_PPC_ADDR32
+#define REL_USYMBOLIC   R_PPC_UADDR32
 #define REL_GOT         R_PPC_GLOB_DAT
 #define REL_PLT         R_PPC_JMP_SLOT
 #define REL_RELATIVE    R_PPC_RELATIVE
diff --git a/arch/powerpc/syscall_arch.h b/arch/powerpc/syscall_arch.h
index 004060e6..54c885cb 100644
--- a/arch/powerpc/syscall_arch.h
+++ b/arch/powerpc/syscall_arch.h
@@ -3,7 +3,98 @@
 ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
 #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
 
-#undef SYSCALL_NO_INLINE
-#define SYSCALL_NO_INLINE
+static inline long __syscall0(long n)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3");
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "=r"(r3)
+	:: "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+	return r3;
+}
+
+static inline long __syscall1(long n, long a)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3") = a;
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "+r"(r3)
+	:: "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+	return r3;
+}
+
+static inline long __syscall2(long n, long a, long b)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3") = a;
+	register long r4 __asm__("r4") = b;
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "+r"(r3), "+r"(r4)
+	:: "memory", "cr0", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+	return r3;
+}
+
+static inline long __syscall3(long n, long a, long b, long c)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3") = a;
+	register long r4 __asm__("r4") = b;
+	register long r5 __asm__("r5") = c;
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5)
+	:: "memory", "cr0", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+	return r3;
+}
+
+static inline long __syscall4(long n, long a, long b, long c, long d)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3") = a;
+	register long r4 __asm__("r4") = b;
+	register long r5 __asm__("r5") = c;
+	register long r6 __asm__("r6") = d;
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6)
+	:: "memory", "cr0", "r7", "r8", "r9", "r10", "r11", "r12");
+	return r3;
+}
+
+static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3") = a;
+	register long r4 __asm__("r4") = b;
+	register long r5 __asm__("r5") = c;
+	register long r6 __asm__("r6") = d;
+	register long r7 __asm__("r7") = e;
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6), "+r"(r7)
+	:: "memory", "cr0", "r8", "r9", "r10", "r11", "r12");
+	return r3;
+}
+
+static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+{
+	register long r0 __asm__("r0") = n;
+	register long r3 __asm__("r3") = a;
+	register long r4 __asm__("r4") = b;
+	register long r5 __asm__("r5") = c;
+	register long r6 __asm__("r6") = d;
+	register long r7 __asm__("r7") = e;
+	register long r8 __asm__("r8") = f;
+	__asm__ __volatile__("sc ; bns+ 1f ; neg %1, %1 ; 1:"
+	: "+r"(r0), "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6), "+r"(r7), "+r"(r8)
+	:: "memory", "cr0", "r9", "r10", "r11", "r12");
+	return r3;
+}
 
 #define SYSCALL_FADVISE_6_ARG
+
+#define SO_RCVTIMEO_OLD  18
+#define SO_SNDTIMEO_OLD  19
+
+#define VDSO_USEFUL
+#define VDSO_CGT32_SYM "__kernel_clock_gettime"
+#define VDSO_CGT32_VER "LINUX_2.6.15"
+#define VDSO_CGT_SYM "__kernel_clock_gettime64"
+#define VDSO_CGT_VER "LINUX_5.11"
diff --git a/arch/powerpc64/bits/alltypes.h.in b/arch/powerpc64/bits/alltypes.h.in
index 5b205851..143ffa8d 100644
--- a/arch/powerpc64/bits/alltypes.h.in
+++ b/arch/powerpc64/bits/alltypes.h.in
@@ -2,8 +2,13 @@
 #define _Int64 long
 #define _Reg long
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if __BIG_ENDIAN__
+#define __BYTE_ORDER 4321
+#else
+#define __BYTE_ORDER 1234
+#endif
+
+#define __LONG_MAX 0x7fffffffffffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
@@ -13,14 +18,3 @@ TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; void *__p[7]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/arch/powerpc64/bits/endian.h b/arch/powerpc64/bits/endian.h
deleted file mode 100644
index 2016cb20..00000000
--- a/arch/powerpc64/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if __BIG_ENDIAN__
-#define __BYTE_ORDER __BIG_ENDIAN
-#else
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#endif
diff --git a/arch/powerpc64/bits/hwcap.h b/arch/powerpc64/bits/hwcap.h
index 803de9b5..12981623 100644
--- a/arch/powerpc64/bits/hwcap.h
+++ b/arch/powerpc64/bits/hwcap.h
@@ -41,3 +41,5 @@
 #define PPC_FEATURE2_DARN		0x00200000
 #define PPC_FEATURE2_SCV		0x00100000
 #define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000
+#define PPC_FEATURE2_ARCH_3_1		0x00040000
+#define PPC_FEATURE2_MMA		0x00020000
diff --git a/arch/powerpc64/bits/ioctl.h b/arch/powerpc64/bits/ioctl.h
index 47586234..b6cbb18f 100644
--- a/arch/powerpc64/bits/ioctl.h
+++ b/arch/powerpc64/bits/ioctl.h
@@ -78,14 +78,6 @@
 #define TIOCGSERIAL	0x541E
 #define TIOCSSERIAL	0x541F
 #define TIOCPKT	0x5420
-#define TIOCPKT_DATA		0
-#define TIOCPKT_FLUSHREAD	1
-#define TIOCPKT_FLUSHWRITE	2
-#define TIOCPKT_STOP		4
-#define TIOCPKT_START		8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-#define TIOCPKT_IOCTL		64
 
 #define TIOCNOTTY	0x5422
 #define TIOCSETD	0x5423
@@ -113,38 +105,12 @@
 #define TIOCSLCKTRMIOS	0x5457
 #define TIOCSERGSTRUCT	0x5458
 #define TIOCSERGETLSR	0x5459
-#define TIOCSER_TEMT	0x01
 #define TIOCSERGETMULTI	0x545A
 #define TIOCSERSETMULTI	0x545B
 
 #define TIOCMIWAIT	0x545C
 #define TIOCGICOUNT	0x545D
 
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOSETOWN       0x8901
 #define SIOCSPGRP       0x8902
 #define FIOGETOWN       0x8903
@@ -152,66 +118,3 @@ struct winsize {
 #define SIOCATMARK      0x8905
 #define SIOCGSTAMP      0x8906
 #define SIOCGSTAMPNS    0x8907
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE		0x89F0
-#define SIOCPROTOPRIVATE	0x89E0
diff --git a/arch/powerpc64/bits/ipc.h b/arch/powerpc64/bits/ipc.h
index 3f2ede07..a388d56b 100644
--- a/arch/powerpc64/bits/ipc.h
+++ b/arch/powerpc64/bits/ipc.h
@@ -10,6 +10,3 @@ struct ipc_perm {
 	long long __pad2;
 	long long __pad3;
 };
-
-#define IPC_64 0x100
-
diff --git a/arch/powerpc64/bits/limits.h b/arch/powerpc64/bits/limits.h
deleted file mode 100644
index 0226588c..00000000
--- a/arch/powerpc64/bits/limits.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define LONG_BIT 64
-#endif
-
-#define LONG_MAX  0x7fffffffffffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/powerpc64/bits/mman.h b/arch/powerpc64/bits/mman.h
index b3a675a8..95ec4358 100644
--- a/arch/powerpc64/bits/mman.h
+++ b/arch/powerpc64/bits/mman.h
@@ -4,7 +4,6 @@
 #define MAP_NORESERVE   0x40
 #undef MAP_LOCKED
 #define MAP_LOCKED	0x80
-#undef MAP_SYNC
 
 #undef MCL_CURRENT
 #define MCL_CURRENT     0x2000
diff --git a/arch/powerpc64/bits/posix.h b/arch/powerpc64/bits/posix.h
deleted file mode 100644
index c37b94c1..00000000
--- a/arch/powerpc64/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_LP64_OFF64  1
-#define _POSIX_V7_LP64_OFF64  1
diff --git a/arch/powerpc64/bits/ptrace.h b/arch/powerpc64/bits/ptrace.h
index 75086ca0..303a0735 100644
--- a/arch/powerpc64/bits/ptrace.h
+++ b/arch/powerpc64/bits/ptrace.h
@@ -8,6 +8,8 @@
 #define PTRACE_SET_DEBUGREG	0x1a
 #define PTRACE_GETVSRREGS	0x1b
 #define PTRACE_SETVSRREGS	0x1c
+#define PTRACE_SYSEMU		0x1d
+#define PTRACE_SYSEMU_SINGLESTEP	0x1e
 #define PTRACE_SINGLEBLOCK	0x100
 
 #define PT_GETVRREGS PTRACE_GETVRREGS
diff --git a/arch/powerpc64/bits/reg.h b/arch/powerpc64/bits/reg.h
deleted file mode 100644
index 49382c8f..00000000
--- a/arch/powerpc64/bits/reg.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
-/* FIXME */
diff --git a/arch/powerpc64/bits/sem.h b/arch/powerpc64/bits/sem.h
deleted file mode 100644
index 558184db..00000000
--- a/arch/powerpc64/bits/sem.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <endian.h>
-
-struct semid_ds {
-	struct ipc_perm sem_perm;
-	time_t sem_otime;
-	time_t sem_ctime;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	unsigned short __pad[3], sem_nsems;
-#else
-	unsigned short sem_nsems, __pad[3];
-#endif
-	unsigned long __unused[2];
-};
diff --git a/arch/powerpc64/bits/shm.h b/arch/powerpc64/bits/shm.h
index 8108c3a8..b7f73a8d 100644
--- a/arch/powerpc64/bits/shm.h
+++ b/arch/powerpc64/bits/shm.h
@@ -21,4 +21,3 @@ struct shm_info {
 	unsigned long shm_tot, shm_rss, shm_swp;
 	unsigned long __swap_attempts, __swap_successes;
 };
-
diff --git a/arch/powerpc64/bits/signal.h b/arch/powerpc64/bits/signal.h
index 34693a68..d5493b18 100644
--- a/arch/powerpc64/bits/signal.h
+++ b/arch/powerpc64/bits/signal.h
@@ -9,18 +9,21 @@
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 
 typedef unsigned long greg_t, gregset_t[48];
+typedef double fpregset_t[33];
 
 typedef struct {
-	double fpregs[32];
-	double fpscr;
-} fpregset_t;
-
-typedef struct {
-	unsigned __int128 vrregs[32];
-	unsigned _pad[3];
-	unsigned vrsave;
-	unsigned vscr;
-	unsigned _pad2[3];
+#ifdef __GNUC__
+	__attribute__((__aligned__(16)))
+#endif
+	unsigned vrregs[32][4];
+	struct {
+#if __BIG_ENDIAN__
+		unsigned _pad[3], vscr_word;
+#else
+		unsigned vscr_word, _pad[3];
+#endif
+	} vscr;
+	unsigned vrsave, _pad[3];
 } vrregset_t;
 
 typedef struct sigcontext {
@@ -29,7 +32,7 @@ typedef struct sigcontext {
 	int _pad0;
 	unsigned long handler;
 	unsigned long oldmask;
-	void *regs;
+	struct pt_regs *regs;
 	gregset_t gp_regs;
 	fpregset_t fp_regs;
 	vrregset_t *v_regs;
diff --git a/arch/powerpc64/bits/socket.h b/arch/powerpc64/bits/socket.h
index 0f3c9aac..557e324f 100644
--- a/arch/powerpc64/bits/socket.h
+++ b/arch/powerpc64/bits/socket.h
@@ -1,37 +1,3 @@
-#include <endian.h>
-
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad1, msg_iovlen;
-#else
-	int msg_iovlen, __pad1;
-#endif
-	void *msg_control;
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad2;
-	socklen_t msg_controllen;
-#else
-	socklen_t msg_controllen;
-	int __pad2;
-#endif
-	int msg_flags;
-};
-
-struct cmsghdr {
-#if __BYTE_ORDER == __BIG_ENDIAN
-	int __pad1;
-	socklen_t cmsg_len;
-#else
-	socklen_t cmsg_len;
-	int __pad1;
-#endif
-	int cmsg_level;
-	int cmsg_type;
-};
-
 #define SO_DEBUG        1
 #define SO_REUSEADDR    2
 #define SO_TYPE         3
diff --git a/arch/powerpc64/bits/stdint.h b/arch/powerpc64/bits/stdint.h
deleted file mode 100644
index 1bb147f2..00000000
--- a/arch/powerpc64/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT64_MIN
-#define INTPTR_MAX      INT64_MAX
-#define UINTPTR_MAX     UINT64_MAX
-#define PTRDIFF_MIN     INT64_MIN
-#define PTRDIFF_MAX     INT64_MAX
-#define SIZE_MAX        UINT64_MAX
diff --git a/arch/powerpc64/bits/syscall.h.in b/arch/powerpc64/bits/syscall.h.in
index 4e29cedf..43551079 100644
--- a/arch/powerpc64/bits/syscall.h.in
+++ b/arch/powerpc64/bits/syscall.h.in
@@ -364,4 +364,42 @@
 #define __NR_pkey_mprotect          386
 #define __NR_rseq                   387
 #define __NR_io_pgetevents          388
+#define __NR_semtimedop             392
+#define __NR_semget                 393
+#define __NR_semctl                 394
+#define __NR_shmget                 395
+#define __NR_shmctl                 396
+#define __NR_shmat                  397
+#define __NR_shmdt                  398
+#define __NR_msgget                 399
+#define __NR_msgsnd                 400
+#define __NR_msgrcv                 401
+#define __NR_msgctl                 402
+#define __NR_pidfd_send_signal      424
+#define __NR_io_uring_setup         425
+#define __NR_io_uring_enter         426
+#define __NR_io_uring_register      427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/powerpc64/bits/termios.h b/arch/powerpc64/bits/termios.h
index e3f22e86..da1f406b 100644
--- a/arch/powerpc64/bits/termios.h
+++ b/arch/powerpc64/bits/termios.h
@@ -167,5 +167,5 @@ struct termios {
 #define EXTPROC 0x10000000
 
 #define XTABS   00006000
-#define TIOCSER_TEMT 0x01
+#define TIOCSER_TEMT 1
 #endif
diff --git a/arch/powerpc64/bits/user.h b/arch/powerpc64/bits/user.h
index 7ca459b3..7e75d201 100644
--- a/arch/powerpc64/bits/user.h
+++ b/arch/powerpc64/bits/user.h
@@ -1,10 +1,8 @@
-struct pt_regs {
-	unsigned long gpr[32], nip, msr, orig_gpr3, ctr, link, xer, ccr, softe;
-	unsigned long trap, dar, dsisr, result;
-};
-
 struct user {
-	struct pt_regs regs;
+	struct {
+		unsigned long gpr[32], nip, msr, orig_gpr3, ctr, link, xer, ccr, softe;
+		unsigned long trap, dar, dsisr, result;
+	} regs;
 	unsigned long u_tsize, u_dsize, u_ssize;
 	unsigned long start_code, start_data, start_stack;
 	long signal;
diff --git a/arch/powerpc64/kstat.h b/arch/powerpc64/kstat.h
new file mode 100644
index 00000000..887b3e26
--- /dev/null
+++ b/arch/powerpc64/kstat.h
@@ -0,0 +1,19 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	nlink_t st_nlink;
+	mode_t st_mode;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	unsigned long __unused[3];
+};
diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h
index 79c3ecd8..1b7b9079 100644
--- a/arch/powerpc64/pthread_arch.h
+++ b/arch/powerpc64/pthread_arch.h
@@ -1,18 +1,16 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	register char *tp __asm__("r13");
+	register uintptr_t tp __asm__("r13");
 	__asm__ ("" : "=r" (tp) );
-	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 0
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
+#define TP_OFFSET 0x7000
 #define DTP_OFFSET 0x8000
 
 // the kernel calls the ip "nip", it's the first saved value after the 32
 // GPRs.
 #define MC_PC gp_regs[32]
-
-#define CANARY canary_at_end
diff --git a/arch/powerpc64/reloc.h b/arch/powerpc64/reloc.h
index faf70acd..2f1bba05 100644
--- a/arch/powerpc64/reloc.h
+++ b/arch/powerpc64/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define ENDIAN_SUFFIX "le"
 #else
@@ -11,6 +9,7 @@
 #define TPOFF_K (-0x7000)
 
 #define REL_SYMBOLIC    R_PPC64_ADDR64
+#define REL_USYMBOLIC   R_PPC64_UADDR64
 #define REL_GOT         R_PPC64_GLOB_DAT
 #define REL_PLT         R_PPC64_JMP_SLOT
 #define REL_RELATIVE    R_PPC64_RELATIVE
diff --git a/arch/powerpc64/syscall_arch.h b/arch/powerpc64/syscall_arch.h
index 1e730625..7d34fbe4 100644
--- a/arch/powerpc64/syscall_arch.h
+++ b/arch/powerpc64/syscall_arch.h
@@ -85,3 +85,10 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 	:: "memory", "cr0", "r9", "r10", "r11", "r12");
 	return r3;
 }
+
+#define SO_RCVTIMEO_OLD  18
+#define SO_SNDTIMEO_OLD  19
+
+#define VDSO_USEFUL
+#define VDSO_CGT_SYM "__kernel_clock_gettime"
+#define VDSO_CGT_VER "LINUX_2.6.15"
diff --git a/arch/riscv32/atomic_arch.h b/arch/riscv32/atomic_arch.h
new file mode 100644
index 00000000..4d418f63
--- /dev/null
+++ b/arch/riscv32/atomic_arch.h
@@ -0,0 +1,21 @@
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+	__asm__ __volatile__ ("fence rw,rw" : : : "memory");
+}
+
+#define a_cas a_cas
+static inline int a_cas(volatile int *p, int t, int s)
+{
+	int old, tmp;
+	__asm__ __volatile__ (
+		"\n1:	lr.w.aqrl %0, (%2)\n"
+		"	bne %0, %3, 1f\n"
+		"	sc.w.aqrl %1, %4, (%2)\n"
+		"	bnez %1, 1b\n"
+		"1:"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(p), "r"((long)t), "r"((long)s)
+		: "memory");
+	return old;
+}
diff --git a/arch/riscv32/bits/alltypes.h.in b/arch/riscv32/bits/alltypes.h.in
new file mode 100644
index 00000000..e2b6129e
--- /dev/null
+++ b/arch/riscv32/bits/alltypes.h.in
@@ -0,0 +1,18 @@
+#define _Addr int
+#define _Int64 long long
+#define _Reg int
+
+#define __BYTE_ORDER 1234
+#define __LONG_MAX 0x7fffffffL
+
+#ifndef __cplusplus
+TYPEDEF int wchar_t;
+#endif
+
+TYPEDEF int blksize_t;
+TYPEDEF unsigned int nlink_t;
+
+TYPEDEF float float_t;
+TYPEDEF double double_t;
+
+TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
diff --git a/arch/riscv32/bits/fenv.h b/arch/riscv32/bits/fenv.h
new file mode 100644
index 00000000..806ec40f
--- /dev/null
+++ b/arch/riscv32/bits/fenv.h
@@ -0,0 +1,17 @@
+#define FE_INVALID      16
+#define FE_DIVBYZERO    8
+#define FE_OVERFLOW     4
+#define FE_UNDERFLOW    2
+#define FE_INEXACT      1
+
+#define FE_ALL_EXCEPT   31
+
+#define FE_TONEAREST    0
+#define FE_DOWNWARD     2
+#define FE_UPWARD       3
+#define FE_TOWARDZERO   1
+
+typedef unsigned int fexcept_t;
+typedef unsigned int fenv_t;
+
+#define FE_DFL_ENV      ((const fenv_t *) -1)
diff --git a/arch/riscv32/bits/float.h b/arch/riscv32/bits/float.h
new file mode 100644
index 00000000..719c7908
--- /dev/null
+++ b/arch/riscv32/bits/float.h
@@ -0,0 +1,16 @@
+#define FLT_EVAL_METHOD 0
+
+#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
+#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
+#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
+#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
+
+#define LDBL_MANT_DIG 113
+#define LDBL_MIN_EXP (-16381)
+#define LDBL_MAX_EXP 16384
+
+#define LDBL_DIG 33
+#define LDBL_MIN_10_EXP (-4931)
+#define LDBL_MAX_10_EXP 4932
+
+#define DECIMAL_DIG 36
diff --git a/arch/riscv32/bits/ipcstat.h b/arch/riscv32/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/riscv32/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/powerpc64/bits/msg.h b/arch/riscv32/bits/msg.h
index 2e23ca27..7bbbb2bf 100644
--- a/arch/powerpc64/bits/msg.h
+++ b/arch/riscv32/bits/msg.h
@@ -1,12 +1,18 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
-	time_t msg_stime;
-	time_t msg_rtime;
-	time_t msg_ctime;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
 	msglen_t msg_qbytes;
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/riscv32/bits/sem.h b/arch/riscv32/bits/sem.h
new file mode 100644
index 00000000..544e3d2a
--- /dev/null
+++ b/arch/riscv32/bits/sem.h
@@ -0,0 +1,18 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+#else
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	unsigned short sem_nsems;
+#endif
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/riscv32/bits/setjmp.h b/arch/riscv32/bits/setjmp.h
new file mode 100644
index 00000000..51e96276
--- /dev/null
+++ b/arch/riscv32/bits/setjmp.h
@@ -0,0 +1 @@
+typedef unsigned long long __jmp_buf[19];
diff --git a/arch/mips64/bits/shm.h b/arch/riscv32/bits/shm.h
index 8d193781..725fb469 100644
--- a/arch/mips64/bits/shm.h
+++ b/arch/riscv32/bits/shm.h
@@ -3,14 +3,21 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	time_t shm_dtime;
-	time_t shm_ctime;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
 	unsigned long __pad1;
 	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
diff --git a/arch/riscv32/bits/signal.h b/arch/riscv32/bits/signal.h
new file mode 100644
index 00000000..50b66ec9
--- /dev/null
+++ b/arch/riscv32/bits/signal.h
@@ -0,0 +1,120 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+# define MINSIGSTKSZ 2048
+# define SIGSTKSZ 8192
+#endif
+
+typedef unsigned long __riscv_mc_gp_state[32];
+
+struct __riscv_mc_f_ext_state {
+	unsigned int __f[32];
+	unsigned int __fcsr;
+};
+
+struct __riscv_mc_d_ext_state {
+	unsigned long long __f[32];
+	unsigned int __fcsr;
+};
+
+struct __riscv_mc_q_ext_state {
+	unsigned long long __f[64] __attribute__((__aligned__(16)));
+	unsigned int __fcsr;
+	unsigned int __reserved[3];
+};
+
+union __riscv_mc_fp_state {
+	struct __riscv_mc_f_ext_state __f;
+	struct __riscv_mc_d_ext_state __d;
+	struct __riscv_mc_q_ext_state __q;
+};
+
+typedef struct mcontext_t {
+	__riscv_mc_gp_state __gregs;
+	union __riscv_mc_fp_state __fpregs;
+} mcontext_t;
+
+#if defined(_GNU_SOURCE)
+#define REG_PC 0
+#define REG_RA 1
+#define REG_SP 2
+#define REG_TP 4
+#define REG_S0 8
+#define REG_S1 9
+#define REG_A0 10
+#define REG_S2 18
+#endif
+
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+typedef unsigned long greg_t;
+typedef unsigned long gregset_t[32];
+typedef union __riscv_mc_fp_state fpregset_t;
+struct sigcontext {
+	gregset_t gregs;
+	fpregset_t fpregs;
+};
+#endif
+
+struct sigaltstack {
+	void *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+};
+
+typedef struct __ucontext
+{
+	unsigned long uc_flags;
+	struct __ucontext *uc_link;
+	stack_t uc_stack;
+	sigset_t uc_sigmask;
+	mcontext_t uc_mcontext;
+} ucontext_t;
+
+#define SA_NOCLDSTOP 1
+#define SA_NOCLDWAIT 2
+#define SA_SIGINFO   4
+#define SA_ONSTACK   0x08000000
+#define SA_RESTART   0x10000000
+#define SA_NODEFER   0x40000000
+#define SA_RESETHAND 0x80000000
+#define SA_RESTORER  0x04000000
+
+#endif
+
+#define SIGHUP     1
+#define SIGINT     2
+#define SIGQUIT    3
+#define SIGILL     4
+#define SIGTRAP    5
+#define SIGABRT    6
+#define SIGIOT     SIGABRT
+#define SIGBUS     7
+#define SIGFPE     8
+#define SIGKILL    9
+#define SIGUSR1   10
+#define SIGSEGV   11
+#define SIGUSR2   12
+#define SIGPIPE   13
+#define SIGALRM   14
+#define SIGTERM   15
+#define SIGSTKFLT 16
+#define SIGCHLD   17
+#define SIGCONT   18
+#define SIGSTOP   19
+#define SIGTSTP   20
+#define SIGTTIN   21
+#define SIGTTOU   22
+#define SIGURG    23
+#define SIGXCPU   24
+#define SIGXFSZ   25
+#define SIGVTALRM 26
+#define SIGPROF   27
+#define SIGWINCH  28
+#define SIGIO     29
+#define SIGPOLL   SIGIO
+#define SIGPWR    30
+#define SIGSYS    31
+#define SIGUNUSED SIGSYS
+
+#define _NSIG     65
diff --git a/arch/riscv32/bits/syscall.h.in b/arch/riscv32/bits/syscall.h.in
new file mode 100644
index 00000000..9228d840
--- /dev/null
+++ b/arch/riscv32/bits/syscall.h.in
@@ -0,0 +1,300 @@
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR_fcntl64 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR_nfsservctl 42
+#define __NR_statfs64 43
+#define __NR_fstatfs64 44
+#define __NR_truncate64 45
+#define __NR_ftruncate64 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR__llseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR_sendfile64 71
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+#define __NR_sync_file_range 84
+#define __NR_timerfd_create 85
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_nanosleep 101
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_getoverrun 109
+#define __NR_timer_delete 111
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_restart_syscall 128
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_reboot 142
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_setdomainname 162
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR_mmap2 222
+#define __NR_fadvise64_64 223
+#define __NR_swapon 224
+#define __NR_swapoff 225
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_remap_file_pages 234
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_migrate_pages 238
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_arch_specific_syscall 244
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at 264
+#define __NR_open_by_handle_at 265
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+#define __NR_sched_setattr 274
+#define __NR_sched_getattr 275
+#define __NR_renameat2 276
+#define __NR_seccomp 277
+#define __NR_getrandom 278
+#define __NR_memfd_create 279
+#define __NR_bpf 280
+#define __NR_execveat 281
+#define __NR_userfaultfd 282
+#define __NR_membarrier 283
+#define __NR_mlock2 284
+#define __NR_copy_file_range 285
+#define __NR_preadv2 286
+#define __NR_pwritev2 287
+#define __NR_pkey_mprotect 288
+#define __NR_pkey_alloc 289
+#define __NR_pkey_free 290
+#define __NR_statx 291
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_clock_gettime64		403
+#define __NR_clock_settime64		404
+#define __NR_clock_adjtime64		405
+#define __NR_clock_getres_time64	406
+#define __NR_clock_nanosleep_time64	407
+#define __NR_timer_gettime64		408
+#define __NR_timer_settime64		409
+#define __NR_timerfd_gettime64		410
+#define __NR_timerfd_settime64		411
+#define __NR_utimensat_time64		412
+#define __NR_pselect6_time64		413
+#define __NR_ppoll_time64		414
+#define __NR_io_pgetevents_time64	416
+#define __NR_recvmmsg_time64		417
+#define __NR_mq_timedsend_time64	418
+#define __NR_mq_timedreceive_time64	419
+#define __NR_semtimedop_time64		420
+#define __NR_rt_sigtimedwait_time64	421
+#define __NR_futex_time64		422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
+#define __NR_futex __NR_futex_time64
+
+#define __NR_sysriscv __NR_arch_specific_syscall
+#define __NR_riscv_flush_icache (__NR_sysriscv + 15)
diff --git a/arch/riscv32/bits/user.h b/arch/riscv32/bits/user.h
new file mode 100644
index 00000000..0d37de0b
--- /dev/null
+++ b/arch/riscv32/bits/user.h
@@ -0,0 +1,6 @@
+#include <signal.h>
+
+#define ELF_NGREG 32
+#define ELF_NFPREG 33
+typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG];
+typedef union __riscv_mc_fp_state elf_fpregset_t;
diff --git a/arch/riscv32/crt_arch.h b/arch/riscv32/crt_arch.h
new file mode 100644
index 00000000..6b93fcfd
--- /dev/null
+++ b/arch/riscv32/crt_arch.h
@@ -0,0 +1,19 @@
+__asm__(
+".section .sdata,\"aw\"\n"
+".text\n"
+".global " START "\n"
+".type " START ",%function\n"
+START ":\n"
+".weak __global_pointer$\n"
+".hidden __global_pointer$\n"
+".option push\n"
+".option norelax\n\t"
+"lla gp, __global_pointer$\n"
+".option pop\n\t"
+"mv a0, sp\n"
+".weak _DYNAMIC\n"
+".hidden _DYNAMIC\n\t"
+"lla a1, _DYNAMIC\n\t"
+"andi sp, sp, -16\n\t"
+"tail " START "_c"
+);
diff --git a/arch/riscv32/kstat.h b/arch/riscv32/kstat.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/arch/riscv32/kstat.h
diff --git a/arch/riscv32/pthread_arch.h b/arch/riscv32/pthread_arch.h
new file mode 100644
index 00000000..a20d7fba
--- /dev/null
+++ b/arch/riscv32/pthread_arch.h
@@ -0,0 +1,13 @@
+static inline uintptr_t __get_tp()
+{
+	uintptr_t tp;
+	__asm__ __volatile__("mv %0, tp" : "=r"(tp));
+	return tp;
+}
+
+#define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
+
+#define DTP_OFFSET 0x800
+
+#define MC_PC __gregs[0]
diff --git a/arch/riscv32/reloc.h b/arch/riscv32/reloc.h
new file mode 100644
index 00000000..59d15f17
--- /dev/null
+++ b/arch/riscv32/reloc.h
@@ -0,0 +1,22 @@
+#if defined __riscv_float_abi_soft
+#define RISCV_FP_SUFFIX "-sf"
+#elif defined __riscv_float_abi_single
+#define RISCV_FP_SUFFIX "-sp"
+#elif defined __riscv_float_abi_double
+#define RISCV_FP_SUFFIX ""
+#endif
+
+#define LDSO_ARCH "riscv32" RISCV_FP_SUFFIX
+
+#define TPOFF_K 0
+
+#define REL_SYMBOLIC    R_RISCV_32
+#define REL_PLT         R_RISCV_JUMP_SLOT
+#define REL_RELATIVE    R_RISCV_RELATIVE
+#define REL_COPY        R_RISCV_COPY
+#define REL_DTPMOD      R_RISCV_TLS_DTPMOD32
+#define REL_DTPOFF      R_RISCV_TLS_DTPREL32
+#define REL_TPOFF       R_RISCV_TLS_TPREL32
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mv sp, %1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/riscv32/syscall_arch.h b/arch/riscv32/syscall_arch.h
new file mode 100644
index 00000000..70d2773f
--- /dev/null
+++ b/arch/riscv32/syscall_arch.h
@@ -0,0 +1,79 @@
+#define __SYSCALL_LL_E(x) \
+((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
+((union { long long ll; long l[2]; }){ .ll = x }).l[1]
+#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
+
+#define __asm_syscall(...) \
+	__asm__ __volatile__ ("ecall\n\t" \
+	: "=r"(a0) : __VA_ARGS__ : "memory"); \
+	return a0; \
+
+static inline long __syscall0(long n)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0");
+	__asm_syscall("r"(a7))
+}
+
+static inline long __syscall1(long n, long a)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	__asm_syscall("r"(a7), "0"(a0))
+}
+
+static inline long __syscall2(long n, long a, long b)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1))
+}
+
+static inline long __syscall3(long n, long a, long b, long c)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2))
+}
+
+static inline long __syscall4(long n, long a, long b, long c, long d)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	register long a3 __asm__("a3") = d;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2), "r"(a3))
+}
+
+static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	register long a3 __asm__("a3") = d;
+	register long a4 __asm__("a4") = e;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4))
+}
+
+static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	register long a3 __asm__("a3") = d;
+	register long a4 __asm__("a4") = e;
+	register long a5 __asm__("a5") = f;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5))
+}
+
+#define VDSO_USEFUL
+#define VDSO_CGT_SYM "__vdso_clock_gettime"
+#define VDSO_CGT_VER "LINUX_4.15"
+
+#define IPC_64 0
diff --git a/arch/riscv64/atomic_arch.h b/arch/riscv64/atomic_arch.h
new file mode 100644
index 00000000..0c382588
--- /dev/null
+++ b/arch/riscv64/atomic_arch.h
@@ -0,0 +1,38 @@
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+	__asm__ __volatile__ ("fence rw,rw" : : : "memory");
+}
+
+#define a_cas a_cas
+static inline int a_cas(volatile int *p, int t, int s)
+{
+	int old, tmp;
+	__asm__ __volatile__ (
+		"\n1:	lr.w.aqrl %0, (%2)\n"
+		"	bne %0, %3, 1f\n"
+		"	sc.w.aqrl %1, %4, (%2)\n"
+		"	bnez %1, 1b\n"
+		"1:"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(p), "r"((long)t), "r"((long)s)
+		: "memory");
+	return old;
+}
+
+#define a_cas_p a_cas_p
+static inline void *a_cas_p(volatile void *p, void *t, void *s)
+{
+	void *old;
+	int tmp;
+	__asm__ __volatile__ (
+		"\n1:	lr.d.aqrl %0, (%2)\n"
+		"	bne %0, %3, 1f\n"
+		"	sc.d.aqrl %1, %4, (%2)\n"
+		"	bnez %1, 1b\n"
+		"1:"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(p), "r"(t), "r"(s)
+		: "memory");
+	return old;
+}
diff --git a/arch/riscv64/bits/alltypes.h.in b/arch/riscv64/bits/alltypes.h.in
new file mode 100644
index 00000000..4579d174
--- /dev/null
+++ b/arch/riscv64/bits/alltypes.h.in
@@ -0,0 +1,18 @@
+#define _Addr long
+#define _Int64 long
+#define _Reg long
+
+#define __BYTE_ORDER 1234
+#define __LONG_MAX 0x7fffffffffffffffL
+
+#ifndef __cplusplus
+TYPEDEF int wchar_t;
+#endif
+
+TYPEDEF int blksize_t;
+TYPEDEF unsigned int nlink_t;
+
+TYPEDEF float float_t;
+TYPEDEF double double_t;
+
+TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
diff --git a/arch/riscv64/bits/fenv.h b/arch/riscv64/bits/fenv.h
new file mode 100644
index 00000000..806ec40f
--- /dev/null
+++ b/arch/riscv64/bits/fenv.h
@@ -0,0 +1,17 @@
+#define FE_INVALID      16
+#define FE_DIVBYZERO    8
+#define FE_OVERFLOW     4
+#define FE_UNDERFLOW    2
+#define FE_INEXACT      1
+
+#define FE_ALL_EXCEPT   31
+
+#define FE_TONEAREST    0
+#define FE_DOWNWARD     2
+#define FE_UPWARD       3
+#define FE_TOWARDZERO   1
+
+typedef unsigned int fexcept_t;
+typedef unsigned int fenv_t;
+
+#define FE_DFL_ENV      ((const fenv_t *) -1)
diff --git a/arch/riscv64/bits/float.h b/arch/riscv64/bits/float.h
new file mode 100644
index 00000000..719c7908
--- /dev/null
+++ b/arch/riscv64/bits/float.h
@@ -0,0 +1,16 @@
+#define FLT_EVAL_METHOD 0
+
+#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
+#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
+#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
+#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
+
+#define LDBL_MANT_DIG 113
+#define LDBL_MIN_EXP (-16381)
+#define LDBL_MAX_EXP 16384
+
+#define LDBL_DIG 33
+#define LDBL_MIN_10_EXP (-4931)
+#define LDBL_MAX_10_EXP 4932
+
+#define DECIMAL_DIG 36
diff --git a/arch/riscv64/bits/setjmp.h b/arch/riscv64/bits/setjmp.h
new file mode 100644
index 00000000..ad7e4016
--- /dev/null
+++ b/arch/riscv64/bits/setjmp.h
@@ -0,0 +1 @@
+typedef unsigned long __jmp_buf[26];
diff --git a/arch/riscv64/bits/signal.h b/arch/riscv64/bits/signal.h
new file mode 100644
index 00000000..56f8fe17
--- /dev/null
+++ b/arch/riscv64/bits/signal.h
@@ -0,0 +1,119 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+# define MINSIGSTKSZ 2048
+# define SIGSTKSZ 8192
+#endif
+
+typedef unsigned long __riscv_mc_gp_state[32];
+
+struct __riscv_mc_f_ext_state {
+	unsigned int __f[32];
+	unsigned int __fcsr;
+};
+
+struct __riscv_mc_d_ext_state {
+	unsigned long long __f[32];
+	unsigned int __fcsr;
+};
+
+struct __riscv_mc_q_ext_state {
+	unsigned long long __f[64] __attribute__((__aligned__(16)));
+	unsigned int __fcsr;
+	unsigned int __reserved[3];
+};
+
+union __riscv_mc_fp_state {
+	struct __riscv_mc_f_ext_state __f;
+	struct __riscv_mc_d_ext_state __d;
+	struct __riscv_mc_q_ext_state __q;
+};
+
+typedef struct mcontext_t {
+	__riscv_mc_gp_state __gregs;
+	union __riscv_mc_fp_state __fpregs;
+} mcontext_t;
+
+#if defined(_GNU_SOURCE)
+#define REG_PC 0
+#define REG_RA 1
+#define REG_SP 2
+#define REG_TP 4
+#define REG_S0 8
+#define REG_S1 9
+#define REG_A0 10
+#define REG_S2 18
+#endif
+
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+typedef unsigned long greg_t;
+typedef unsigned long gregset_t[32];
+typedef union __riscv_mc_fp_state fpregset_t;
+struct sigcontext {
+	gregset_t gregs;
+	fpregset_t fpregs;
+};
+#endif
+
+struct sigaltstack {
+	void *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+};
+
+typedef struct __ucontext
+{
+	unsigned long uc_flags;
+	struct __ucontext *uc_link;
+	stack_t uc_stack;
+	sigset_t uc_sigmask;
+	mcontext_t uc_mcontext;
+} ucontext_t;
+
+#define SA_NOCLDSTOP 1
+#define SA_NOCLDWAIT 2
+#define SA_SIGINFO   4
+#define SA_ONSTACK   0x08000000
+#define SA_RESTART   0x10000000
+#define SA_NODEFER   0x40000000
+#define SA_RESETHAND 0x80000000
+
+#endif
+
+#define SIGHUP     1
+#define SIGINT     2
+#define SIGQUIT    3
+#define SIGILL     4
+#define SIGTRAP    5
+#define SIGABRT    6
+#define SIGIOT     SIGABRT
+#define SIGBUS     7
+#define SIGFPE     8
+#define SIGKILL    9
+#define SIGUSR1   10
+#define SIGSEGV   11
+#define SIGUSR2   12
+#define SIGPIPE   13
+#define SIGALRM   14
+#define SIGTERM   15
+#define SIGSTKFLT 16
+#define SIGCHLD   17
+#define SIGCONT   18
+#define SIGSTOP   19
+#define SIGTSTP   20
+#define SIGTTIN   21
+#define SIGTTOU   22
+#define SIGURG    23
+#define SIGXCPU   24
+#define SIGXFSZ   25
+#define SIGVTALRM 26
+#define SIGPROF   27
+#define SIGWINCH  28
+#define SIGIO     29
+#define SIGPOLL   SIGIO
+#define SIGPWR    30
+#define SIGSYS    31
+#define SIGUNUSED SIGSYS
+
+#define _NSIG     65
diff --git a/arch/riscv64/bits/syscall.h.in b/arch/riscv64/bits/syscall.h.in
new file mode 100644
index 00000000..e362bd0e
--- /dev/null
+++ b/arch/riscv64/bits/syscall.h.in
@@ -0,0 +1,309 @@
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_io_getevents 4
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR_fcntl 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR_nfsservctl 42
+#define __NR_statfs 43
+#define __NR_fstatfs 44
+#define __NR_truncate 45
+#define __NR_ftruncate 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR_lseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR_sendfile 71
+#define __NR_pselect6 72
+#define __NR_ppoll 73
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR_newfstatat 79
+#define __NR_fstat 80
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+#define __NR_sync_file_range 84
+#define __NR_timerfd_create 85
+#define __NR_timerfd_settime 86
+#define __NR_timerfd_gettime 87
+#define __NR_utimensat 88
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_futex 98
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_nanosleep 101
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_gettime 108
+#define __NR_timer_getoverrun 109
+#define __NR_timer_settime 110
+#define __NR_timer_delete 111
+#define __NR_clock_settime 112
+#define __NR_clock_gettime 113
+#define __NR_clock_getres 114
+#define __NR_clock_nanosleep 115
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_sched_rr_get_interval 127
+#define __NR_restart_syscall 128
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigtimedwait 137
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_reboot 142
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_setdomainname 162
+#define __NR_getrlimit 163
+#define __NR_setrlimit 164
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_gettimeofday 169
+#define __NR_settimeofday 170
+#define __NR_adjtimex 171
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_timedsend 182
+#define __NR_mq_timedreceive 183
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semtimedop 192
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR_mmap 222
+#define __NR_fadvise64 223
+#define __NR_swapon 224
+#define __NR_swapoff 225
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_remap_file_pages 234
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_migrate_pages 238
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_recvmmsg 243
+#define __NR_arch_specific_syscall 244
+#define __NR_wait4 260
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at 264
+#define __NR_open_by_handle_at 265
+#define __NR_clock_adjtime 266
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+#define __NR_sched_setattr 274
+#define __NR_sched_getattr 275
+#define __NR_renameat2 276
+#define __NR_seccomp 277
+#define __NR_getrandom 278
+#define __NR_memfd_create 279
+#define __NR_bpf 280
+#define __NR_execveat 281
+#define __NR_userfaultfd 282
+#define __NR_membarrier 283
+#define __NR_mlock2 284
+#define __NR_copy_file_range 285
+#define __NR_preadv2 286
+#define __NR_pwritev2 287
+#define __NR_pkey_mprotect 288
+#define __NR_pkey_alloc 289
+#define __NR_pkey_free 290
+#define __NR_statx 291
+#define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
+
+#define __NR_sysriscv __NR_arch_specific_syscall
+#define __NR_riscv_flush_icache (__NR_sysriscv + 15)
diff --git a/arch/riscv64/bits/user.h b/arch/riscv64/bits/user.h
new file mode 100644
index 00000000..0d37de0b
--- /dev/null
+++ b/arch/riscv64/bits/user.h
@@ -0,0 +1,6 @@
+#include <signal.h>
+
+#define ELF_NGREG 32
+#define ELF_NFPREG 33
+typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG];
+typedef union __riscv_mc_fp_state elf_fpregset_t;
diff --git a/arch/riscv64/crt_arch.h b/arch/riscv64/crt_arch.h
new file mode 100644
index 00000000..6b93fcfd
--- /dev/null
+++ b/arch/riscv64/crt_arch.h
@@ -0,0 +1,19 @@
+__asm__(
+".section .sdata,\"aw\"\n"
+".text\n"
+".global " START "\n"
+".type " START ",%function\n"
+START ":\n"
+".weak __global_pointer$\n"
+".hidden __global_pointer$\n"
+".option push\n"
+".option norelax\n\t"
+"lla gp, __global_pointer$\n"
+".option pop\n\t"
+"mv a0, sp\n"
+".weak _DYNAMIC\n"
+".hidden _DYNAMIC\n\t"
+"lla a1, _DYNAMIC\n\t"
+"andi sp, sp, -16\n\t"
+"tail " START "_c"
+);
diff --git a/arch/riscv64/kstat.h b/arch/riscv64/kstat.h
new file mode 100644
index 00000000..92625f36
--- /dev/null
+++ b/arch/riscv64/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	unsigned long __pad;
+	off_t st_size;
+	blksize_t st_blksize;
+	int __pad2;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	unsigned __unused[2];
+};
diff --git a/arch/riscv64/pthread_arch.h b/arch/riscv64/pthread_arch.h
new file mode 100644
index 00000000..a20d7fba
--- /dev/null
+++ b/arch/riscv64/pthread_arch.h
@@ -0,0 +1,13 @@
+static inline uintptr_t __get_tp()
+{
+	uintptr_t tp;
+	__asm__ __volatile__("mv %0, tp" : "=r"(tp));
+	return tp;
+}
+
+#define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
+
+#define DTP_OFFSET 0x800
+
+#define MC_PC __gregs[0]
diff --git a/arch/riscv64/reloc.h b/arch/riscv64/reloc.h
new file mode 100644
index 00000000..7c7c0611
--- /dev/null
+++ b/arch/riscv64/reloc.h
@@ -0,0 +1,23 @@
+#if defined __riscv_float_abi_soft
+#define RISCV_FP_SUFFIX "-sf"
+#elif defined __riscv_float_abi_single
+#define RISCV_FP_SUFFIX "-sp"
+#elif defined __riscv_float_abi_double
+#define RISCV_FP_SUFFIX ""
+#endif
+
+#define LDSO_ARCH "riscv64" RISCV_FP_SUFFIX
+
+#define TPOFF_K 0
+
+#define REL_SYMBOLIC    R_RISCV_64
+#define REL_PLT         R_RISCV_JUMP_SLOT
+#define REL_RELATIVE    R_RISCV_RELATIVE
+#define REL_COPY        R_RISCV_COPY
+#define REL_DTPMOD      R_RISCV_TLS_DTPMOD64
+#define REL_DTPOFF      R_RISCV_TLS_DTPREL64
+#define REL_TPOFF       R_RISCV_TLS_TPREL64
+#define REL_TLSDESC     R_RISCV_TLSDESC
+
+#define CRTJMP(pc,sp) __asm__ __volatile__( \
+	"mv sp, %1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" )
diff --git a/arch/riscv64/syscall_arch.h b/arch/riscv64/syscall_arch.h
new file mode 100644
index 00000000..81993fc8
--- /dev/null
+++ b/arch/riscv64/syscall_arch.h
@@ -0,0 +1,77 @@
+#define __SYSCALL_LL_E(x) (x)
+#define __SYSCALL_LL_O(x) (x)
+
+#define __asm_syscall(...) \
+	__asm__ __volatile__ ("ecall\n\t" \
+	: "=r"(a0) : __VA_ARGS__ : "memory"); \
+	return a0; \
+
+static inline long __syscall0(long n)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0");
+	__asm_syscall("r"(a7))
+}
+
+static inline long __syscall1(long n, long a)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	__asm_syscall("r"(a7), "0"(a0))
+}
+
+static inline long __syscall2(long n, long a, long b)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1))
+}
+
+static inline long __syscall3(long n, long a, long b, long c)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2))
+}
+
+static inline long __syscall4(long n, long a, long b, long c, long d)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	register long a3 __asm__("a3") = d;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2), "r"(a3))
+}
+
+static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	register long a3 __asm__("a3") = d;
+	register long a4 __asm__("a4") = e;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4))
+}
+
+static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+{
+	register long a7 __asm__("a7") = n;
+	register long a0 __asm__("a0") = a;
+	register long a1 __asm__("a1") = b;
+	register long a2 __asm__("a2") = c;
+	register long a3 __asm__("a3") = d;
+	register long a4 __asm__("a4") = e;
+	register long a5 __asm__("a5") = f;
+	__asm_syscall("r"(a7), "0"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5))
+}
+
+#define VDSO_USEFUL
+#define VDSO_CGT_SYM "__vdso_clock_gettime"
+#define VDSO_CGT_VER "LINUX_4.15"
+
+#define IPC_64 0
diff --git a/arch/s390x/bits/alltypes.h.in b/arch/s390x/bits/alltypes.h.in
index 1a838462..6c0eb7f4 100644
--- a/arch/s390x/bits/alltypes.h.in
+++ b/arch/s390x/bits/alltypes.h.in
@@ -2,25 +2,18 @@
 #define _Int64 long
 #define _Reg long
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#define __BYTE_ORDER 4321
+#define __LONG_MAX 0x7fffffffffffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
 #endif
 
+#if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 1
 TYPEDEF double float_t;
+#else
+TYPEDEF float float_t;
+#endif
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; void *__p[7]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/arch/s390x/bits/endian.h b/arch/s390x/bits/endian.h
deleted file mode 100644
index ef074b77..00000000
--- a/arch/s390x/bits/endian.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/arch/s390x/bits/fcntl.h b/arch/s390x/bits/fcntl.h
index 1eca6ba5..a231efb4 100644
--- a/arch/s390x/bits/fcntl.h
+++ b/arch/s390x/bits/fcntl.h
@@ -38,3 +38,6 @@
 #define F_GETOWN_EX 16
 
 #define F_GETOWNER_UIDS 17
+
+#define POSIX_FADV_DONTNEED   6
+#define POSIX_FADV_NOREUSE    7
diff --git a/arch/s390x/bits/float.h b/arch/s390x/bits/float.h
index 90b73bee..e188cb61 100644
--- a/arch/s390x/bits/float.h
+++ b/arch/s390x/bits/float.h
@@ -1,4 +1,8 @@
-#define FLT_EVAL_METHOD 1
+#ifdef __FLT_EVAL_METHOD__
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
+#else
+#define FLT_EVAL_METHOD 0
+#endif
 
 #define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
 #define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
diff --git a/arch/s390x/bits/ipc.h b/arch/s390x/bits/ipc.h
deleted file mode 100644
index 4710c12b..00000000
--- a/arch/s390x/bits/ipc.h
+++ /dev/null
@@ -1,14 +0,0 @@
-struct ipc_perm {
-	key_t __ipc_perm_key;
-	uid_t uid;
-	gid_t gid;
-	uid_t cuid;
-	gid_t cgid;
-	mode_t mode;
-	unsigned short __pad1;
-	unsigned short __ipc_perm_seq;
-	unsigned long __pad2;
-	unsigned long __pad3;
-};
-
-#define IPC_64 0x100
diff --git a/arch/s390x/bits/limits.h b/arch/s390x/bits/limits.h
index 86ef7663..07743b6f 100644
--- a/arch/s390x/bits/limits.h
+++ b/arch/s390x/bits/limits.h
@@ -1,8 +1 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define PAGESIZE 4096
-#define LONG_BIT 64
-#endif
-
-#define LONG_MAX  0x7fffffffffffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/s390x/bits/posix.h b/arch/s390x/bits/posix.h
deleted file mode 100644
index c37b94c1..00000000
--- a/arch/s390x/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_LP64_OFF64  1
-#define _POSIX_V7_LP64_OFF64  1
diff --git a/arch/s390x/bits/ptrace.h b/arch/s390x/bits/ptrace.h
index d50e3262..a06cb077 100644
--- a/arch/s390x/bits/ptrace.h
+++ b/arch/s390x/bits/ptrace.h
@@ -1,4 +1,7 @@
 #define PTRACE_SINGLEBLOCK		12
+#define PTRACE_OLDSETOPTIONS		21
+#define PTRACE_SYSEMU			31
+#define PTRACE_SYSEMU_SINGLESTEP	32
 #define PTRACE_PEEKUSR_AREA		0x5000
 #define PTRACE_POKEUSR_AREA		0x5001
 #define PTRACE_GET_LAST_BREAK		0x5006
diff --git a/arch/s390x/bits/reg.h b/arch/s390x/bits/reg.h
deleted file mode 100644
index 2633f39d..00000000
--- a/arch/s390x/bits/reg.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
diff --git a/arch/s390x/bits/sem.h b/arch/s390x/bits/sem.h
deleted file mode 100644
index 644f68a0..00000000
--- a/arch/s390x/bits/sem.h
+++ /dev/null
@@ -1,7 +0,0 @@
-struct semid_ds {
-	struct ipc_perm sem_perm;
-	time_t sem_otime;
-	time_t sem_ctime;
-	unsigned short __pad[3], sem_nsems;
-	unsigned long __unused[2];
-};
diff --git a/arch/s390x/bits/socket.h b/arch/s390x/bits/socket.h
deleted file mode 100644
index 2b81bfec..00000000
--- a/arch/s390x/bits/socket.h
+++ /dev/null
@@ -1,44 +0,0 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int __pad1, msg_iovlen;
-	void *msg_control;
-	int __pad2;
-	socklen_t msg_controllen;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	int __pad1;
-	socklen_t cmsg_len;
-	int cmsg_level;
-	int cmsg_type;
-};
-
-#define SO_DEBUG        1
-#define SO_REUSEADDR    2
-#define SO_TYPE         3
-#define SO_ERROR        4
-#define SO_DONTROUTE    5
-#define SO_BROADCAST    6
-#define SO_SNDBUF       7
-#define SO_RCVBUF       8
-#define SO_KEEPALIVE    9
-#define SO_OOBINLINE    10
-#define SO_NO_CHECK     11
-#define SO_PRIORITY     12
-#define SO_LINGER       13
-#define SO_BSDCOMPAT    14
-#define SO_REUSEPORT    15
-#define SO_PASSCRED     16
-#define SO_PEERCRED     17
-#define SO_RCVLOWAT     18
-#define SO_SNDLOWAT     19
-#define SO_RCVTIMEO     20
-#define SO_SNDTIMEO     21
-#define SO_ACCEPTCONN   30
-#define SO_SNDBUFFORCE  32
-#define SO_RCVBUFFORCE  33
-#define SO_PROTOCOL     38
-#define SO_DOMAIN       39
diff --git a/arch/s390x/bits/stdint.h b/arch/s390x/bits/stdint.h
deleted file mode 100644
index 1bb147f2..00000000
--- a/arch/s390x/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT64_MIN
-#define INTPTR_MAX      INT64_MAX
-#define UINTPTR_MAX     UINT64_MAX
-#define PTRDIFF_MIN     INT64_MIN
-#define PTRDIFF_MAX     INT64_MAX
-#define SIZE_MAX        UINT64_MAX
diff --git a/arch/s390x/bits/syscall.h.in b/arch/s390x/bits/syscall.h.in
index 85a18e79..e60711a6 100644
--- a/arch/s390x/bits/syscall.h.in
+++ b/arch/s390x/bits/syscall.h.in
@@ -326,4 +326,46 @@
 #define __NR_kexec_file_load            381
 #define __NR_io_pgetevents              382
 #define __NR_rseq                       383
+#define __NR_pkey_mprotect              384
+#define __NR_pkey_alloc                 385
+#define __NR_pkey_free                  386
+#define __NR_semtimedop                 392
+#define __NR_semget                     393
+#define __NR_semctl                     394
+#define __NR_shmget                     395
+#define __NR_shmctl                     396
+#define __NR_shmat                      397
+#define __NR_shmdt                      398
+#define __NR_msgget                     399
+#define __NR_msgsnd                     400
+#define __NR_msgrcv                     401
+#define __NR_msgctl                     402
+#define __NR_pidfd_send_signal          424
+#define __NR_io_uring_setup             425
+#define __NR_io_uring_enter             426
+#define __NR_io_uring_register          427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_memfd_secret	447
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/s390x/bits/user.h b/arch/s390x/bits/user.h
index ff3f0483..47f94f20 100644
--- a/arch/s390x/bits/user.h
+++ b/arch/s390x/bits/user.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
-
 typedef union {
 	double d;
 	float f;
diff --git a/arch/s390x/kstat.h b/arch/s390x/kstat.h
new file mode 100644
index 00000000..001c10be
--- /dev/null
+++ b/arch/s390x/kstat.h
@@ -0,0 +1,19 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	nlink_t st_nlink;
+	mode_t st_mode;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	off_t st_size;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	unsigned long __unused[3];
+};
diff --git a/arch/s390x/pthread_arch.h b/arch/s390x/pthread_arch.h
index e2251f1f..e54fec3f 100644
--- a/arch/s390x/pthread_arch.h
+++ b/arch/s390x/pthread_arch.h
@@ -1,14 +1,12 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	struct pthread *self;
+	uintptr_t tp;
 	__asm__ (
 		"ear  %0, %%a0\n"
 		"sllg %0, %0, 32\n"
 		"ear  %0, %%a1\n"
-		: "=r"(self));
-	return self;
+		: "=r"(tp));
+	return tp;
 }
 
-#define TP_ADJ(p) (p)
-
 #define MC_PC psw.addr
diff --git a/arch/s390x/reloc.h b/arch/s390x/reloc.h
index a238dc65..38de9d9b 100644
--- a/arch/s390x/reloc.h
+++ b/arch/s390x/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #define LDSO_ARCH "s390x"
 
 #define REL_SYMBOLIC    R_390_64
@@ -12,4 +10,4 @@
 #define REL_TPOFF       R_390_TLS_TPOFF
 
 #define CRTJMP(pc,sp) __asm__ __volatile__( \
-	"lgr %%r15,%1; br %0" : : "r"(pc), "r"(sp) : "memory" )
+	"lgr %%r15,%1; br %0" : : "a"(pc), "r"(sp) : "memory" )
diff --git a/arch/s390x/syscall_arch.h b/arch/s390x/syscall_arch.h
index afb99852..d1cfd2e8 100644
--- a/arch/s390x/syscall_arch.h
+++ b/arch/s390x/syscall_arch.h
@@ -73,4 +73,6 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 	__asm_syscall("+r"(r2), "r"(r1), "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7));
 }
 
-#define SYSCALL_USE_SOCKETCALL
+#define VDSO_USEFUL
+#define VDSO_CGT_SYM "__kernel_clock_gettime"
+#define VDSO_CGT_VER "LINUX_2.6.29"
diff --git a/arch/sh/arch.mak b/arch/sh/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/sh/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/sh/bits/alltypes.h.in b/arch/sh/bits/alltypes.h.in
index 37f27d6f..6a538352 100644
--- a/arch/sh/bits/alltypes.h.in
+++ b/arch/sh/bits/alltypes.h.in
@@ -1,26 +1,25 @@
+#define _REDIR_TIME64 1
 #define _Addr int
 #define _Int64 long long
 #define _Reg int
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#if __BIG_ENDIAN__
+#define __BYTE_ORDER 4321
+#else
+#define __BYTE_ORDER 1234
+#endif
+
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
+#ifdef __WCHAR_TYPE__
+TYPEDEF __WCHAR_TYPE__ wchar_t;
+#else
 TYPEDEF long wchar_t;
 #endif
+#endif
 
 TYPEDEF float float_t;
 TYPEDEF double double_t;
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/sh/bits/endian.h b/arch/sh/bits/endian.h
deleted file mode 100644
index 2016cb20..00000000
--- a/arch/sh/bits/endian.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#if __BIG_ENDIAN__
-#define __BYTE_ORDER __BIG_ENDIAN
-#else
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#endif
diff --git a/arch/sh/bits/ioctl.h b/arch/sh/bits/ioctl.h
index 3c7ab4bb..370b6901 100644
--- a/arch/sh/bits/ioctl.h
+++ b/arch/sh/bits/ioctl.h
@@ -65,14 +65,6 @@
 #define TIOCGSERIAL         _IOR('T', 30, char[60])
 #define TIOCSSERIAL         _IOW('T', 31, char[60])
 #define TIOCPKT             _IOW('T', 32, int)
-#define TIOCPKT_DATA        0
-#define TIOCPKT_FLUSHREAD   1
-#define TIOCPKT_FLUSHWRITE  2
-#define TIOCPKT_STOP        4
-#define TIOCPKT_START       8
-#define TIOCPKT_NOSTOP      16
-#define TIOCPKT_DOSTOP      32
-#define TIOCPKT_IOCTL       64
 
 #define TIOCNOTTY           _IO('T',  34)
 #define TIOCSETD            _IOW('T', 35, int)
@@ -104,105 +96,17 @@
 #define TIOCSLCKTRMIOS      _IO('T',  87)
 #define TIOCSERGSTRUCT      _IOR('T', 88, char[216])
 #define TIOCSERGETLSR       _IOR('T', 89, unsigned int)
-#define TIOCSER_TEMT        0x01
 #define TIOCSERGETMULTI     _IOR('T', 90, char[168])
 #define TIOCSERSETMULTI     _IOW('T', 91, char[168])
 
 #define TIOCMIWAIT          _IO('T', 92)
 #define TIOCGICOUNT         _IO('T', 93)
 
-struct winsize {
-    unsigned short ws_row;
-    unsigned short ws_col;
-    unsigned short ws_xpixel;
-    unsigned short ws_ypixel;
-};
-
-#define N_TTY           0
-#define N_SLIP          1
-#define N_MOUSE         2
-#define N_PPP           3
-#define N_STRIP         4
-#define N_AX25          5
-#define N_X25           6
-#define N_6PACK         7
-#define N_MASC          8
-#define N_R3964         9
-#define N_PROFIBUS_FDL  10
-#define N_IRDA          11
-#define N_SMSBLOCK      12
-#define N_HDLC          13
-#define N_SYNC_PPP      14
-#define N_HCI           15
-
 #define FIOGETOWN       _IOR('f', 123, int)
 #define FIOSETOWN       _IOW('f', 124, int)
 
 #define SIOCATMARK      _IOR('s', 7, int)
 #define SIOCSPGRP       _IOW('s', 8, int)
 #define SIOCGPGRP       _IOW('s', 9, int)
-#define SIOCGSTAMP      _IOR('s', 100, char[8])
-#define SIOCGSTAMPNS    _IOR('s', 101, char[8])
-
-#define SIOCADDRT       0x890B
-#define SIOCDELRT       0x890C
-#define SIOCRTMSG       0x890D
-
-#define SIOCGIFNAME     0x8910
-#define SIOCSIFLINK     0x8911
-#define SIOCGIFCONF     0x8912
-#define SIOCGIFFLAGS    0x8913
-#define SIOCSIFFLAGS    0x8914
-#define SIOCGIFADDR     0x8915
-#define SIOCSIFADDR     0x8916
-#define SIOCGIFDSTADDR  0x8917
-#define SIOCSIFDSTADDR  0x8918
-#define SIOCGIFBRDADDR  0x8919
-#define SIOCSIFBRDADDR  0x891a
-#define SIOCGIFNETMASK  0x891b
-#define SIOCSIFNETMASK  0x891c
-#define SIOCGIFMETRIC   0x891d
-#define SIOCSIFMETRIC   0x891e
-#define SIOCGIFMEM      0x891f
-#define SIOCSIFMEM      0x8920
-#define SIOCGIFMTU      0x8921
-#define SIOCSIFMTU      0x8922
-#define SIOCSIFNAME     0x8923
-#define SIOCSIFHWADDR   0x8924
-#define SIOCGIFENCAP    0x8925
-#define SIOCSIFENCAP    0x8926
-#define SIOCGIFHWADDR   0x8927
-#define SIOCGIFSLAVE    0x8929
-#define SIOCSIFSLAVE    0x8930
-#define SIOCADDMULTI    0x8931
-#define SIOCDELMULTI    0x8932
-#define SIOCGIFINDEX    0x8933
-#define SIOGIFINDEX     SIOCGIFINDEX
-#define SIOCSIFPFLAGS   0x8934
-#define SIOCGIFPFLAGS   0x8935
-#define SIOCDIFADDR     0x8936
-#define SIOCSIFHWBROADCAST 0x8937
-#define SIOCGIFCOUNT    0x8938
-
-#define SIOCGIFBR       0x8940
-#define SIOCSIFBR       0x8941
-
-#define SIOCGIFTXQLEN   0x8942
-#define SIOCSIFTXQLEN   0x8943
-
-#define SIOCDARP        0x8953
-#define SIOCGARP        0x8954
-#define SIOCSARP        0x8955
-
-#define SIOCDRARP       0x8960
-#define SIOCGRARP       0x8961
-#define SIOCSRARP       0x8962
-
-#define SIOCGIFMAP      0x8970
-#define SIOCSIFMAP      0x8971
-
-#define SIOCADDDLCI     0x8980
-#define SIOCDELDLCI     0x8981
-
-#define SIOCDEVPRIVATE      0x89F0
-#define SIOCPROTOPRIVATE    0x89E0
+#define SIOCGSTAMP      _IOR(0x89, 6, char[16])
+#define SIOCGSTAMPNS    _IOR(0x89, 7, char[16])
diff --git a/arch/sh/bits/ipcstat.h b/arch/sh/bits/ipcstat.h
new file mode 100644
index 00000000..4f4fcb0c
--- /dev/null
+++ b/arch/sh/bits/ipcstat.h
@@ -0,0 +1 @@
+#define IPC_STAT 0x102
diff --git a/arch/sh/bits/limits.h b/arch/sh/bits/limits.h
index c340ceb2..07743b6f 100644
--- a/arch/sh/bits/limits.h
+++ b/arch/sh/bits/limits.h
@@ -1,8 +1 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define PAGESIZE 4096
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/x86_64/bits/msg.h b/arch/sh/bits/msg.h
index 2e23ca27..7bbbb2bf 100644
--- a/arch/x86_64/bits/msg.h
+++ b/arch/sh/bits/msg.h
@@ -1,12 +1,18 @@
 struct msqid_ds {
 	struct ipc_perm msg_perm;
-	time_t msg_stime;
-	time_t msg_rtime;
-	time_t msg_ctime;
+	unsigned long __msg_stime_lo;
+	unsigned long __msg_stime_hi;
+	unsigned long __msg_rtime_lo;
+	unsigned long __msg_rtime_hi;
+	unsigned long __msg_ctime_lo;
+	unsigned long __msg_ctime_hi;
 	unsigned long msg_cbytes;
 	msgqnum_t msg_qnum;
 	msglen_t msg_qbytes;
 	pid_t msg_lspid;
 	pid_t msg_lrpid;
 	unsigned long __unused[2];
+	time_t msg_stime;
+	time_t msg_rtime;
+	time_t msg_ctime;
 };
diff --git a/arch/sh/bits/posix.h b/arch/sh/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/sh/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/sh/bits/sem.h b/arch/sh/bits/sem.h
new file mode 100644
index 00000000..544e3d2a
--- /dev/null
+++ b/arch/sh/bits/sem.h
@@ -0,0 +1,18 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	unsigned long __sem_otime_lo;
+	unsigned long __sem_otime_hi;
+	unsigned long __sem_ctime_lo;
+	unsigned long __sem_ctime_hi;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+#else
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	unsigned short sem_nsems;
+#endif
+	long __unused3;
+	long __unused4;
+	time_t sem_otime;
+	time_t sem_ctime;
+};
diff --git a/arch/sh/bits/shm.h b/arch/sh/bits/shm.h
index 6cdac131..adc01e34 100644
--- a/arch/sh/bits/shm.h
+++ b/arch/sh/bits/shm.h
@@ -3,17 +3,21 @@
 struct shmid_ds {
 	struct ipc_perm shm_perm;
 	size_t shm_segsz;
-	time_t shm_atime;
-	int __unused1;
-	time_t shm_dtime;
-	int __unused2;
-	time_t shm_ctime;
-	int __unused3;
+	unsigned long __shm_atime_lo;
+	unsigned long __shm_atime_hi;
+	unsigned long __shm_dtime_lo;
+	unsigned long __shm_dtime_hi;
+	unsigned long __shm_ctime_lo;
+	unsigned long __shm_ctime_hi;
 	pid_t shm_cpid;
 	pid_t shm_lpid;
 	unsigned long shm_nattch;
 	unsigned long __pad1;
 	unsigned long __pad2;
+	unsigned long __pad3;
+	time_t shm_atime;
+	time_t shm_dtime;
+	time_t shm_ctime;
 };
 
 struct shminfo {
diff --git a/arch/sh/bits/signal.h b/arch/sh/bits/signal.h
index 160311fa..d0b14828 100644
--- a/arch/sh/bits/signal.h
+++ b/arch/sh/bits/signal.h
@@ -9,7 +9,16 @@
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 typedef int greg_t, gregset_t[16];
 typedef int freg_t, fpregset_t[16];
-typedef struct sigcontext {
+typedef struct {
+	unsigned long oldmask;
+	unsigned long gregs[16];
+	unsigned long pc, pr, sr;
+	unsigned long gbr, mach, macl;
+	unsigned long fpregs[16];
+	unsigned long xfpregs[16];
+	unsigned int fpscr, fpul, ownedfp;
+} mcontext_t;
+struct sigcontext {
 	unsigned long oldmask;
 	unsigned long sc_regs[16];
 	unsigned long sc_pc, sc_pr, sc_sr;
@@ -17,7 +26,7 @@ typedef struct sigcontext {
 	unsigned long sc_fpregs[16];
 	unsigned long sc_xfpregs[16];
 	unsigned int sc_fpscr, sc_fpul, sc_ownedfp;
-} mcontext_t;
+};
 #else
 typedef struct {
 	unsigned long __regs[58];
diff --git a/arch/sh/bits/stat.h b/arch/sh/bits/stat.h
index 22b19bbf..5d7828cf 100644
--- a/arch/sh/bits/stat.h
+++ b/arch/sh/bits/stat.h
@@ -14,8 +14,12 @@ struct stat {
 	off_t st_size;
 	blksize_t st_blksize;
 	blkcnt_t st_blocks;
+	struct {
+		long tv_sec;
+		long tv_nsec;
+	} __st_atim32, __st_mtim32, __st_ctim32;
+	ino_t st_ino;
 	struct timespec st_atim;
 	struct timespec st_mtim;
 	struct timespec st_ctim;
-	ino_t st_ino;
 };
diff --git a/arch/sh/bits/stdint.h b/arch/sh/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/sh/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/sh/bits/syscall.h.in b/arch/sh/bits/syscall.h.in
index 420f4a0f..915a79cd 100644
--- a/arch/sh/bits/syscall.h.in
+++ b/arch/sh/bits/syscall.h.in
@@ -67,8 +67,8 @@
 #define __NR_setrlimit              75
 #define __NR_getrlimit              76
 #define __NR_getrusage              77
-#define __NR_gettimeofday           78
-#define __NR_settimeofday           79
+#define __NR_gettimeofday_time32           78
+#define __NR_settimeofday_time32           79
 #define __NR_getgroups              80
 #define __NR_setgroups              81
 #define __NR_symlink                83
@@ -231,14 +231,14 @@
 #define __NR_remap_file_pages       257
 #define __NR_set_tid_address        258
 #define __NR_timer_create           259
-#define __NR_timer_settime          260
-#define __NR_timer_gettime          261
+#define __NR_timer_settime32          260
+#define __NR_timer_gettime32          261
 #define __NR_timer_getoverrun       262
 #define __NR_timer_delete           263
-#define __NR_clock_settime          264
-#define __NR_clock_gettime          265
-#define __NR_clock_getres           266
-#define __NR_clock_nanosleep        267
+#define __NR_clock_settime32          264
+#define __NR_clock_gettime32          265
+#define __NR_clock_getres_time32           266
+#define __NR_clock_nanosleep_time32        267
 #define __NR_statfs64               268
 #define __NR_fstatfs64              269
 #define __NR_tgkill                 270
@@ -294,8 +294,8 @@
 #define __NR_timerfd_create         322
 #define __NR_eventfd                323
 #define __NR_fallocate              324
-#define __NR_timerfd_settime        325
-#define __NR_timerfd_gettime        326
+#define __NR_timerfd_settime32        325
+#define __NR_timerfd_gettime32        326
 #define __NR_signalfd4              327
 #define __NR_eventfd2               328
 #define __NR_epoll_create1          329
@@ -352,4 +352,66 @@
 #define __NR_copy_file_range        380
 #define __NR_preadv2                381
 #define __NR_pwritev2               382
+#define __NR_statx                  383
+#define __NR_pkey_mprotect          384
+#define __NR_pkey_alloc             385
+#define __NR_pkey_free              386
+#define __NR_rseq                   387
+#define __NR_semget                 393
+#define __NR_semctl                 394
+#define __NR_shmget                 395
+#define __NR_shmctl                 396
+#define __NR_shmat                  397
+#define __NR_shmdt                  398
+#define __NR_msgget                 399
+#define __NR_msgsnd                 400
+#define __NR_msgrcv                 401
+#define __NR_msgctl                 402
+#define __NR_clock_gettime64        403
+#define __NR_clock_settime64        404
+#define __NR_clock_adjtime64        405
+#define __NR_clock_getres_time64    406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64        408
+#define __NR_timer_settime64        409
+#define __NR_timerfd_gettime64      410
+#define __NR_timerfd_settime64      411
+#define __NR_utimensat_time64       412
+#define __NR_pselect6_time64        413
+#define __NR_ppoll_time64           414
+#define __NR_io_pgetevents_time64   416
+#define __NR_recvmmsg_time64        417
+#define __NR_mq_timedsend_time64    418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64      420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64           422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal      424
+#define __NR_io_uring_setup         425
+#define __NR_io_uring_enter         426
+#define __NR_io_uring_register      427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/sh/bits/user.h b/arch/sh/bits/user.h
index d7363f74..b6ba16ed 100644
--- a/arch/sh/bits/user.h
+++ b/arch/sh/bits/user.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
-
 #define REG_REG0	 0
 #define REG_REG15	15
 #define REG_PC		16
@@ -17,34 +14,6 @@
 #define REG_FPSCR	55
 #define REG_FPUL	56
 
-struct pt_regs {
-	unsigned long regs[16];
-	unsigned long pc;
-	unsigned long pr;
-	unsigned long sr;
-	unsigned long gbr;
-	unsigned long mach;
-	unsigned long macl;
-	long tra;
-};
-
-struct pt_dspregs {
-	unsigned long a1;
-	unsigned long a0g;
-	unsigned long a1g;
-	unsigned long m0;
-	unsigned long m1;
-	unsigned long a0;
-	unsigned long x0;
-	unsigned long x1;
-	unsigned long y0;
-	unsigned long y1;
-	unsigned long dsr;
-	unsigned long rs;
-	unsigned long re;
-	unsigned long mod;
-};
-
 struct user_fpu_struct {
 	unsigned long fp_regs[16];
 	unsigned long xfp_regs[16];
@@ -58,7 +27,11 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 typedef struct user_fpu_struct elf_fpregset_t;
 
 struct user {
-	struct pt_regs regs;
+	struct {
+		unsigned long regs[16];
+		unsigned long pc, pr, sr, gbr, mach, macl;
+		long tra;
+	} regs;
 	struct user_fpu_struct fpu;
 	int u_fpvalid;
 	unsigned long u_tsize;
diff --git a/arch/sh/kstat.h b/arch/sh/kstat.h
new file mode 100644
index 00000000..af449c95
--- /dev/null
+++ b/arch/sh/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	int __st_dev_padding;
+	long __st_ino_truncated;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	int __st_rdev_padding;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	ino_t st_ino;
+};
diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h
index 3ee9c1a9..199c2d55 100644
--- a/arch/sh/pthread_arch.h
+++ b/arch/sh/pthread_arch.h
@@ -1,17 +1,16 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	char *self;
-	__asm__ ("stc gbr,%0" : "=r" (self) );
-	return (struct pthread *) (self - sizeof(struct pthread));
+	uintptr_t tp;
+	__asm__ ("stc gbr,%0" : "=r" (tp) );
+	return tp;
 }
 
 #define TLS_ABOVE_TP
 #define GAP_ABOVE_TP 8
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
-#define MC_PC sc_pc
+#define MC_PC pc
 
 #ifdef __FDPIC__
-#define MC_GOT sc_regs[12]
+#define MC_GOT gregs[12]
 #define CANCEL_GOT (*(uintptr_t *)((char *)__syscall_cp_asm+sizeof(uintptr_t)))
 #endif
diff --git a/arch/sh/reloc.h b/arch/sh/reloc.h
index a1f16cb1..17b1a9a9 100644
--- a/arch/sh/reloc.h
+++ b/arch/sh/reloc.h
@@ -1,5 +1,3 @@
-#include <endian.h>
-
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define ENDIAN_SUFFIX "eb"
 #else
diff --git a/arch/sh/syscall_arch.h b/arch/sh/syscall_arch.h
index 48f61d94..628d8d37 100644
--- a/arch/sh/syscall_arch.h
+++ b/arch/sh/syscall_arch.h
@@ -88,3 +88,6 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 }
 
 #define SYSCALL_IPC_BROKEN_MODE
+
+#define SIOCGSTAMP_OLD   (2U<<30 | 's'<<8 | 100 | 8<<16)
+#define SIOCGSTAMPNS_OLD (2U<<30 | 's'<<8 | 101 | 8<<16)
diff --git a/arch/x32/bits/alltypes.h.in b/arch/x32/bits/alltypes.h.in
index 16391295..9f9d2410 100644
--- a/arch/x32/bits/alltypes.h.in
+++ b/arch/x32/bits/alltypes.h.in
@@ -2,12 +2,16 @@
 #define _Int64 long long
 #define _Reg long long
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#define __BYTE_ORDER 1234
+#define __LONG_MAX 0x7fffffffL
 
 #ifndef __cplusplus
+#ifdef __WCHAR_TYPE__
+TYPEDEF __WCHAR_TYPE__ wchar_t;
+#else
 TYPEDEF long wchar_t;
 #endif
+#endif
 
 #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 2
 TYPEDEF long double float_t;
@@ -18,14 +22,3 @@ TYPEDEF double double_t;
 #endif
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long long time_t;
-TYPEDEF long long suseconds_t;
-
-TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/x32/bits/endian.h b/arch/x32/bits/endian.h
deleted file mode 100644
index 172c338f..00000000
--- a/arch/x32/bits/endian.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __BYTE_ORDER __LITTLE_ENDIAN
diff --git a/arch/x32/bits/fcntl.h b/arch/x32/bits/fcntl.h
index 1b88ad39..08627f81 100644
--- a/arch/x32/bits/fcntl.h
+++ b/arch/x32/bits/fcntl.h
@@ -13,7 +13,7 @@
 
 #define O_ASYNC      020000
 #define O_DIRECT     040000
-#define O_LARGEFILE       0
+#define O_LARGEFILE 0100000
 #define O_NOATIME  01000000
 #define O_PATH    010000000
 #define O_TMPFILE 020200000
diff --git a/arch/x32/bits/ioctl_fix.h b/arch/x32/bits/ioctl_fix.h
new file mode 100644
index 00000000..83b957bd
--- /dev/null
+++ b/arch/x32/bits/ioctl_fix.h
@@ -0,0 +1,4 @@
+#undef SIOCGSTAMP
+#undef SIOCGSTAMPNS
+#define SIOCGSTAMP      0x8906
+#define SIOCGSTAMPNS    0x8907
diff --git a/arch/x32/bits/ipc.h b/arch/x32/bits/ipc.h
index 55d2e41a..a12380f6 100644
--- a/arch/x32/bits/ipc.h
+++ b/arch/x32/bits/ipc.h
@@ -9,5 +9,3 @@ struct ipc_perm {
 	long long __pad1;
 	long long __pad2;
 };
-
-#define IPC_64 0
diff --git a/arch/x32/bits/limits.h b/arch/x32/bits/limits.h
index c340ceb2..07743b6f 100644
--- a/arch/x32/bits/limits.h
+++ b/arch/x32/bits/limits.h
@@ -1,8 +1 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define PAGESIZE 4096
-#define LONG_BIT 32
-#endif
-
-#define LONG_MAX  0x7fffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/x32/bits/posix.h b/arch/x32/bits/posix.h
deleted file mode 100644
index 30a38714..00000000
--- a/arch/x32/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_ILP32_OFFBIG  1
-#define _POSIX_V7_ILP32_OFFBIG  1
diff --git a/arch/x32/bits/reg.h b/arch/x32/bits/reg.h
index 5faaef1a..6e54abcf 100644
--- a/arch/x32/bits/reg.h
+++ b/arch/x32/bits/reg.h
@@ -1,5 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 32
 #define R15    0
 #define R14    1
 #define R13    2
diff --git a/arch/x32/bits/sem.h b/arch/x32/bits/sem.h
new file mode 100644
index 00000000..18745f4c
--- /dev/null
+++ b/arch/x32/bits/sem.h
@@ -0,0 +1,11 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	time_t sem_otime;
+	long long __unused1;
+	time_t sem_ctime;
+	long long __unused2;
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long long)-sizeof(short)];
+	long long __unused3;
+	long long __unused4;
+};
diff --git a/arch/x32/bits/socket.h b/arch/x32/bits/socket.h
index a4c89f3d..8d830010 100644
--- a/arch/x32/bits/socket.h
+++ b/arch/x32/bits/socket.h
@@ -1,16 +1,5 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int msg_iovlen, __pad1;
-	void *msg_control;
-	socklen_t msg_controllen, __pad2;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	socklen_t cmsg_len;
-	int __pad1;
-	int cmsg_level;
-	int cmsg_type;
-};
+#define SO_RCVTIMEO     20
+#define SO_SNDTIMEO     21
+#define SO_TIMESTAMP    29
+#define SO_TIMESTAMPNS  35
+#define SO_TIMESTAMPING 37
diff --git a/arch/x32/bits/stdint.h b/arch/x32/bits/stdint.h
deleted file mode 100644
index d1b27121..00000000
--- a/arch/x32/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT32_MIN
-#define INTPTR_MAX      INT32_MAX
-#define UINTPTR_MAX     UINT32_MAX
-#define PTRDIFF_MIN     INT32_MIN
-#define PTRDIFF_MAX     INT32_MAX
-#define SIZE_MAX        UINT32_MAX
diff --git a/arch/x32/bits/syscall.h.in b/arch/x32/bits/syscall.h.in
index 67d89f91..1d065eea 100644
--- a/arch/x32/bits/syscall.h.in
+++ b/arch/x32/bits/syscall.h.in
@@ -286,6 +286,35 @@
 #define __NR_statx (0x40000000 + 332)
 #define __NR_io_pgetevents (0x40000000 + 333)
 #define __NR_rseq (0x40000000 + 334)
+#define __NR_pidfd_send_signal (0x40000000 + 424)
+#define __NR_io_uring_setup (0x40000000 + 425)
+#define __NR_io_uring_enter (0x40000000 + 426)
+#define __NR_io_uring_register (0x40000000 + 427)
+#define __NR_open_tree		(0x40000000 + 428)
+#define __NR_move_mount		(0x40000000 + 429)
+#define __NR_fsopen		(0x40000000 + 430)
+#define __NR_fsconfig		(0x40000000 + 431)
+#define __NR_fsmount		(0x40000000 + 432)
+#define __NR_fspick		(0x40000000 + 433)
+#define __NR_pidfd_open		(0x40000000 + 434)
+#define __NR_clone3		(0x40000000 + 435)
+#define __NR_close_range	(0x40000000 + 436)
+#define __NR_openat2		(0x40000000 + 437)
+#define __NR_pidfd_getfd	(0x40000000 + 438)
+#define __NR_faccessat2		(0x40000000 + 439)
+#define __NR_process_madvise	(0x40000000 + 440)
+#define __NR_epoll_pwait2	(0x40000000 + 441)
+#define __NR_mount_setattr	(0x40000000 + 442)
+#define __NR_landlock_create_ruleset	(0x40000000 + 444)
+#define __NR_landlock_add_rule	(0x40000000 + 445)
+#define __NR_landlock_restrict_self	(0x40000000 + 446)
+#define __NR_memfd_secret	(0x40000000 + 447)
+#define __NR_process_mrelease	(0x40000000 + 448)
+#define __NR_futex_waitv	(0x40000000 + 449)
+#define __NR_set_mempolicy_home_node	(0x40000000 + 450)
+#define __NR_cachestat		(0x40000000 + 451)
+#define __NR_fchmodat2		(0x40000000 + 452)
+
 
 #define __NR_rt_sigaction (0x40000000 + 512)
 #define __NR_rt_sigreturn (0x40000000 + 513)
diff --git a/arch/x32/bits/user.h b/arch/x32/bits/user.h
index 4073cc06..b328edf9 100644
--- a/arch/x32/bits/user.h
+++ b/arch/x32/bits/user.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
-
 typedef struct user_fpregs_struct {
 	uint16_t cwd, swd, ftw, fop;
 	uint64_t rip, rdp;
diff --git a/arch/x32/crt_arch.h b/arch/x32/crt_arch.h
index 3eec61bd..b1c9c476 100644
--- a/arch/x32/crt_arch.h
+++ b/arch/x32/crt_arch.h
@@ -1,6 +1,7 @@
 __asm__(
 ".text \n"
 ".global " START " \n"
+".type " START ",%function \n"
 START ": \n"
 "	xor %rbp,%rbp \n"
 "	mov %rsp,%rdi \n"
diff --git a/arch/x32/kstat.h b/arch/x32/kstat.h
new file mode 100644
index 00000000..ce25fce3
--- /dev/null
+++ b/arch/x32/kstat.h
@@ -0,0 +1,22 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	nlink_t st_nlink;
+
+	mode_t st_mode;
+	uid_t st_uid;
+	gid_t st_gid;
+	unsigned int    __pad0;
+	dev_t st_rdev;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+
+	long long st_atime_sec;
+	long st_atime_nsec;
+	long long st_mtime_sec;
+	long st_mtime_nsec;
+	long long st_ctime_sec;
+	long st_ctime_nsec;
+	long long __unused[3];
+};
diff --git a/arch/x32/pthread_arch.h b/arch/x32/pthread_arch.h
index f640a1a1..c1e7716d 100644
--- a/arch/x32/pthread_arch.h
+++ b/arch/x32/pthread_arch.h
@@ -1,14 +1,12 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	struct pthread *self;
-	__asm__ ("mov %%fs:0,%0" : "=r" (self) );
-	return self;
+	uintptr_t tp;
+	__asm__ ("mov %%fs:0,%0" : "=r" (tp) );
+	return tp;
 }
 
-#define TP_ADJ(p) (p)
-
 #define MC_PC gregs[REG_RIP]
 
-#define CANARY canary2
+#define CANARY_PAD
 
 #define tls_mod_off_t unsigned long long
diff --git a/arch/x32/syscall_arch.h b/arch/x32/syscall_arch.h
index 344da03c..e0111cc0 100644
--- a/arch/x32/syscall_arch.h
+++ b/arch/x32/syscall_arch.h
@@ -3,35 +3,6 @@
 
 #define __scc(X) sizeof(1?(X):0ULL) < 8 ? (unsigned long) (X) : (long long) (X)
 typedef long long syscall_arg_t;
-struct __timespec { long long tv_sec; long tv_nsec; };
-struct __timespec_kernel { long long tv_sec; long long tv_nsec; };
-#define __tsc(X) ((struct __timespec*)(unsigned long)(X))
-#define __fixup(X) do { if(X) { \
-	ts->tv_sec = __tsc(X)->tv_sec; \
-	ts->tv_nsec = __tsc(X)->tv_nsec; \
-	(X) = (unsigned long)ts; } } while(0)
-#define __fixup_case_2 \
-	case SYS_nanosleep: \
-		__fixup(a1); break; \
-	case SYS_clock_settime: \
-		__fixup(a2); break;
-#define __fixup_case_3 \
-	case SYS_clock_nanosleep: case SYS_rt_sigtimedwait: case SYS_ppoll: \
-		__fixup(a3); break; \
-	case SYS_utimensat: \
-		if(a3) { \
-			ts[0].tv_sec = __tsc(a3)[0].tv_sec; \
-			ts[0].tv_nsec = __tsc(a3)[0].tv_nsec; \
-			ts[1].tv_sec = __tsc(a3)[1].tv_sec; \
-			ts[1].tv_nsec = __tsc(a3)[1].tv_nsec; \
-			a3 = (unsigned long)ts; \
-		} break;
-#define __fixup_case_4 \
-	case SYS_futex: \
-		if((a2 & (~128 /* FUTEX_PRIVATE_FLAG */)) == 0 /* FUTEX_WAIT */) __fixup(a4); break;
-#define __fixup_case_5 \
-	case SYS_mq_timedsend: case SYS_mq_timedreceive: case SYS_pselect6: \
-		__fixup(a5); break;
 
 static __inline long __syscall0(long long n)
 {
@@ -50,10 +21,6 @@ static __inline long __syscall1(long long n, long long a1)
 static __inline long __syscall2(long long n, long long a1, long long a2)
 {
 	unsigned long ret;
-	struct __timespec_kernel ts[1];
-	switch (n) {
-		__fixup_case_2;
-	}
 	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2)
 					: "rcx", "r11", "memory");
 	return ret;
@@ -62,11 +29,6 @@ static __inline long __syscall2(long long n, long long a1, long long a2)
 static __inline long __syscall3(long long n, long long a1, long long a2, long long a3)
 {
 	unsigned long ret;
-	struct __timespec_kernel ts[2];
-	switch (n) {
-		__fixup_case_2;
-		__fixup_case_3;
-	}
 	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
 						  "d"(a3) : "rcx", "r11", "memory");
 	return ret;
@@ -77,12 +39,6 @@ static __inline long __syscall4(long long n, long long a1, long long a2, long lo
 {
 	unsigned long ret;
 	register long long a4 __asm__("r10") = a4_;
-	struct __timespec_kernel ts[2];
-	switch (n) {
-		__fixup_case_2;
-		__fixup_case_3;
-		__fixup_case_4;
-	}
 	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
 					  "d"(a3), "r"(a4): "rcx", "r11", "memory");
 	return ret;
@@ -94,13 +50,6 @@ static __inline long __syscall5(long long n, long long a1, long long a2, long lo
 	unsigned long ret;
 	register long long a4 __asm__("r10") = a4_;
 	register long long a5 __asm__("r8") = a5_;
-	struct __timespec_kernel ts[2];
-	switch (n) {
-		__fixup_case_2;
-		__fixup_case_3;
-		__fixup_case_4;
-		__fixup_case_5;
-	}
 	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
 					  "d"(a3), "r"(a4), "r"(a5) : "rcx", "r11", "memory");
 	return ret;
@@ -113,14 +62,32 @@ static __inline long __syscall6(long long n, long long a1, long long a2, long lo
 	register long long a4 __asm__("r10") = a4_;
 	register long long a5 __asm__("r8") = a5_;
 	register long long a6 __asm__("r9") = a6_;
-	struct __timespec_kernel ts[2];
-	switch (n) {
-		__fixup_case_2;
-		__fixup_case_3;
-		__fixup_case_4;
-		__fixup_case_5;
-	}
 	__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
 					  "d"(a3), "r"(a4), "r"(a5), "r"(a6) : "rcx", "r11", "memory");
 	return ret;
 }
+
+#undef SYS_futimesat
+
+#define SYS_clock_gettime64 SYS_clock_gettime
+#define SYS_clock_settime64 SYS_clock_settime
+#define SYS_clock_adjtime64 SYS_clock_adjtime
+#define SYS_clock_nanosleep_time64 SYS_clock_nanosleep
+#define SYS_timer_gettime64 SYS_timer_gettime
+#define SYS_timer_settime64 SYS_timer_settime
+#define SYS_timerfd_gettime64 SYS_timerfd_gettime
+#define SYS_timerfd_settime64 SYS_timerfd_settime
+#define SYS_utimensat_time64 SYS_utimensat
+#define SYS_pselect6_time64 SYS_pselect6
+#define SYS_ppoll_time64 SYS_ppoll
+#define SYS_recvmmsg_time64 SYS_recvmmsg
+#define SYS_mq_timedsend_time64 SYS_mq_timedsend
+#define SYS_mq_timedreceive_time64 SYS_mq_timedreceive
+#define SYS_semtimedop_time64 SYS_semtimedop
+#define SYS_rt_sigtimedwait_time64 SYS_rt_sigtimedwait
+#define SYS_futex_time64 SYS_futex
+#define SYS_sched_rr_get_interval_time64 SYS_sched_rr_get_interval
+#define SYS_getrusage_time64 SYS_getrusage
+#define SYS_wait4_time64 SYS_wait4
+
+#define IPC_64 0
diff --git a/arch/x86_64/bits/alltypes.h.in b/arch/x86_64/bits/alltypes.h.in
index dc551d47..5cd8a299 100644
--- a/arch/x86_64/bits/alltypes.h.in
+++ b/arch/x86_64/bits/alltypes.h.in
@@ -2,8 +2,8 @@
 #define _Int64 long
 #define _Reg long
 
-TYPEDEF __builtin_va_list va_list;
-TYPEDEF __builtin_va_list __isoc_va_list;
+#define __BYTE_ORDER 1234
+#define __LONG_MAX 0x7fffffffffffffffL
 
 #ifndef __cplusplus
 TYPEDEF int wchar_t;
@@ -18,14 +18,3 @@ TYPEDEF double double_t;
 #endif
 
 TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-
-TYPEDEF long time_t;
-TYPEDEF long suseconds_t;
-
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[10]; volatile int __vi[10]; volatile void *volatile __p[5]; } __u; } mtx_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } pthread_cond_t;
-TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[6]; } __u; } cnd_t;
-TYPEDEF struct { union { int __i[14]; volatile int __vi[14]; void *__p[7]; } __u; } pthread_rwlock_t;
-TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/arch/x86_64/bits/endian.h b/arch/x86_64/bits/endian.h
deleted file mode 100644
index 172c338f..00000000
--- a/arch/x86_64/bits/endian.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __BYTE_ORDER __LITTLE_ENDIAN
diff --git a/arch/x86_64/bits/fcntl.h b/arch/x86_64/bits/fcntl.h
deleted file mode 100644
index 1b88ad39..00000000
--- a/arch/x86_64/bits/fcntl.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#define O_CREAT        0100
-#define O_EXCL         0200
-#define O_NOCTTY       0400
-#define O_TRUNC       01000
-#define O_APPEND      02000
-#define O_NONBLOCK    04000
-#define O_DSYNC      010000
-#define O_SYNC     04010000
-#define O_RSYNC    04010000
-#define O_DIRECTORY 0200000
-#define O_NOFOLLOW  0400000
-#define O_CLOEXEC  02000000
-
-#define O_ASYNC      020000
-#define O_DIRECT     040000
-#define O_LARGEFILE       0
-#define O_NOATIME  01000000
-#define O_PATH    010000000
-#define O_TMPFILE 020200000
-#define O_NDELAY O_NONBLOCK
-
-#define F_DUPFD  0
-#define F_GETFD  1
-#define F_SETFD  2
-#define F_GETFL  3
-#define F_SETFL  4
-
-#define F_SETOWN 8
-#define F_GETOWN 9
-#define F_SETSIG 10
-#define F_GETSIG 11
-
-#define F_GETLK 5
-#define F_SETLK 6
-#define F_SETLKW 7
-
-#define F_SETOWN_EX 15
-#define F_GETOWN_EX 16
-
-#define F_GETOWNER_UIDS 17
diff --git a/arch/x86_64/bits/ipc.h b/arch/x86_64/bits/ipc.h
deleted file mode 100644
index 3d894e30..00000000
--- a/arch/x86_64/bits/ipc.h
+++ /dev/null
@@ -1,13 +0,0 @@
-struct ipc_perm {
-	key_t __ipc_perm_key;
-	uid_t uid;
-	gid_t gid;
-	uid_t cuid;
-	gid_t cgid;
-	mode_t mode;
-	int __ipc_perm_seq;
-	long __pad1;
-	long __pad2;
-};
-
-#define IPC_64 0
diff --git a/arch/x86_64/bits/limits.h b/arch/x86_64/bits/limits.h
index 86ef7663..07743b6f 100644
--- a/arch/x86_64/bits/limits.h
+++ b/arch/x86_64/bits/limits.h
@@ -1,8 +1 @@
-#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
- || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define PAGESIZE 4096
-#define LONG_BIT 64
-#endif
-
-#define LONG_MAX  0x7fffffffffffffffL
-#define LLONG_MAX  0x7fffffffffffffffLL
diff --git a/arch/x86_64/bits/posix.h b/arch/x86_64/bits/posix.h
deleted file mode 100644
index c37b94c1..00000000
--- a/arch/x86_64/bits/posix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define _POSIX_V6_LP64_OFF64  1
-#define _POSIX_V7_LP64_OFF64  1
diff --git a/arch/x86_64/bits/reg.h b/arch/x86_64/bits/reg.h
index a4df04ce..6e54abcf 100644
--- a/arch/x86_64/bits/reg.h
+++ b/arch/x86_64/bits/reg.h
@@ -1,5 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
 #define R15    0
 #define R14    1
 #define R13    2
diff --git a/arch/x86_64/bits/sem.h b/arch/x86_64/bits/sem.h
new file mode 100644
index 00000000..e61571c1
--- /dev/null
+++ b/arch/x86_64/bits/sem.h
@@ -0,0 +1,11 @@
+struct semid_ds {
+	struct ipc_perm sem_perm;
+	time_t sem_otime;
+	long __unused1;
+	time_t sem_ctime;
+	long __unused2;
+	unsigned short sem_nsems;
+	char __sem_nsems_pad[sizeof(long)-sizeof(short)];
+	long __unused3;
+	long __unused4;
+};
diff --git a/arch/x86_64/bits/socket.h b/arch/x86_64/bits/socket.h
deleted file mode 100644
index a4c89f3d..00000000
--- a/arch/x86_64/bits/socket.h
+++ /dev/null
@@ -1,16 +0,0 @@
-struct msghdr {
-	void *msg_name;
-	socklen_t msg_namelen;
-	struct iovec *msg_iov;
-	int msg_iovlen, __pad1;
-	void *msg_control;
-	socklen_t msg_controllen, __pad2;
-	int msg_flags;
-};
-
-struct cmsghdr {
-	socklen_t cmsg_len;
-	int __pad1;
-	int cmsg_level;
-	int cmsg_type;
-};
diff --git a/arch/x86_64/bits/stdint.h b/arch/x86_64/bits/stdint.h
deleted file mode 100644
index 1bb147f2..00000000
--- a/arch/x86_64/bits/stdint.h
+++ /dev/null
@@ -1,20 +0,0 @@
-typedef int32_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef uint32_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-
-#define INT_FAST16_MIN  INT32_MIN
-#define INT_FAST32_MIN  INT32_MIN
-
-#define INT_FAST16_MAX  INT32_MAX
-#define INT_FAST32_MAX  INT32_MAX
-
-#define UINT_FAST16_MAX UINT32_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-
-#define INTPTR_MIN      INT64_MIN
-#define INTPTR_MAX      INT64_MAX
-#define UINTPTR_MAX     UINT64_MAX
-#define PTRDIFF_MIN     INT64_MIN
-#define PTRDIFF_MAX     INT64_MAX
-#define SIZE_MAX        UINT64_MAX
diff --git a/arch/x86_64/bits/syscall.h.in b/arch/x86_64/bits/syscall.h.in
index 9cdb7789..6543bbba 100644
--- a/arch/x86_64/bits/syscall.h.in
+++ b/arch/x86_64/bits/syscall.h.in
@@ -333,4 +333,32 @@
 #define __NR_statx				332
 #define __NR_io_pgetevents			333
 #define __NR_rseq				334
+#define __NR_pidfd_send_signal			424
+#define __NR_io_uring_setup			425
+#define __NR_io_uring_enter			426
+#define __NR_io_uring_register			427
+#define __NR_open_tree		428
+#define __NR_move_mount		429
+#define __NR_fsopen		430
+#define __NR_fsconfig		431
+#define __NR_fsmount		432
+#define __NR_fspick		433
+#define __NR_pidfd_open		434
+#define __NR_clone3		435
+#define __NR_close_range	436
+#define __NR_openat2		437
+#define __NR_pidfd_getfd	438
+#define __NR_faccessat2		439
+#define __NR_process_madvise	440
+#define __NR_epoll_pwait2	441
+#define __NR_mount_setattr	442
+#define __NR_landlock_create_ruleset	444
+#define __NR_landlock_add_rule	445
+#define __NR_landlock_restrict_self	446
+#define __NR_memfd_secret	447
+#define __NR_process_mrelease	448
+#define __NR_futex_waitv	449
+#define __NR_set_mempolicy_home_node	450
+#define __NR_cachestat		451
+#define __NR_fchmodat2		452
 
diff --git a/arch/x86_64/bits/user.h b/arch/x86_64/bits/user.h
index 4073cc06..b328edf9 100644
--- a/arch/x86_64/bits/user.h
+++ b/arch/x86_64/bits/user.h
@@ -1,6 +1,3 @@
-#undef __WORDSIZE
-#define __WORDSIZE 64
-
 typedef struct user_fpregs_struct {
 	uint16_t cwd, swd, ftw, fop;
 	uint64_t rip, rdp;
diff --git a/arch/x86_64/crt_arch.h b/arch/x86_64/crt_arch.h
index 3eec61bd..b1c9c476 100644
--- a/arch/x86_64/crt_arch.h
+++ b/arch/x86_64/crt_arch.h
@@ -1,6 +1,7 @@
 __asm__(
 ".text \n"
 ".global " START " \n"
+".type " START ",%function \n"
 START ": \n"
 "	xor %rbp,%rbp \n"
 "	mov %rsp,%rdi \n"
diff --git a/arch/x86_64/kstat.h b/arch/x86_64/kstat.h
new file mode 100644
index 00000000..5976c04e
--- /dev/null
+++ b/arch/x86_64/kstat.h
@@ -0,0 +1,22 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	nlink_t st_nlink;
+
+	mode_t st_mode;
+	uid_t st_uid;
+	gid_t st_gid;
+	unsigned int    __pad0;
+	dev_t st_rdev;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+
+	long st_atime_sec;
+	long st_atime_nsec;
+	long st_mtime_sec;
+	long st_mtime_nsec;
+	long st_ctime_sec;
+	long st_ctime_nsec;
+	long __unused[3];
+};
diff --git a/arch/x86_64/pthread_arch.h b/arch/x86_64/pthread_arch.h
index 65e880c6..c8c63f2e 100644
--- a/arch/x86_64/pthread_arch.h
+++ b/arch/x86_64/pthread_arch.h
@@ -1,10 +1,8 @@
-static inline struct pthread *__pthread_self()
+static inline uintptr_t __get_tp()
 {
-	struct pthread *self;
-	__asm__ ("mov %%fs:0,%0" : "=r" (self) );
-	return self;
+	uintptr_t tp;
+	__asm__ ("mov %%fs:0,%0" : "=r" (tp) );
+	return tp;
 }
 
-#define TP_ADJ(p) (p)
-
 #define MC_PC gregs[REG_RIP]
diff --git a/arch/x86_64/syscall_arch.h b/arch/x86_64/syscall_arch.h
index 54e05ff6..92d5c179 100644
--- a/arch/x86_64/syscall_arch.h
+++ b/arch/x86_64/syscall_arch.h
@@ -66,3 +66,5 @@ static __inline long __syscall6(long n, long a1, long a2, long a3, long a4, long
 #define VDSO_CGT_VER "LINUX_2.6"
 #define VDSO_GETCPU_SYM "__vdso_getcpu"
 #define VDSO_GETCPU_VER "LINUX_2.6"
+
+#define IPC_64 0
diff --git a/compat/time32/__xstat.c b/compat/time32/__xstat.c
new file mode 100644
index 00000000..e52b5deb
--- /dev/null
+++ b/compat/time32/__xstat.c
@@ -0,0 +1,24 @@
+#include "time32.h"
+#include <sys/stat.h>
+
+struct stat32;
+
+int __fxstat(int ver, int fd, struct stat32 *buf)
+{
+	return __fstat_time32(fd, buf);
+}
+
+int __fxstatat(int ver, int fd, const char *path, struct stat32 *buf, int flag)
+{
+	return __fstatat_time32(fd, path, buf, flag);
+}
+
+int __lxstat(int ver, const char *path, struct stat32 *buf)
+{
+	return __lstat_time32(path, buf);
+}
+
+int __xstat(int ver, const char *path, struct stat32 *buf)
+{
+	return __stat_time32(path, buf);
+}
diff --git a/compat/time32/adjtime32.c b/compat/time32/adjtime32.c
new file mode 100644
index 00000000..b0042c63
--- /dev/null
+++ b/compat/time32/adjtime32.c
@@ -0,0 +1,21 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+
+int __adjtime32(const struct timeval32 *in32, struct timeval32 *out32)
+{
+	struct timeval out;
+	int r = adjtime((&(struct timeval){
+		.tv_sec = in32->tv_sec,
+		.tv_usec = in32->tv_usec}), &out);
+	if (r) return r;
+	/* We can't range-check the result because success was already
+	 * committed by the above call. */
+	if (out32) {
+		out32->tv_sec = out.tv_sec;
+		out32->tv_usec = out.tv_usec;
+	}
+	return r;
+}
diff --git a/compat/time32/adjtimex_time32.c b/compat/time32/adjtimex_time32.c
new file mode 100644
index 00000000..9c6f190a
--- /dev/null
+++ b/compat/time32/adjtimex_time32.c
@@ -0,0 +1,10 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/timex.h>
+
+struct timex32;
+
+int __adjtimex_time32(struct timex32 *tx32)
+{
+	return __clock_adjtime32(CLOCK_REALTIME, tx32);
+}
diff --git a/compat/time32/aio_suspend_time32.c b/compat/time32/aio_suspend_time32.c
new file mode 100644
index 00000000..d99cb651
--- /dev/null
+++ b/compat/time32/aio_suspend_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <aio.h>
+
+int __aio_suspend_time32(const struct aiocb *const cbs[], int cnt, const struct timespec32 *ts32)
+{
+	return aio_suspend(cbs, cnt, ts32 ? (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}) : 0);
+}
diff --git a/compat/time32/clock_adjtime32.c b/compat/time32/clock_adjtime32.c
new file mode 100644
index 00000000..5a25b8ac
--- /dev/null
+++ b/compat/time32/clock_adjtime32.c
@@ -0,0 +1,70 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <stddef.h>
+
+struct timex32 {
+	unsigned modes;
+	long offset, freq, maxerror, esterror;
+	int status;
+	long constant, precision, tolerance;
+	struct timeval32 time;
+	long tick, ppsfreq, jitter;
+	int shift;
+	long stabil, jitcnt, calcnt, errcnt, stbcnt;
+	int tai;
+	int __padding[11];
+};
+
+int __clock_adjtime32(clockid_t clock_id, struct timex32 *tx32)
+{
+	struct timex utx = {
+		.modes = tx32->modes,
+		.offset = tx32->offset,
+		.freq = tx32->freq,
+		.maxerror = tx32->maxerror,
+		.esterror = tx32->esterror,
+		.status = tx32->status,
+		.constant = tx32->constant,
+		.precision = tx32->precision,
+		.tolerance = tx32->tolerance,
+		.time.tv_sec = tx32->time.tv_sec,
+		.time.tv_usec = tx32->time.tv_usec,
+		.tick = tx32->tick,
+		.ppsfreq = tx32->ppsfreq,
+		.jitter = tx32->jitter,
+		.shift = tx32->shift,
+		.stabil = tx32->stabil,
+		.jitcnt = tx32->jitcnt,
+		.calcnt = tx32->calcnt,
+		.errcnt = tx32->errcnt,
+		.stbcnt = tx32->stbcnt,
+		.tai = tx32->tai,
+	};
+	int r = clock_adjtime(clock_id, &utx);
+	if (r<0) return r;
+	tx32->modes = utx.modes;
+	tx32->offset = utx.offset;
+	tx32->freq = utx.freq;
+	tx32->maxerror = utx.maxerror;
+	tx32->esterror = utx.esterror;
+	tx32->status = utx.status;
+	tx32->constant = utx.constant;
+	tx32->precision = utx.precision;
+	tx32->tolerance = utx.tolerance;
+	tx32->time.tv_sec = utx.time.tv_sec;
+	tx32->time.tv_usec = utx.time.tv_usec;
+	tx32->tick = utx.tick;
+	tx32->ppsfreq = utx.ppsfreq;
+	tx32->jitter = utx.jitter;
+	tx32->shift = utx.shift;
+	tx32->stabil = utx.stabil;
+	tx32->jitcnt = utx.jitcnt;
+	tx32->calcnt = utx.calcnt;
+	tx32->errcnt = utx.errcnt;
+	tx32->stbcnt = utx.stbcnt;
+	tx32->tai = utx.tai;
+	return r;
+}
diff --git a/compat/time32/clock_getres_time32.c b/compat/time32/clock_getres_time32.c
new file mode 100644
index 00000000..47a24c13
--- /dev/null
+++ b/compat/time32/clock_getres_time32.c
@@ -0,0 +1,13 @@
+#include "time32.h"
+#include <time.h>
+
+int __clock_getres_time32(clockid_t clk, struct timespec32 *ts32)
+{
+	struct timespec ts;
+	int r = clock_getres(clk, &ts);
+	if (!r && ts32) {
+		ts32->tv_sec = ts.tv_sec;
+		ts32->tv_nsec = ts.tv_nsec;
+	}
+	return r;
+}
diff --git a/compat/time32/clock_gettime32.c b/compat/time32/clock_gettime32.c
new file mode 100644
index 00000000..0cac7bbd
--- /dev/null
+++ b/compat/time32/clock_gettime32.c
@@ -0,0 +1,18 @@
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+#include <stdint.h>
+
+int __clock_gettime32(clockid_t clk, struct timespec32 *ts32)
+{
+	struct timespec ts;
+	int r = clock_gettime(clk, &ts);
+	if (r) return r;
+	if (ts.tv_sec < INT32_MIN || ts.tv_sec > INT32_MAX) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	ts32->tv_sec = ts.tv_sec;
+	ts32->tv_nsec = ts.tv_nsec;
+	return 0;
+}
diff --git a/compat/time32/clock_nanosleep_time32.c b/compat/time32/clock_nanosleep_time32.c
new file mode 100644
index 00000000..91ef067d
--- /dev/null
+++ b/compat/time32/clock_nanosleep_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+
+int __clock_nanosleep_time32(clockid_t clk, int flags, const struct timespec32 *req32, struct timespec32 *rem32)
+{
+	struct timespec rem;
+	int ret = clock_nanosleep(clk, flags, (&(struct timespec){
+		.tv_sec = req32->tv_sec, .tv_nsec = req32->tv_nsec}), &rem);
+	if (ret==EINTR && rem32 && !(flags & TIMER_ABSTIME)) {
+		rem32->tv_sec = rem.tv_sec;
+		rem32->tv_nsec = rem.tv_nsec;
+	}
+	return ret;
+}
diff --git a/compat/time32/clock_settime32.c b/compat/time32/clock_settime32.c
new file mode 100644
index 00000000..7ca4f0e9
--- /dev/null
+++ b/compat/time32/clock_settime32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+
+int __clock_settime32(clockid_t clk, const struct timespec32 *ts32)
+{
+	return clock_settime(clk, (&(struct timespec){
+		.tv_sec = ts32->tv_sec,
+		.tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/cnd_timedwait_time32.c b/compat/time32/cnd_timedwait_time32.c
new file mode 100644
index 00000000..314251d1
--- /dev/null
+++ b/compat/time32/cnd_timedwait_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <threads.h>
+
+int __cnd_timedwait_time32(cnd_t *restrict c, mtx_t *restrict m, const struct timespec32 *restrict ts32)
+{
+	return cnd_timedwait(c, m, ts32 ? (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}) : 0);
+}
diff --git a/compat/time32/ctime32.c b/compat/time32/ctime32.c
new file mode 100644
index 00000000..a057274e
--- /dev/null
+++ b/compat/time32/ctime32.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+char *__ctime32(time32_t *t)
+{
+	return ctime(&(time_t){*t});
+}
diff --git a/compat/time32/ctime32_r.c b/compat/time32/ctime32_r.c
new file mode 100644
index 00000000..e1ad2e28
--- /dev/null
+++ b/compat/time32/ctime32_r.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+char *__ctime32_r(time32_t *t, char *buf)
+{
+	return ctime_r(&(time_t){*t}, buf);
+}
diff --git a/compat/time32/difftime32.c b/compat/time32/difftime32.c
new file mode 100644
index 00000000..5950943a
--- /dev/null
+++ b/compat/time32/difftime32.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+double __difftime32(time32_t t1, time32_t t2)
+{
+	return difftime(t1, t2);
+}
diff --git a/compat/time32/fstat_time32.c b/compat/time32/fstat_time32.c
new file mode 100644
index 00000000..e5d52022
--- /dev/null
+++ b/compat/time32/fstat_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <stddef.h>
+
+struct stat32;
+
+int __fstat_time32(int fd, struct stat32 *restrict st32)
+{
+	struct stat st;
+	int r = fstat(fd, &st);
+	if (!r) memcpy(st32, &st, offsetof(struct stat, st_atim));
+	return r;
+}
diff --git a/compat/time32/fstatat_time32.c b/compat/time32/fstatat_time32.c
new file mode 100644
index 00000000..31d42e63
--- /dev/null
+++ b/compat/time32/fstatat_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <stddef.h>
+
+struct stat32;
+
+int __fstatat_time32(int fd, const char *restrict path, struct stat32 *restrict st32, int flag)
+{
+	struct stat st;
+	int r = fstatat(fd, path, &st, flag);
+	if (!r) memcpy(st32, &st, offsetof(struct stat, st_atim));
+	return r;
+}
diff --git a/compat/time32/ftime32.c b/compat/time32/ftime32.c
new file mode 100644
index 00000000..166a6dae
--- /dev/null
+++ b/compat/time32/ftime32.c
@@ -0,0 +1,25 @@
+#include "time32.h"
+#include <sys/timeb.h>
+#include <errno.h>
+#include <stdint.h>
+
+struct timeb32 {
+	int32_t time;
+	unsigned short millitm;
+	short timezone, dstflag;
+};
+
+int __ftime32(struct timeb32 *tp)
+{
+	struct timeb tb;
+	if (ftime(&tb) < 0) return -1;
+	if (tb.time < INT32_MIN || tb.time > INT32_MAX) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	tp->time = tb.time;
+	tp->millitm = tb.millitm;
+	tp->timezone = tb.timezone;
+	tp->dstflag = tb.dstflag;
+	return 0;
+}
diff --git a/compat/time32/futimens_time32.c b/compat/time32/futimens_time32.c
new file mode 100644
index 00000000..7856f176
--- /dev/null
+++ b/compat/time32/futimens_time32.c
@@ -0,0 +1,10 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/stat.h>
+
+int __futimens_time32(int fd, const struct timespec32 *times32)
+{
+	return futimens(fd, !times32 ? 0 : ((struct timespec[2]){
+		{.tv_sec = times32[0].tv_sec,.tv_nsec = times32[0].tv_nsec},
+		{.tv_sec = times32[1].tv_sec,.tv_nsec = times32[1].tv_nsec}}));
+}
diff --git a/compat/time32/futimes_time32.c b/compat/time32/futimes_time32.c
new file mode 100644
index 00000000..f29533f1
--- /dev/null
+++ b/compat/time32/futimes_time32.c
@@ -0,0 +1,12 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+
+int __futimes_time32(int fd, const struct timeval32 times32[2])
+{
+	return futimes(fd, !times32 ? 0 : ((struct timeval[2]){
+		{.tv_sec = times32[0].tv_sec,.tv_usec = times32[0].tv_usec},
+		{.tv_sec = times32[1].tv_sec,.tv_usec = times32[1].tv_usec}}));
+}
diff --git a/compat/time32/futimesat_time32.c b/compat/time32/futimesat_time32.c
new file mode 100644
index 00000000..5a1295bd
--- /dev/null
+++ b/compat/time32/futimesat_time32.c
@@ -0,0 +1,12 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+
+int __futimesat_time32(int dirfd, const char *pathname, const struct timeval32 times32[2])
+{
+	return futimesat(dirfd, pathname, !times32 ? 0 : ((struct timeval[2]){
+		{.tv_sec = times32[0].tv_sec,.tv_usec = times32[0].tv_usec},
+		{.tv_sec = times32[1].tv_sec,.tv_usec = times32[1].tv_usec}}));
+}
diff --git a/compat/time32/getitimer_time32.c b/compat/time32/getitimer_time32.c
new file mode 100644
index 00000000..4bac4bf5
--- /dev/null
+++ b/compat/time32/getitimer_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+
+int __getitimer_time32(int which, struct itimerval32 *old32)
+{
+	struct itimerval old;
+	int r = getitimer(which, &old);
+	if (r) return r;
+	old32->it_interval.tv_sec = old.it_interval.tv_sec;
+	old32->it_interval.tv_usec = old.it_interval.tv_usec;
+	old32->it_value.tv_sec = old.it_value.tv_sec;
+	old32->it_value.tv_usec = old.it_value.tv_usec;
+	return 0;
+}
diff --git a/compat/time32/getrusage_time32.c b/compat/time32/getrusage_time32.c
new file mode 100644
index 00000000..d7487dee
--- /dev/null
+++ b/compat/time32/getrusage_time32.c
@@ -0,0 +1,39 @@
+#include "time32.h"
+#include <string.h>
+#include <stddef.h>
+#include <sys/resource.h>
+
+struct compat_rusage {
+	struct timeval32 ru_utime;
+	struct timeval32 ru_stime;
+	long	ru_maxrss;
+	long	ru_ixrss;
+	long	ru_idrss;
+	long	ru_isrss;
+	long	ru_minflt;
+	long	ru_majflt;
+	long	ru_nswap;
+	long	ru_inblock;
+	long	ru_oublock;
+	long	ru_msgsnd;
+	long	ru_msgrcv;
+	long	ru_nsignals;
+	long	ru_nvcsw;
+	long	ru_nivcsw;
+};
+
+int __getrusage_time32(int who, struct compat_rusage *usage)
+{
+	struct rusage ru;
+	int r = getrusage(who, &ru);
+	if (!r) {
+		usage->ru_utime.tv_sec = ru.ru_utime.tv_sec;
+		usage->ru_utime.tv_usec = ru.ru_utime.tv_usec;
+		usage->ru_stime.tv_sec = ru.ru_stime.tv_sec;
+		usage->ru_stime.tv_usec = ru.ru_stime.tv_usec;
+		memcpy(&usage->ru_maxrss, &ru.ru_maxrss,
+			sizeof(struct compat_rusage) -
+			offsetof(struct compat_rusage, ru_maxrss));
+	}
+	return r;
+}
diff --git a/compat/time32/gettimeofday_time32.c b/compat/time32/gettimeofday_time32.c
new file mode 100644
index 00000000..1f3ce68e
--- /dev/null
+++ b/compat/time32/gettimeofday_time32.c
@@ -0,0 +1,19 @@
+#include "time32.h"
+#include <sys/time.h>
+#include <errno.h>
+#include <stdint.h>
+
+int __gettimeofday_time32(struct timeval32 *tv32, void *tz)
+{
+	struct timeval tv;
+	if (!tv32) return 0;
+	int r = gettimeofday(&tv, 0);
+	if (r) return r;
+	if (tv.tv_sec < INT32_MIN || tv.tv_sec > INT32_MAX) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	tv32->tv_sec = tv.tv_sec;
+	tv32->tv_usec = tv.tv_usec;
+	return 0;
+}
diff --git a/compat/time32/gmtime32.c b/compat/time32/gmtime32.c
new file mode 100644
index 00000000..963f0e05
--- /dev/null
+++ b/compat/time32/gmtime32.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+struct tm *__gmtime32(time32_t *t)
+{
+	return gmtime(&(time_t){*t});
+}
diff --git a/compat/time32/gmtime32_r.c b/compat/time32/gmtime32_r.c
new file mode 100644
index 00000000..7d72bfb3
--- /dev/null
+++ b/compat/time32/gmtime32_r.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+struct tm *__gmtime32_r(time32_t *t, struct tm *tm)
+{
+	return gmtime_r(&(time_t){*t}, tm);
+}
diff --git a/compat/time32/localtime32.c b/compat/time32/localtime32.c
new file mode 100644
index 00000000..96bc3034
--- /dev/null
+++ b/compat/time32/localtime32.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+struct tm *__localtime32(time32_t *t)
+{
+	return localtime(&(time_t){*t});
+}
diff --git a/compat/time32/localtime32_r.c b/compat/time32/localtime32_r.c
new file mode 100644
index 00000000..633ec829
--- /dev/null
+++ b/compat/time32/localtime32_r.c
@@ -0,0 +1,7 @@
+#include "time32.h"
+#include <time.h>
+
+struct tm *__localtime32_r(time32_t *t, struct tm *tm)
+{
+	return localtime_r(&(time_t){*t}, tm);
+}
diff --git a/compat/time32/lstat_time32.c b/compat/time32/lstat_time32.c
new file mode 100644
index 00000000..28cb5a0b
--- /dev/null
+++ b/compat/time32/lstat_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <stddef.h>
+
+struct stat32;
+
+int __lstat_time32(const char *restrict path, struct stat32 *restrict st32)
+{
+	struct stat st;
+	int r = lstat(path, &st);
+	if (!r) memcpy(st32, &st, offsetof(struct stat, st_atim));
+	return r;
+}
diff --git a/compat/time32/lutimes_time32.c b/compat/time32/lutimes_time32.c
new file mode 100644
index 00000000..7f75cd4a
--- /dev/null
+++ b/compat/time32/lutimes_time32.c
@@ -0,0 +1,12 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+
+int __lutimes_time32(const char *path, const struct timeval32 times32[2])
+{
+	return lutimes(path, !times32 ? 0 : ((struct timeval[2]){
+		{.tv_sec = times32[0].tv_sec,.tv_usec = times32[0].tv_usec},
+		{.tv_sec = times32[1].tv_sec,.tv_usec = times32[1].tv_usec}}));
+}
diff --git a/compat/time32/mktime32.c b/compat/time32/mktime32.c
new file mode 100644
index 00000000..e6f15d51
--- /dev/null
+++ b/compat/time32/mktime32.c
@@ -0,0 +1,16 @@
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+#include <stdint.h>
+
+time32_t __mktime32(struct tm *tm)
+{
+	struct tm tmp = *tm;
+	time_t t = mktime(&tmp);
+	if (t < INT32_MIN || t > INT32_MAX) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	*tm = tmp;
+	return t;
+}
diff --git a/compat/time32/mq_timedreceive_time32.c b/compat/time32/mq_timedreceive_time32.c
new file mode 100644
index 00000000..211cea4b
--- /dev/null
+++ b/compat/time32/mq_timedreceive_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <mqueue.h>
+#include <time.h>
+
+ssize_t __mq_timedreceive_time32(mqd_t mqd, char *restrict msg, size_t len, unsigned *restrict prio, const struct timespec32 *restrict ts32)
+{
+	return mq_timedreceive(mqd, msg, len, prio, ts32 ? (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}) : 0);
+}
diff --git a/compat/time32/mq_timedsend_time32.c b/compat/time32/mq_timedsend_time32.c
new file mode 100644
index 00000000..93b697a7
--- /dev/null
+++ b/compat/time32/mq_timedsend_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <mqueue.h>
+#include <time.h>
+
+int __mq_timedsend_time32(mqd_t mqd, const char *msg, size_t len, unsigned prio, const struct timespec32 *ts32)
+{
+	return mq_timedsend(mqd, msg, len, prio, ts32 ? (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}) : 0);
+}
diff --git a/compat/time32/mtx_timedlock_time32.c b/compat/time32/mtx_timedlock_time32.c
new file mode 100644
index 00000000..a01f09b8
--- /dev/null
+++ b/compat/time32/mtx_timedlock_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <threads.h>
+
+int __mtx_timedlock_time32(mtx_t *restrict m, const struct timespec32 *restrict ts32)
+{
+	return mtx_timedlock(m, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/nanosleep_time32.c b/compat/time32/nanosleep_time32.c
new file mode 100644
index 00000000..ea6bdd81
--- /dev/null
+++ b/compat/time32/nanosleep_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+
+int __nanosleep_time32(const struct timespec32 *req32, struct timespec32 *rem32)
+{
+	struct timespec rem;
+	int ret = nanosleep((&(struct timespec){
+		.tv_sec = req32->tv_sec, .tv_nsec = req32->tv_nsec}), &rem);
+	if (ret<0 && errno==EINTR && rem32) {
+		rem32->tv_sec = rem.tv_sec;
+		rem32->tv_nsec = rem.tv_nsec;
+	}
+	return ret;
+}
diff --git a/compat/time32/ppoll_time32.c b/compat/time32/ppoll_time32.c
new file mode 100644
index 00000000..43b4b0df
--- /dev/null
+++ b/compat/time32/ppoll_time32.c
@@ -0,0 +1,10 @@
+#include "time32.h"
+#define _GNU_SOURCE
+#include <time.h>
+#include <poll.h>
+
+int __ppoll_time32(struct pollfd *fds, nfds_t n, const struct timespec32 *ts32, const sigset_t *mask)
+{
+	return ppoll(fds, n, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}), mask);
+}
diff --git a/compat/time32/pselect_time32.c b/compat/time32/pselect_time32.c
new file mode 100644
index 00000000..ecaa8f86
--- /dev/null
+++ b/compat/time32/pselect_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/select.h>
+
+int __pselect_time32(int n, fd_set *restrict rfds, fd_set *restrict wfds, fd_set *restrict efds, const struct timespec32 *restrict ts32, const sigset_t *restrict mask)
+{
+	return pselect(n, rfds, wfds, efds, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}), mask);
+}
diff --git a/compat/time32/pthread_cond_timedwait_time32.c b/compat/time32/pthread_cond_timedwait_time32.c
new file mode 100644
index 00000000..fba1f2a9
--- /dev/null
+++ b/compat/time32/pthread_cond_timedwait_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <pthread.h>
+
+int __pthread_cond_timedwait_time32(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec32 *restrict ts32)
+{
+	return pthread_cond_timedwait(c, m, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/pthread_mutex_timedlock_time32.c b/compat/time32/pthread_mutex_timedlock_time32.c
new file mode 100644
index 00000000..2d29602c
--- /dev/null
+++ b/compat/time32/pthread_mutex_timedlock_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <pthread.h>
+
+int __pthread_mutex_timedlock_time32(pthread_mutex_t *restrict m, const struct timespec32 *restrict ts32)
+{
+	return pthread_mutex_timedlock(m, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/pthread_rwlock_timedrdlock_time32.c b/compat/time32/pthread_rwlock_timedrdlock_time32.c
new file mode 100644
index 00000000..33df27a4
--- /dev/null
+++ b/compat/time32/pthread_rwlock_timedrdlock_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <pthread.h>
+
+int __pthread_rwlock_timedrdlock_time32(pthread_rwlock_t *restrict rw, const struct timespec32 *restrict ts32)
+{
+	return pthread_rwlock_timedrdlock(rw, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/pthread_rwlock_timedwrlock_time32.c b/compat/time32/pthread_rwlock_timedwrlock_time32.c
new file mode 100644
index 00000000..99f24f73
--- /dev/null
+++ b/compat/time32/pthread_rwlock_timedwrlock_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <pthread.h>
+
+int __pthread_rwlock_timedwrlock_time32(pthread_rwlock_t *restrict rw, const struct timespec32 *restrict ts32)
+{
+	return pthread_rwlock_timedwrlock(rw, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/pthread_timedjoin_np_time32.c b/compat/time32/pthread_timedjoin_np_time32.c
new file mode 100644
index 00000000..3ec29951
--- /dev/null
+++ b/compat/time32/pthread_timedjoin_np_time32.c
@@ -0,0 +1,10 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+#include <pthread.h>
+
+int __pthread_timedjoin_np_time32(pthread_t t, void **res, const struct timespec32 *at32)
+{
+	return pthread_timedjoin_np(t, res, !at32 ? 0 : (&(struct timespec){
+		.tv_sec = at32->tv_sec, .tv_nsec = at32->tv_nsec}));
+}
diff --git a/compat/time32/recvmmsg_time32.c b/compat/time32/recvmmsg_time32.c
new file mode 100644
index 00000000..acf1cfb8
--- /dev/null
+++ b/compat/time32/recvmmsg_time32.c
@@ -0,0 +1,10 @@
+#include "time32.h"
+#define _GNU_SOURCE
+#include <time.h>
+#include <sys/socket.h>
+
+int __recvmmsg_time32(int fd, struct mmsghdr *msgvec, unsigned int vlen, unsigned int flags, struct timespec32 *ts32)
+{
+	return recvmmsg(fd, msgvec, vlen, flags, ts32 ? (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}) : 0);
+}
diff --git a/compat/time32/sched_rr_get_interval_time32.c b/compat/time32/sched_rr_get_interval_time32.c
new file mode 100644
index 00000000..36cbbaca
--- /dev/null
+++ b/compat/time32/sched_rr_get_interval_time32.c
@@ -0,0 +1,13 @@
+#include "time32.h"
+#include <time.h>
+#include <sched.h>
+
+int __sched_rr_get_interval_time32(pid_t pid, struct timespec32 *ts32)
+{
+	struct timespec ts;
+	int r = sched_rr_get_interval(pid, &ts);
+	if (r) return r;
+	ts32->tv_sec = ts.tv_sec;
+	ts32->tv_nsec = ts.tv_nsec;
+	return r;
+}
diff --git a/compat/time32/select_time32.c b/compat/time32/select_time32.c
new file mode 100644
index 00000000..2d8df9ac
--- /dev/null
+++ b/compat/time32/select_time32.c
@@ -0,0 +1,10 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/select.h>
+
+int __select_time32(int n, fd_set *restrict rfds, fd_set *restrict wfds, fd_set *restrict efds, struct timeval32 *restrict tv32)
+{
+	return select(n, rfds, wfds, efds, !tv32 ? 0 : (&(struct timeval){
+		.tv_sec = tv32->tv_sec, .tv_usec = tv32->tv_usec}));
+}
diff --git a/compat/time32/sem_timedwait_time32.c b/compat/time32/sem_timedwait_time32.c
new file mode 100644
index 00000000..c3469f9b
--- /dev/null
+++ b/compat/time32/sem_timedwait_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <semaphore.h>
+
+int __sem_timedwait_time32(sem_t *sem, const struct timespec32 *restrict ts32)
+{
+	return sem_timedwait(sem, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/semtimedop_time32.c b/compat/time32/semtimedop_time32.c
new file mode 100644
index 00000000..34ec5281
--- /dev/null
+++ b/compat/time32/semtimedop_time32.c
@@ -0,0 +1,10 @@
+#include "time32.h"
+#define _GNU_SOURCE
+#include <sys/sem.h>
+#include <time.h>
+
+int __semtimedop_time32(int id, struct sembuf *buf, size_t n, const struct timespec32 *ts32)
+{
+	return semtimedop(id, buf, n, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/setitimer_time32.c b/compat/time32/setitimer_time32.c
new file mode 100644
index 00000000..2475fd8c
--- /dev/null
+++ b/compat/time32/setitimer_time32.c
@@ -0,0 +1,25 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+
+int __setitimer_time32(int which, const struct itimerval32 *restrict new32, struct itimerval32 *restrict old32)
+{
+	struct itimerval old;
+	int r = setitimer(which, (&(struct itimerval){
+		.it_interval.tv_sec = new32->it_interval.tv_sec,
+		.it_interval.tv_usec = new32->it_interval.tv_usec,
+		.it_value.tv_sec = new32->it_value.tv_sec,
+		.it_value.tv_usec = new32->it_value.tv_usec}), &old);
+	if (r) return r;
+	/* The above call has already committed to success by changing the
+	 * timer setting, so we can't fail on out-of-range old value.
+	 * Since these are relative times, values large enough to overflow
+	 * don't make sense anyway. */
+	if (old32) {
+		old32->it_interval.tv_sec = old.it_interval.tv_sec;
+		old32->it_interval.tv_usec = old.it_interval.tv_usec;
+		old32->it_value.tv_sec = old.it_value.tv_sec;
+		old32->it_value.tv_usec = old.it_value.tv_usec;
+	}
+	return 0;
+}
diff --git a/compat/time32/settimeofday_time32.c b/compat/time32/settimeofday_time32.c
new file mode 100644
index 00000000..09e625cb
--- /dev/null
+++ b/compat/time32/settimeofday_time32.c
@@ -0,0 +1,10 @@
+#define _BSD_SOURCE
+#include "time32.h"
+#include <sys/time.h>
+
+int __settimeofday_time32(const struct timeval32 *tv32, const void *tz)
+{
+	return settimeofday(!tv32 ? 0 : (&(struct timeval){
+		.tv_sec = tv32->tv_sec,
+		.tv_usec = tv32->tv_usec}), 0);
+}
diff --git a/compat/time32/sigtimedwait_time32.c b/compat/time32/sigtimedwait_time32.c
new file mode 100644
index 00000000..6b3aa39c
--- /dev/null
+++ b/compat/time32/sigtimedwait_time32.c
@@ -0,0 +1,9 @@
+#include "time32.h"
+#include <time.h>
+#include <signal.h>
+
+int __sigtimedwait_time32(const sigset_t *restrict set, siginfo_t *restrict si, const struct timespec32 *restrict ts32)
+{
+	return sigtimedwait(set, si, !ts32 ? 0 : (&(struct timespec){
+		.tv_sec = ts32->tv_sec, .tv_nsec = ts32->tv_nsec}));
+}
diff --git a/compat/time32/stat_time32.c b/compat/time32/stat_time32.c
new file mode 100644
index 00000000..b154b0f9
--- /dev/null
+++ b/compat/time32/stat_time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <stddef.h>
+
+struct stat32;
+
+int __stat_time32(const char *restrict path, struct stat32 *restrict st32)
+{
+	struct stat st;
+	int r = stat(path, &st);
+	if (!r) memcpy(st32, &st, offsetof(struct stat, st_atim));
+	return r;
+}
diff --git a/compat/time32/stime32.c b/compat/time32/stime32.c
new file mode 100644
index 00000000..cc76364d
--- /dev/null
+++ b/compat/time32/stime32.c
@@ -0,0 +1,8 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+
+int __stime32(const time32_t *t)
+{
+	return stime(&(time_t){*t});
+}
diff --git a/compat/time32/thrd_sleep_time32.c b/compat/time32/thrd_sleep_time32.c
new file mode 100644
index 00000000..59088001
--- /dev/null
+++ b/compat/time32/thrd_sleep_time32.c
@@ -0,0 +1,16 @@
+#include "time32.h"
+#include <time.h>
+#include <threads.h>
+#include <errno.h>
+
+int __thrd_sleep_time32(const struct timespec32 *req32, struct timespec32 *rem32)
+{
+	struct timespec rem;
+	int ret = thrd_sleep((&(struct timespec){
+		.tv_sec = req32->tv_sec, .tv_nsec = req32->tv_nsec}), &rem);
+	if (ret<0 && errno==EINTR && rem32) {
+		rem32->tv_sec = rem.tv_sec;
+		rem32->tv_nsec = rem.tv_nsec;
+	}
+	return ret;
+}
diff --git a/compat/time32/time32.c b/compat/time32/time32.c
new file mode 100644
index 00000000..4b8fac1c
--- /dev/null
+++ b/compat/time32/time32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+#include <stdint.h>
+
+time32_t __time32(time32_t *p)
+{
+	time_t t = time(0);
+	if (t < INT32_MIN || t > INT32_MAX) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	if (p) *p = t;
+	return t;
+}
diff --git a/compat/time32/time32.h b/compat/time32/time32.h
new file mode 100644
index 00000000..fdec17c3
--- /dev/null
+++ b/compat/time32/time32.h
@@ -0,0 +1,91 @@
+#ifndef TIME32_H
+#define TIME32_H
+
+#include <sys/types.h>
+
+typedef long time32_t;
+
+struct timeval32 {
+	long tv_sec;
+	long tv_usec;
+};
+
+struct itimerval32 {
+	struct timeval32 it_interval;
+	struct timeval32 it_value;
+};
+
+struct timespec32 {
+	long tv_sec;
+	long tv_nsec;
+};
+
+struct itimerspec32 {
+	struct timespec32 it_interval;
+	struct timespec32 it_value;
+};
+
+int __adjtime32() __asm__("adjtime");
+int __adjtimex_time32() __asm__("adjtimex");
+int __aio_suspend_time32() __asm__("aio_suspend");
+int __clock_adjtime32() __asm__("clock_adjtime");
+int __clock_getres_time32() __asm__("clock_getres");
+int __clock_gettime32() __asm__("clock_gettime");
+int __clock_nanosleep_time32() __asm__("clock_nanosleep");
+int __clock_settime32() __asm__("clock_settime");
+int __cnd_timedwait_time32() __asm__("cnd_timedwait");
+char *__ctime32() __asm__("ctime");
+char *__ctime32_r() __asm__("ctime_r");
+double __difftime32() __asm__("difftime");
+int __fstat_time32() __asm__("fstat");
+int __fstatat_time32() __asm__("fstatat");
+int __ftime32() __asm__("ftime");
+int __futimens_time32() __asm__("futimens");
+int __futimes_time32() __asm__("futimes");
+int __futimesat_time32() __asm__("futimesat");
+int __getitimer_time32() __asm__("getitimer");
+int __getrusage_time32() __asm__("getrusage");
+int __gettimeofday_time32() __asm__("gettimeofday");
+struct tm *__gmtime32() __asm__("gmtime");
+struct tm *__gmtime32_r() __asm__("gmtime_r");
+struct tm *__localtime32() __asm__("localtime");
+struct tm *__localtime32_r() __asm__("localtime_r");
+int __lstat_time32() __asm__("lstat");
+int __lutimes_time32() __asm__("lutimes");
+time32_t __mktime32() __asm__("mktime");
+ssize_t __mq_timedreceive_time32() __asm__("mq_timedreceive");
+int __mq_timedsend_time32() __asm__("mq_timedsend");
+int __mtx_timedlock_time32() __asm__("mtx_timedlock");
+int __nanosleep_time32() __asm__("nanosleep");
+int __ppoll_time32() __asm__("ppoll");
+int __pselect_time32() __asm__("pselect");
+int __pthread_cond_timedwait_time32() __asm__("pthread_cond_timedwait");
+int __pthread_mutex_timedlock_time32() __asm__("pthread_mutex_timedlock");
+int __pthread_rwlock_timedrdlock_time32() __asm__("pthread_rwlock_timedrdlock");
+int __pthread_rwlock_timedwrlock_time32() __asm__("pthread_rwlock_timedwrlock");
+int __pthread_timedjoin_np_time32() __asm__("pthread_timedjoin_np");
+int __recvmmsg_time32() __asm__("recvmmsg");
+int __sched_rr_get_interval_time32() __asm__("sched_rr_get_interval");
+int __select_time32() __asm__("select");
+int __sem_timedwait_time32() __asm__("sem_timedwait");
+int __semtimedop_time32() __asm__("semtimedop");
+int __setitimer_time32() __asm__("setitimer");
+int __settimeofday_time32() __asm__("settimeofday");
+int __sigtimedwait_time32() __asm__("sigtimedwait");
+int __stat_time32() __asm__("stat");
+int __stime32() __asm__("stime");
+int __thrd_sleep_time32() __asm__("thrd_sleep");
+time32_t __time32() __asm__("time");
+time32_t __time32gm() __asm__("timegm");
+int __timer_gettime32() __asm__("timer_gettime");
+int __timer_settime32() __asm__("timer_settime");
+int __timerfd_gettime32() __asm__("timerfd_gettime");
+int __timerfd_settime32() __asm__("timerfd_settime");
+int __timespec_get_time32() __asm__("timespec_get");
+int __utime_time32() __asm__("utime");
+int __utimensat_time32() __asm__("utimensat");
+int __utimes_time32() __asm__("utimes");
+pid_t __wait3_time32() __asm__("wait3");
+pid_t __wait4_time32() __asm__("wait4");
+
+#endif
diff --git a/compat/time32/time32gm.c b/compat/time32/time32gm.c
new file mode 100644
index 00000000..60d68fbf
--- /dev/null
+++ b/compat/time32/time32gm.c
@@ -0,0 +1,15 @@
+#define _GNU_SOURCE
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+#include <stdint.h>
+
+time32_t __time32gm(struct tm *tm)
+{
+	time_t t = timegm(tm);
+	if (t < INT32_MIN || t > INT32_MAX) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	return t;
+}
diff --git a/compat/time32/timer_gettime32.c b/compat/time32/timer_gettime32.c
new file mode 100644
index 00000000..b4184cc2
--- /dev/null
+++ b/compat/time32/timer_gettime32.c
@@ -0,0 +1,15 @@
+#include "time32.h"
+#include <time.h>
+
+int __timer_gettime32(timer_t t, struct itimerspec32 *val32)
+{
+	struct itimerspec old;
+	int r = timer_gettime(t, &old);
+	if (r) return r;
+	/* No range checking for consistency with settime */
+	val32->it_interval.tv_sec = old.it_interval.tv_sec;
+	val32->it_interval.tv_nsec = old.it_interval.tv_nsec;
+	val32->it_value.tv_sec = old.it_value.tv_sec;
+	val32->it_value.tv_nsec = old.it_value.tv_nsec;
+	return 0;
+}
diff --git a/compat/time32/timer_settime32.c b/compat/time32/timer_settime32.c
new file mode 100644
index 00000000..a447e7d4
--- /dev/null
+++ b/compat/time32/timer_settime32.c
@@ -0,0 +1,25 @@
+#include "time32.h"
+#include <time.h>
+
+int __timer_settime32(timer_t t, int flags, const struct itimerspec32 *restrict val32, struct itimerspec32 *restrict old32)
+{
+	struct itimerspec old;
+	int r = timer_settime(t, flags, (&(struct itimerspec){
+		.it_interval.tv_sec = val32->it_interval.tv_sec,
+		.it_interval.tv_nsec = val32->it_interval.tv_nsec,
+		.it_value.tv_sec = val32->it_value.tv_sec,
+		.it_value.tv_nsec = val32->it_value.tv_nsec}),
+		old32 ? &old : 0);
+	if (r) return r;
+	/* The above call has already committed to success by changing the
+	 * timer setting, so we can't fail on out-of-range old value.
+	 * Since these are relative times, values large enough to overflow
+	 * don't make sense anyway. */
+	if (old32) {
+		old32->it_interval.tv_sec = old.it_interval.tv_sec;
+		old32->it_interval.tv_nsec = old.it_interval.tv_nsec;
+		old32->it_value.tv_sec = old.it_value.tv_sec;
+		old32->it_value.tv_nsec = old.it_value.tv_nsec;
+	}
+	return 0;
+}
diff --git a/compat/time32/timerfd_gettime32.c b/compat/time32/timerfd_gettime32.c
new file mode 100644
index 00000000..75e5435f
--- /dev/null
+++ b/compat/time32/timerfd_gettime32.c
@@ -0,0 +1,16 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/timerfd.h>
+
+int __timerfd_gettime32(int t, struct itimerspec32 *val32)
+{
+	struct itimerspec old;
+	int r = timerfd_gettime(t, &old);
+	if (r) return r;
+	/* No range checking for consistency with settime */
+	val32->it_interval.tv_sec = old.it_interval.tv_sec;
+	val32->it_interval.tv_nsec = old.it_interval.tv_nsec;
+	val32->it_value.tv_sec = old.it_value.tv_sec;
+	val32->it_value.tv_nsec = old.it_value.tv_nsec;
+	return 0;
+}
diff --git a/compat/time32/timerfd_settime32.c b/compat/time32/timerfd_settime32.c
new file mode 100644
index 00000000..67830d34
--- /dev/null
+++ b/compat/time32/timerfd_settime32.c
@@ -0,0 +1,26 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/timerfd.h>
+
+int __timerfd_settime32(int t, int flags, const struct itimerspec32 *restrict val32, struct itimerspec32 *restrict old32)
+{
+	struct itimerspec old;
+	int r = timerfd_settime(t, flags, (&(struct itimerspec){
+		.it_interval.tv_sec = val32->it_interval.tv_sec,
+		.it_interval.tv_nsec = val32->it_interval.tv_nsec,
+		.it_value.tv_sec = val32->it_value.tv_sec,
+		.it_value.tv_nsec = val32->it_value.tv_nsec}),
+		old32 ? &old : 0);
+	if (r) return r;
+	/* The above call has already committed to success by changing the
+	 * timer setting, so we can't fail on out-of-range old value.
+	 * Since these are relative times, values large enough to overflow
+	 * don't make sense anyway. */
+	if (old32) {
+		old32->it_interval.tv_sec = old.it_interval.tv_sec;
+		old32->it_interval.tv_nsec = old.it_interval.tv_nsec;
+		old32->it_value.tv_sec = old.it_value.tv_sec;
+		old32->it_value.tv_nsec = old.it_value.tv_nsec;
+	}
+	return 0;
+}
diff --git a/compat/time32/timespec_get_time32.c b/compat/time32/timespec_get_time32.c
new file mode 100644
index 00000000..e9ca94cb
--- /dev/null
+++ b/compat/time32/timespec_get_time32.c
@@ -0,0 +1,18 @@
+#include "time32.h"
+#include <time.h>
+#include <errno.h>
+#include <stdint.h>
+
+int __timespec_get_time32(struct timespec32 *ts32, int base)
+{
+	struct timespec ts;
+	int r = timespec_get(&ts, base);
+	if (!r) return r;
+	if (ts.tv_sec < INT32_MIN || ts.tv_sec > INT32_MAX) {
+		errno = EOVERFLOW;
+		return 0;
+	}
+	ts32->tv_sec = ts.tv_sec;
+	ts32->tv_nsec = ts.tv_nsec;
+	return r;
+}
diff --git a/compat/time32/utime_time32.c b/compat/time32/utime_time32.c
new file mode 100644
index 00000000..65f11d46
--- /dev/null
+++ b/compat/time32/utime_time32.c
@@ -0,0 +1,14 @@
+#include "time32.h"
+#include <time.h>
+#include <utime.h>
+
+struct utimbuf32 {
+	time32_t actime;
+	time32_t modtime;
+};
+
+int __utime_time32(const char *path, const struct utimbuf32 *times32)
+{
+	return utime(path, !times32 ? 0 : (&(struct utimbuf){
+		.actime = times32->actime, .modtime = times32->modtime}));
+}
diff --git a/compat/time32/utimensat_time32.c b/compat/time32/utimensat_time32.c
new file mode 100644
index 00000000..c687b8d1
--- /dev/null
+++ b/compat/time32/utimensat_time32.c
@@ -0,0 +1,11 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/stat.h>
+
+int __utimensat_time32(int fd, const char *path, const struct timespec32 times32[2], int flags)
+{
+	return utimensat(fd, path, !times32 ? 0 : ((struct timespec[2]){
+		{.tv_sec = times32[0].tv_sec,.tv_nsec = times32[0].tv_nsec},
+		{.tv_sec = times32[1].tv_sec,.tv_nsec = times32[1].tv_nsec}}),
+		flags);
+}
diff --git a/compat/time32/utimes_time32.c b/compat/time32/utimes_time32.c
new file mode 100644
index 00000000..59248f62
--- /dev/null
+++ b/compat/time32/utimes_time32.c
@@ -0,0 +1,11 @@
+#include "time32.h"
+#include <time.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+
+int __utimes_time32(const char *path, const struct timeval32 times32[2])
+{
+	return utimes(path, !times32 ? 0 : ((struct timeval[2]){
+		{.tv_sec = times32[0].tv_sec,.tv_usec = times32[0].tv_usec},
+		{.tv_sec = times32[1].tv_sec,.tv_usec = times32[1].tv_usec}}));
+}
diff --git a/compat/time32/wait3_time32.c b/compat/time32/wait3_time32.c
new file mode 100644
index 00000000..8fe128ed
--- /dev/null
+++ b/compat/time32/wait3_time32.c
@@ -0,0 +1,40 @@
+#define _BSD_SOURCE
+#include "time32.h"
+#include <string.h>
+#include <stddef.h>
+#include <sys/wait.h>
+
+struct compat_rusage {
+	struct timeval32 ru_utime;
+	struct timeval32 ru_stime;
+	long	ru_maxrss;
+	long	ru_ixrss;
+	long	ru_idrss;
+	long	ru_isrss;
+	long	ru_minflt;
+	long	ru_majflt;
+	long	ru_nswap;
+	long	ru_inblock;
+	long	ru_oublock;
+	long	ru_msgsnd;
+	long	ru_msgrcv;
+	long	ru_nsignals;
+	long	ru_nvcsw;
+	long	ru_nivcsw;
+};
+
+pid_t __wait3_time32(int *status, int options, struct compat_rusage *usage)
+{
+	struct rusage ru;
+	int r = wait3(status, options, usage ? &ru : 0);
+	if (!r && usage) {
+		usage->ru_utime.tv_sec = ru.ru_utime.tv_sec;
+		usage->ru_utime.tv_usec = ru.ru_utime.tv_usec;
+		usage->ru_stime.tv_sec = ru.ru_stime.tv_sec;
+		usage->ru_stime.tv_usec = ru.ru_stime.tv_usec;
+		memcpy(&usage->ru_maxrss, &ru.ru_maxrss,
+			sizeof(struct compat_rusage) -
+			offsetof(struct compat_rusage, ru_maxrss));
+	}
+	return r;
+}
diff --git a/compat/time32/wait4_time32.c b/compat/time32/wait4_time32.c
new file mode 100644
index 00000000..918548e7
--- /dev/null
+++ b/compat/time32/wait4_time32.c
@@ -0,0 +1,40 @@
+#define _BSD_SOURCE
+#include "time32.h"
+#include <string.h>
+#include <stddef.h>
+#include <sys/wait.h>
+
+struct compat_rusage {
+	struct timeval32 ru_utime;
+	struct timeval32 ru_stime;
+	long	ru_maxrss;
+	long	ru_ixrss;
+	long	ru_idrss;
+	long	ru_isrss;
+	long	ru_minflt;
+	long	ru_majflt;
+	long	ru_nswap;
+	long	ru_inblock;
+	long	ru_oublock;
+	long	ru_msgsnd;
+	long	ru_msgrcv;
+	long	ru_nsignals;
+	long	ru_nvcsw;
+	long	ru_nivcsw;
+};
+
+pid_t __wait4_time32(pid_t pid, int *status, int options, struct compat_rusage *usage)
+{
+	struct rusage ru;
+	int r = wait4(pid, status, options, usage ? &ru : 0);
+	if (!r && usage) {
+		usage->ru_utime.tv_sec = ru.ru_utime.tv_sec;
+		usage->ru_utime.tv_usec = ru.ru_utime.tv_usec;
+		usage->ru_stime.tv_sec = ru.ru_stime.tv_sec;
+		usage->ru_stime.tv_usec = ru.ru_stime.tv_usec;
+		memcpy(&usage->ru_maxrss, &ru.ru_maxrss,
+			sizeof(struct compat_rusage) -
+			offsetof(struct compat_rusage, ru_maxrss));
+	}
+	return r;
+}
diff --git a/configure b/configure
index 997e6652..bc9fbe48 100755
--- a/configure
+++ b/configure
@@ -30,11 +30,14 @@ System types:
 Optional features:
   --enable-optimize=...   optimize listed components for speed over size [auto]
   --enable-debug          build with debugging information [disabled]
-  --enable-warnings       build with recommended warnings flags [disabled]
+  --disable-warnings      build with recommended warnings flags [enabled]
   --enable-wrapper=...    build given musl toolchain wrapper [auto]
   --disable-shared        inhibit building shared library [enabled]
   --disable-static        inhibit building static library [enabled]
 
+Optional packages:
+  --with-malloc=...       choose malloc implementation [mallocng]
+
 Some influential environment variables:
   CC                      C compiler command [detected]
   CFLAGS                  C compiler flags [-Os -pipe ...]
@@ -133,12 +136,13 @@ build=
 target=
 optimize=auto
 debug=no
-warnings=no
+warnings=yes
 shared=auto
 static=yes
 wrapper=auto
 gcc_wrapper=no
 clang_wrapper=no
+malloc_dir=mallocng
 
 for arg ; do
 case "$arg" in
@@ -168,10 +172,13 @@ case "$arg" in
 --disable-wrapper|--enable-wrapper=no) wrapper=no ;;
 --enable-gcc-wrapper|--enable-gcc-wrapper=yes) wrapper=yes ; gcc_wrapper=yes ;;
 --disable-gcc-wrapper|--enable-gcc-wrapper=no) wrapper=no ;;
+--with-malloc=*) malloc_dir=${arg#*=} ;;
 --enable-*|--disable-*|--with-*|--without-*|--*dir=*) ;;
 --host=*|--target=*) target=${arg#*=} ;;
 --build=*) build=${arg#*=} ;;
 -* ) echo "$0: unknown option $arg" ;;
+AR=*) AR=${arg#*=} ;;
+RANLIB=*) RANLIB=${arg#*=} ;;
 CC=*) CC=${arg#*=} ;;
 CFLAGS=*) CFLAGS=${arg#*=} ;;
 CPPFLAGS=*) CPPFLAGS=${arg#*=} ;;
@@ -197,7 +204,7 @@ fi
 abs_builddir="$(pwd)" || fail "$0: cannot determine working directory"
 abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir"
 test "$abs_srcdir" = "$abs_builddir" && srcdir=.
-test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory"
+test "$srcdir" != "." && test -f Makefile && test ! -h Makefile && fail "$0: Makefile already exists in the working directory"
 
 #
 # Get a temp filename we can use
@@ -213,6 +220,12 @@ set +C
 trap 'rm "$tmpc"' EXIT INT QUIT TERM HUP
 
 #
+# Check that the requested malloc implementation exists
+#
+test -d "$srcdir/src/malloc/$malloc_dir" \
+|| fail "$0: error: chosen malloc implementation '$malloc_dir' does not exist"
+
+#
 # Check whether we are cross-compiling, and set a default
 # CROSS_COMPILE prefix if none was provided.
 #
@@ -266,7 +279,7 @@ echo "$cc_family"
 #
 # Figure out toolchain wrapper to build
 #
-if test "$wrapper" = auto -o "$wrapper" = detect ; then
+if test "$wrapper" = auto || test "$wrapper" = detect ; then
 echo "#include <stdlib.h>" > "$tmpc"
 echo "#if ! __GLIBC__" >> "$tmpc"
 echo "#error no" >> "$tmpc"
@@ -315,13 +328,16 @@ i?86*) ARCH=i386 ;;
 x86_64-x32*|x32*|x86_64*x32) ARCH=x32 ;;
 x86_64-nt64*) ARCH=nt64 ;;
 x86_64*) ARCH=x86_64 ;;
+loongarch64*) ARCH=loongarch64 ;;
 m68k*) ARCH=m68k ;;
 mips64*|mipsisa64*) ARCH=mips64 ;;
 mips*) ARCH=mips ;;
 microblaze*) ARCH=microblaze ;;
 or1k*) ARCH=or1k ;;
-powerpc64*) ARCH=powerpc64 ;;
-powerpc*) ARCH=powerpc ;;
+powerpc64*|ppc64*) ARCH=powerpc64 ;;
+powerpc*|ppc*) ARCH=powerpc ;;
+riscv64*) ARCH=riscv64 ;;
+riscv32*) ARCH=riscv32 ;;
 sh[1-9bel-]*|sh|superh*) ARCH=sh ;;
 s390x*) ARCH=s390x ;;
 unknown) fail "$0: unable to detect target arch; try $0 --target=..." ;;
@@ -340,6 +356,14 @@ tryflag CFLAGS_C99FSE -fexcess-precision=standard \
 tryflag CFLAGS_C99FSE -frounding-math
 
 #
+# Semantically we want to insist that our sources follow the
+# C rules for type-based aliasing, but most if not all real-world
+# compilers are known or suspected to have critical bugs in their
+# type-based aliasing analysis. See for example GCC bug 107107.
+#
+tryflag CFLAGS_C99FSE -fno-strict-aliasing
+
+#
 # We may use the may_alias attribute if __GNUC__ is defined, so
 # if the compiler defines __GNUC__ but does not provide it,
 # it must be defined away as part of the CFLAGS.
@@ -395,7 +419,7 @@ test "$debug" = yes && CFLAGS_AUTO=-g
 #
 printf "checking whether we should preprocess assembly to add debugging information... "
 if fnmatch '-g*|*\ -g*' "$CFLAGS_AUTO $CFLAGS" &&
-   test -f "tools/add-cfi.$ARCH.awk" &&
+   test -f "$srcdir/tools/add-cfi.$ARCH.awk" &&
    printf ".file 1 \"srcfile.s\"\n.line 1\n.cfi_startproc\n.cfi_endproc" | $CC -g -x assembler -c -o /dev/null 2>/dev/null -
 then
   ADD_CFI=yes
@@ -422,7 +446,20 @@ xno|x) printf "disabled\n" ; optimize=no ;;
 *) printf "custom\n" ;;
 esac
 
-test "$optimize" = no || tryflag CFLAGS_AUTO -Os || tryflag CFLAGS_AUTO -O2
+if test "$optimize" = no ; then :
+else
+tryflag CFLAGS_AUTO -O2
+tryflag CFLAGS_AUTO -fno-align-jumps
+tryflag CFLAGS_AUTO -fno-align-functions
+tryflag CFLAGS_AUTO -fno-align-loops
+tryflag CFLAGS_AUTO -fno-align-labels
+tryflag CFLAGS_AUTO -fira-region=one
+tryflag CFLAGS_AUTO -fira-hoist-pressure
+tryflag CFLAGS_AUTO -freorder-blocks-algorithm=simple \
+|| tryflag CFLAGS_AUTO -fno-reorder-blocks
+tryflag CFLAGS_AUTO -fno-prefetch-loop-arrays
+tryflag CFLAGS_AUTO -fno-tree-ch
+fi
 test "$optimize" = yes && optimize="internal,malloc,string"
 
 if fnmatch 'no|size' "$optimize" ; then :
@@ -454,7 +491,7 @@ tryflag CFLAGS_AUTO -pipe
 # pointer is no longer needed for debugging.
 #
 if fnmatch '-g*|*\ -g*' "$CFLAGS_AUTO $CFLAGS" ; then :
-else 
+else
 tryflag CFLAGS_AUTO -fomit-frame-pointer
 fi
 
@@ -492,6 +529,16 @@ fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -mtune=gen
 fi
 
 #
+# GCC defines -w as overriding any -W options, regardless of order, but
+# clang has a bunch of annoying warnings enabled by default and needs -w
+# to start from a clean slate. So use -w if building with clang. Also
+# turn off a common on-by-default cast warning regardless of compiler.
+#
+test "$cc_family" = clang && tryflag CFLAGS_AUTO -w
+
+tryflag CFLAGS_AUTO -Wno-pointer-to-int-cast
+
+#
 # Even with -std=c99, gcc accepts some constructs which are constraint
 # violations. We want to treat these as errors regardless of whether
 # other purely stylistic warnings are enabled -- especially implicit
@@ -501,6 +548,10 @@ tryflag CFLAGS_AUTO -Werror=implicit-function-declaration
 tryflag CFLAGS_AUTO -Werror=implicit-int
 tryflag CFLAGS_AUTO -Werror=pointer-sign
 tryflag CFLAGS_AUTO -Werror=pointer-arith
+tryflag CFLAGS_AUTO -Werror=int-conversion
+tryflag CFLAGS_AUTO -Werror=incompatible-pointer-types
+tryflag CFLAGS_AUTO -Werror=discarded-qualifiers
+tryflag CFLAGS_AUTO -Werror=discarded-array-qualifiers
 
 #
 # GCC ignores unused arguements by default, but Clang needs this extra
@@ -510,14 +561,17 @@ tryflag CFLAGS_AUTO -Werror=pointer-arith
 test "$cc_family" = clang && tryflag CFLAGS_AUTO -Qunused-arguments
 
 if test "x$warnings" = xyes ; then
-tryflag CFLAGS_AUTO -Wall
-tryflag CFLAGS_AUTO -Wno-parentheses
-tryflag CFLAGS_AUTO -Wno-uninitialized
-tryflag CFLAGS_AUTO -Wno-missing-braces
-tryflag CFLAGS_AUTO -Wno-unused-value
-tryflag CFLAGS_AUTO -Wno-unused-but-set-variable
-tryflag CFLAGS_AUTO -Wno-unknown-pragmas
-tryflag CFLAGS_AUTO -Wno-pointer-to-int-cast
+tryflag CFLAGS_AUTO -Waddress
+tryflag CFLAGS_AUTO -Warray-bounds
+tryflag CFLAGS_AUTO -Wchar-subscripts
+tryflag CFLAGS_AUTO -Wduplicate-decl-specifier
+tryflag CFLAGS_AUTO -Winit-self
+tryflag CFLAGS_AUTO -Wreturn-type
+tryflag CFLAGS_AUTO -Wsequence-point
+tryflag CFLAGS_AUTO -Wstrict-aliasing
+tryflag CFLAGS_AUTO -Wunused-function
+tryflag CFLAGS_AUTO -Wunused-label
+tryflag CFLAGS_AUTO -Wunused-variable
 fi
 
 # Determine if the compiler produces position-independent code (PIC)
@@ -573,6 +627,20 @@ printf "using compiler runtime libraries: %s\n" "$LIBCC"
 SUBARCH=
 t="$CFLAGS_C99FSE $CPPFLAGS $CFLAGS"
 
+if test "$ARCH" = "i386" ; then
+printf "checking whether compiler can use ebx in PIC asm constraints... "
+cat > "$tmpc" <<EOF
+int foo(int x) { __asm__ ( "" : "+b"(x) ); return x; }
+EOF
+if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -fPIC \
+  -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+printf "yes\n"
+else
+printf "no\n"
+CFLAGS_AUTO="$CFLAGS_AUTO -DBROKEN_EBX_ASM"
+fi
+fi
+
 if test "$ARCH" = "x86_64" ; then
 trycppif __ILP32__ "$t" && ARCH=x32
 fi
@@ -605,6 +673,19 @@ if test "$ARCH" = "aarch64" ; then
 trycppif __AARCH64EB__ "$t" && SUBARCH=${SUBARCH}_be
 fi
 
+if test "$ARCH" = "loongarch64" ; then
+trycppif __loongarch_soft_float "$t" && SUBARCH=${SUBARCH}-sf
+trycppif __loongarch_single_float "$t" && SUBARCH=${SUBARCH}-sp
+printf "checking whether assembler support FCSRs... "
+echo "__asm__(\"movfcsr2gr \$t0,\$fcsr0\");" > "$tmpc"
+if $CC -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+printf "yes\n"
+else
+printf "no\n"
+CFLAGS_AUTO="$CFLAGS_AUTO -DBROKEN_LOONGARCH_FCSR_ASM"
+fi
+fi
+
 if test "$ARCH" = "m68k" ; then
 if trycppif "__HAVE_68881__" ; then : ;
 elif trycppif "__mcffpu__" ; then SUBARCH="-fp64"
@@ -626,9 +707,16 @@ trycppif __mips_soft_float "$t" && SUBARCH=${SUBARCH}-sf
 fi
 
 if test "$ARCH" = "powerpc" ; then
-trycppif "__NO_FPRS__ && !_SOFT_FLOAT" "$t" && fail \
-  "$0: error: compiler's floating point configuration is unsupported"
-trycppif _SOFT_FLOAT "$t" && SUBARCH=${SUBARCH}-sf
+trycppif "_SOFT_FLOAT || __NO_FPRS__" "$t" && SUBARCH=${SUBARCH}-sf
+printf "checking whether compiler can use 'd' constraint in asm... "
+echo 'double f(double x) { __asm__ ("fabs %0, %1" : "=d"(x) : "d"(x)); return x; }' > "$tmpc"
+if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+printf "yes\n"
+else
+printf "no\n"
+CFLAGS_AUTO="$CFLAGS_AUTO -DBROKEN_PPC_D_ASM"
+CFLAGS_AUTO="${CFLAGS_AUTO# }"
+fi
 fi
 
 test "$ARCH" = "microblaze" && trycppif __MICROBLAZEEL__ "$t" \
@@ -640,6 +728,11 @@ trycppif __LITTLE_ENDIAN__ "$t" && SUBARCH=${SUBARCH}le
 trycppif _SOFT_FLOAT "$t" && fail "$0: error: soft-float not supported on powerpc64"
 fi
 
+if test "$ARCH" = "riscv64" -o "$ARCH" = "riscv32" ; then
+trycppif __riscv_float_abi_soft "$t" && SUBARCH=${SUBARCH}-sf
+trycppif __riscv_float_abi_single "$t" && SUBARCH=${SUBARCH}-sp
+fi
+
 if test "$ARCH" = "sh" ; then
 tryflag CFLAGS_AUTO -Wa,--isa=any
 trycppif __BIG_ENDIAN__ "$t" && SUBARCH=${SUBARCH}eb
@@ -666,11 +759,6 @@ fi
 test "$SUBARCH" \
 && printf "configured for %s variant: %s\n" "$ARCH" "$ARCH$SUBARCH"
 
-case "$ARCH$SUBARCH" in
-arm) ASMSUBARCH=el ;;
-*) ASMSUBARCH=$SUBARCH ;;
-esac
-
 #
 # Some archs (powerpc) have different possible long double formats
 # that the compiler can be configured for. The logic for whether this
@@ -714,6 +802,8 @@ cat << EOF
 # This version of config.mak was generated by:
 # $cmdline
 # Any changes made here will be lost if configure is re-run
+AR = ${AR:-\$(CROSS_COMPILE)ar}
+RANLIB = ${RANLIB:-\$(CROSS_COMPILE)ranlib}
 ARCH = $ARCH
 SUBARCH = $SUBARCH
 ASMSUBARCH = $ASMSUBARCH
@@ -739,6 +829,7 @@ OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS
 ALL_TOOLS = $tools
 TOOL_LIBS = $tool_libs
 ADD_CFI = $ADD_CFI
+MALLOC_DIR = $malloc_dir
 EOF
 test "x$static" = xno && echo "STATIC_LIBS ="
 test "x$shared" = xno && echo "SHARED_LIBS ="
diff --git a/crt/aarch64/crti.s b/crt/aarch64/crti.s
index 775df0ac..3776fa64 100644
--- a/crt/aarch64/crti.s
+++ b/crt/aarch64/crti.s
@@ -1,6 +1,7 @@
 .section .init
 .global _init
 .type _init,%function
+.align 2
 _init:
 	stp x29,x30,[sp,-16]!
 	mov x29,sp
@@ -8,6 +9,7 @@ _init:
 .section .fini
 .global _fini
 .type _fini,%function
+.align 2
 _fini:
 	stp x29,x30,[sp,-16]!
 	mov x29,sp
diff --git a/crt/arm/crti.s b/crt/arm/crti.s
index 18dc1e41..cccda3ea 100644
--- a/crt/arm/crti.s
+++ b/crt/arm/crti.s
@@ -3,11 +3,13 @@
 .section .init
 .global _init
 .type _init,%function
+.align 2
 _init:
 	push {r0,lr}
 
 .section .fini
 .global _fini
 .type _fini,%function
+.align 2
 _fini:
 	push {r0,lr}
diff --git a/crt/crt1.c b/crt/crt1.c
index 7b12665f..10601215 100644
--- a/crt/crt1.c
+++ b/crt/crt1.c
@@ -8,10 +8,10 @@
 int main();
 weak void _init();
 weak void _fini();
-_Noreturn int __libc_start_main(int (*)(), int, char **,
+int __libc_start_main(int (*)(), int, char **,
 	void (*)(), void(*)(), void(*)());
 
-void _start_c(long *p)
+hidden void _start_c(long *p)
 {
 	int argc = p[0];
 	char **argv = (void *)(p+1);
diff --git a/crt/mips/crtn.s b/crt/mips/crtn.s
index 506a04b7..92eb3d0e 100644
--- a/crt/mips/crtn.s
+++ b/crt/mips/crtn.s
@@ -3,11 +3,11 @@
 .section .init
 	lw $gp,24($sp)
 	lw $ra,28($sp)
-	j $ra
+	jr $ra
 	addu $sp,$sp,32
 
 .section .fini
 	lw $gp,24($sp)
 	lw $ra,28($sp)
-	j $ra
+	jr $ra
 	addu $sp,$sp,32
diff --git a/crt/mips64/crtn.s b/crt/mips64/crtn.s
index f3930b24..8f090ed3 100644
--- a/crt/mips64/crtn.s
+++ b/crt/mips64/crtn.s
@@ -3,11 +3,11 @@
 .section .init
 	ld $gp,16($sp)
 	ld $ra,24($sp)
-	j $ra
+	jr $ra
 	daddu $sp,$sp,32
 
 .section .fini
 	ld $gp,16($sp)
 	ld $ra,24($sp)
-	j $ra
+	jr $ra
 	daddu $sp,$sp,32
diff --git a/crt/mipsn32/crtn.s b/crt/mipsn32/crtn.s
index dccd7e89..0679eac3 100644
--- a/crt/mipsn32/crtn.s
+++ b/crt/mipsn32/crtn.s
@@ -2,11 +2,11 @@
 .section	.init
 	ld	$gp, 16($sp)
 	ld	$ra, 24($sp)
-	j	$ra
+	jr	$ra
 	addu	$sp, $sp, 32
 
 .section	.fini
 	ld	$gp, 16($sp)
 	ld	$ra, 24($sp)
-	j	$ra
+	jr	$ra
 	addu	$sp, $sp, 32
diff --git a/crt/rcrt1.c b/crt/rcrt1.c
index 7bb3322f..901dff68 100644
--- a/crt/rcrt1.c
+++ b/crt/rcrt1.c
@@ -5,10 +5,10 @@
 int main();
 weak void _init();
 weak void _fini();
-_Noreturn int __libc_start_main(int (*)(), int, char **,
+int __libc_start_main(int (*)(), int, char **,
 	void (*)(), void(*)(), void(*)());
 
-hidden _Noreturn void __dls2(unsigned char *base, size_t *sp)
+hidden void __dls2(unsigned char *base, size_t *sp)
 {
 	__libc_start_main(main, *sp, (void *)(sp+1), _init, _fini, 0);
 }
diff --git a/include/aio.h b/include/aio.h
index 19bc28a9..a938fcad 100644
--- a/include/aio.h
+++ b/include/aio.h
@@ -49,7 +49,7 @@ int aio_fsync(int, struct aiocb *);
 
 int lio_listio(int, struct aiocb *__restrict const *__restrict, int, struct sigevent *__restrict);
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define aiocb64 aiocb
 #define aio_read64 aio_read
 #define aio_write64 aio_write
@@ -62,6 +62,10 @@ int lio_listio(int, struct aiocb *__restrict const *__restrict, int, struct sige
 #define off64_t off_t
 #endif
 
+#if _REDIR_TIME64
+__REDIR(aio_suspend, __aio_suspend_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/alloca.h b/include/alloca.h
index d2e6f1c6..b8d183d1 100644
--- a/include/alloca.h
+++ b/include/alloca.h
@@ -10,9 +10,7 @@ extern "C" {
 
 void *alloca(size_t);
 
-#ifdef __GNUC__
 #define alloca __builtin_alloca
-#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/alltypes.h.in b/include/alltypes.h.in
index 622ca01d..d47aeea9 100644
--- a/include/alltypes.h.in
+++ b/include/alltypes.h.in
@@ -1,3 +1,7 @@
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+#define __USE_TIME_BITS64 1
+
 TYPEDEF unsigned _Addr size_t;
 TYPEDEF unsigned _Addr uintptr_t;
 TYPEDEF _Addr ptrdiff_t;
@@ -5,6 +9,8 @@ TYPEDEF _Addr ssize_t;
 TYPEDEF _Addr intptr_t;
 TYPEDEF _Addr regoff_t;
 TYPEDEF _Reg register_t;
+TYPEDEF _Int64 time_t;
+TYPEDEF _Int64 suseconds_t;
 
 TYPEDEF signed char     int8_t;
 TYPEDEF signed short    int16_t;
@@ -35,7 +41,7 @@ TYPEDEF void * timer_t;
 TYPEDEF int clockid_t;
 TYPEDEF long clock_t;
 STRUCT timeval { time_t tv_sec; suseconds_t tv_usec; };
-STRUCT timespec { time_t tv_sec; long tv_nsec; };
+STRUCT timespec { time_t tv_sec; int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER==4321); long tv_nsec; int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER!=4321); };
 
 TYPEDEF int pid_t;
 TYPEDEF unsigned id_t;
@@ -57,8 +63,12 @@ TYPEDEF struct { unsigned __attr; } pthread_condattr_t;
 TYPEDEF struct { unsigned __attr; } pthread_barrierattr_t;
 TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t;
 
+STRUCT _IO_FILE { char __x; };
 TYPEDEF struct _IO_FILE FILE;
 
+TYPEDEF __builtin_va_list va_list;
+TYPEDEF __builtin_va_list __isoc_va_list;
+
 TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t;
 
 TYPEDEF struct __locale_struct * locale_t;
@@ -67,9 +77,19 @@ TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t;
 
 STRUCT iovec { void *iov_base; size_t iov_len; };
 
+STRUCT winsize { unsigned short ws_row, ws_col, ws_xpixel, ws_ypixel; };
+
 TYPEDEF unsigned socklen_t;
 TYPEDEF unsigned short sa_family_t;
 
+TYPEDEF struct { union { int __i[sizeof(long)==8?14:9]; volatile int __vi[sizeof(long)==8?14:9]; unsigned long __s[sizeof(long)==8?7:9]; } __u; } pthread_attr_t;
+TYPEDEF struct { union { int __i[sizeof(long)==8?10:6]; volatile int __vi[sizeof(long)==8?10:6]; volatile void *volatile __p[sizeof(long)==8?5:6]; } __u; } pthread_mutex_t;
+TYPEDEF struct { union { int __i[sizeof(long)==8?10:6]; volatile int __vi[sizeof(long)==8?10:6]; volatile void *volatile __p[sizeof(long)==8?5:6]; } __u; } mtx_t;
+TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12*sizeof(int)/sizeof(void*)]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12*sizeof(int)/sizeof(void*)]; } __u; } cnd_t;
+TYPEDEF struct { union { int __i[sizeof(long)==8?14:8]; volatile int __vi[sizeof(long)==8?14:8]; void *__p[sizeof(long)==8?7:8]; } __u; } pthread_rwlock_t;
+TYPEDEF struct { union { int __i[sizeof(long)==8?8:5]; volatile int __vi[sizeof(long)==8?8:5]; void *__p[sizeof(long)==8?4:5]; } __u; } pthread_barrier_t;
+
 #undef _Addr
 #undef _Int64
 #undef _Reg
diff --git a/include/arpa/inet.h b/include/arpa/inet.h
index 37f8c11e..9d20a15b 100644
--- a/include/arpa/inet.h
+++ b/include/arpa/inet.h
@@ -24,11 +24,6 @@ struct in_addr inet_makeaddr(in_addr_t, in_addr_t);
 in_addr_t inet_lnaof(struct in_addr);
 in_addr_t inet_netof(struct in_addr);
 
-#undef INET_ADDRSTRLEN
-#undef INET6_ADDRSTRLEN
-#define INET_ADDRSTRLEN  16
-#define INET6_ADDRSTRLEN 46
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/arpa/nameser.h b/include/arpa/nameser.h
index b315e0f3..9c1327a1 100644
--- a/include/arpa/nameser.h
+++ b/include/arpa/nameser.h
@@ -7,7 +7,6 @@ extern "C" {
 
 #include <stddef.h>
 #include <stdint.h>
-#include <endian.h>
 
 #define __NAMESER	19991006
 #define NS_PACKETSZ	512
@@ -189,6 +188,36 @@ typedef enum __ns_type {
 	ns_t_sink = 40,
 	ns_t_opt = 41,
 	ns_t_apl = 42,
+	ns_t_ds = 43,
+	ns_t_sshfp = 44,
+	ns_t_ipseckey = 45,
+	ns_t_rrsig = 46,
+	ns_t_nsec = 47,
+	ns_t_dnskey = 48,
+	ns_t_dhcid = 49,
+	ns_t_nsec3 = 50,
+	ns_t_nsec3param = 51,
+	ns_t_tlsa = 52,
+	ns_t_smimea = 53,
+	ns_t_hip = 55,
+	ns_t_ninfo = 56,
+	ns_t_rkey = 57,
+	ns_t_talink = 58,
+	ns_t_cds = 59,
+	ns_t_cdnskey = 60,
+	ns_t_openpgpkey = 61,
+	ns_t_csync = 62,
+	ns_t_spf = 99,
+	ns_t_uinfo = 100,
+	ns_t_uid = 101,
+	ns_t_gid = 102,
+	ns_t_unspec = 103,
+	ns_t_nid = 104,
+	ns_t_l32 = 105,
+	ns_t_l64 = 106,
+	ns_t_lp = 107,
+	ns_t_eui48 = 108,
+	ns_t_eui64 = 109,
 	ns_t_tkey = 249,
 	ns_t_tsig = 250,
 	ns_t_ixfr = 251,
@@ -197,6 +226,11 @@ typedef enum __ns_type {
 	ns_t_maila = 254,
 	ns_t_any = 255,
 	ns_t_zxfr = 256,
+	ns_t_uri = 256,
+	ns_t_caa = 257,
+	ns_t_avc = 258,
+	ns_t_ta = 32768,
+	ns_t_dlv = 32769,
 	ns_t_max = 65536
 } ns_type;
 
@@ -431,12 +465,48 @@ typedef struct {
 #define T_NAPTR		ns_t_naptr
 #define T_A6		ns_t_a6
 #define T_DNAME		ns_t_dname
+#define T_DS		ns_t_ds
+#define T_SSHFP		ns_t_sshfp
+#define T_IPSECKEY	ns_t_ipseckey
+#define T_RRSIG		ns_t_rrsig
+#define T_NSEC		ns_t_nsec
+#define T_DNSKEY	ns_t_dnskey
+#define T_DHCID		ns_t_dhcid
+#define T_NSEC3		ns_t_nsec3
+#define T_NSEC3PARAM	ns_t_nsec3param
+#define T_TLSA		ns_t_tlsa
+#define T_SMIMEA	ns_t_smimea
+#define T_HIP		ns_t_hip
+#define T_NINFO		ns_t_ninfo
+#define T_RKEY		ns_t_rkey
+#define T_TALINK	ns_t_talink
+#define T_CDS		ns_t_cds
+#define T_CDNSKEY	ns_t_cdnskey
+#define T_OPENPGPKEY	ns_t_openpgpkey
+#define T_CSYNC		ns_t_csync
+#define T_SPF		ns_t_spf
+#define T_UINFO		ns_t_uinfo
+#define T_UID		ns_t_uid
+#define T_GID		ns_t_gid
+#define T_UNSPEC	ns_t_unspec
+#define T_NID		ns_t_nid
+#define T_L32		ns_t_l32
+#define T_L64		ns_t_l64
+#define T_LP		ns_t_lp
+#define T_EUI48		ns_t_eui48
+#define T_EUI64		ns_t_eui64
+#define T_TKEY		ns_t_tkey
 #define	T_TSIG		ns_t_tsig
 #define	T_IXFR		ns_t_ixfr
 #define T_AXFR		ns_t_axfr
 #define T_MAILB		ns_t_mailb
 #define T_MAILA		ns_t_maila
 #define T_ANY		ns_t_any
+#define T_URI		ns_t_uri
+#define T_CAA		ns_t_caa
+#define T_AVC		ns_t_avc
+#define T_TA		ns_t_ta
+#define T_DLV		ns_t_dlv
 
 #define C_IN		ns_c_in
 #define C_CHAOS		ns_c_chaos
diff --git a/include/ctype.h b/include/ctype.h
index 7936536f..32bcef4d 100644
--- a/include/ctype.h
+++ b/include/ctype.h
@@ -64,7 +64,9 @@ int   isascii(int);
 int   toascii(int);
 #define _tolower(a) ((a)|0x20)
 #define _toupper(a) ((a)&0x5f)
+#ifndef __cplusplus
 #define isascii(a) (0 ? isascii(a) : (unsigned)(a) < 128)
+#endif
 
 #endif
 
diff --git a/include/dirent.h b/include/dirent.h
index e0a8fe6a..7fa60e06 100644
--- a/include/dirent.h
+++ b/include/dirent.h
@@ -9,26 +9,25 @@ extern "C" {
 
 #define __NEED_ino_t
 #define __NEED_off_t
-#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE)
 #define __NEED_size_t
-#endif
+#define __NEED_ssize_t
 
 #include <bits/alltypes.h>
 
-typedef struct __dirstream DIR;
+#include <bits/dirent.h>
 
-#define _DIRENT_HAVE_D_RECLEN
-#define _DIRENT_HAVE_D_OFF
-#define _DIRENT_HAVE_D_TYPE
+typedef unsigned short reclen_t;
 
-struct dirent {
+struct posix_dent {
 	ino_t d_ino;
 	off_t d_off;
-	unsigned short d_reclen;
+	reclen_t d_reclen;
 	unsigned char d_type;
-	char d_name[256];
+	char d_name[];
 };
 
+typedef struct __dirstream DIR;
+
 #define d_fileno d_ino
 
 int            closedir(DIR *);
@@ -39,6 +38,8 @@ int            readdir_r(DIR *__restrict, struct dirent *__restrict, struct dire
 void           rewinddir(DIR *);
 int            dirfd(DIR *);
 
+ssize_t posix_getdents(int, void *, size_t, int);
+
 int alphasort(const struct dirent **, const struct dirent **);
 int scandir(const char *, struct dirent ***, int (*)(const struct dirent *), int (*)(const struct dirent **, const struct dirent **));
 
@@ -47,7 +48,6 @@ void           seekdir(DIR *, long);
 long           telldir(DIR *);
 #endif
 
-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define DT_UNKNOWN 0
 #define DT_FIFO 1
 #define DT_CHR 2
@@ -57,6 +57,8 @@ long           telldir(DIR *);
 #define DT_LNK 10
 #define DT_SOCK 12
 #define DT_WHT 14
+
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define IFTODT(x) ((x)>>12 & 017)
 #define DTTOIF(x) ((x)<<12)
 int getdents(int, struct dirent *, size_t);
@@ -66,7 +68,7 @@ int getdents(int, struct dirent *, size_t);
 int versionsort(const struct dirent **, const struct dirent **);
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define dirent64 dirent
 #define readdir64 readdir
 #define readdir64_r readdir_r
diff --git a/include/dlfcn.h b/include/dlfcn.h
index 78fb0733..13ab71dd 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -35,6 +35,10 @@ int dladdr(const void *, Dl_info *);
 int dlinfo(void *, int, void *);
 #endif
 
+#if _REDIR_TIME64
+__REDIR(dlsym, __dlsym_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/elf.h b/include/elf.h
index 54f41a10..8b622f63 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -314,7 +314,9 @@ typedef struct {
 #define EM_AMDGPU	224
 #define EM_RISCV	243
 #define EM_BPF		247
-#define EM_NUM		248
+#define EM_CSKY		252
+#define EM_LOONGARCH	258
+#define EM_NUM		259
 
 #define EM_ALPHA	0x9026
 
@@ -384,7 +386,8 @@ typedef struct {
 #define SHT_PREINIT_ARRAY 16
 #define SHT_GROUP	  17
 #define SHT_SYMTAB_SHNDX  18
-#define	SHT_NUM		  19
+#define SHT_RELR	  19
+#define	SHT_NUM		  20
 #define SHT_LOOS	  0x60000000
 #define SHT_GNU_ATTRIBUTES 0x6ffffff5
 #define SHT_GNU_HASH	  0x6ffffff6
@@ -435,6 +438,7 @@ typedef struct {
 } Elf64_Chdr;
 
 #define ELFCOMPRESS_ZLIB	1
+#define ELFCOMPRESS_ZSTD	2
 #define ELFCOMPRESS_LOOS	0x60000000
 #define ELFCOMPRESS_HIOS	0x6fffffff
 #define ELFCOMPRESS_LOPROC	0x70000000
@@ -555,6 +559,11 @@ typedef struct {
 
 
 
+typedef Elf32_Word Elf32_Relr;
+typedef Elf64_Xword Elf64_Relr;
+
+
+
 #define ELF32_R_SYM(val)		((val) >> 8)
 #define ELF32_R_TYPE(val)		((val) & 0xff)
 #define ELF32_R_INFO(sym, type)		(((sym) << 8) + ((type) & 0xff))
@@ -602,6 +611,7 @@ typedef struct {
 #define PT_GNU_EH_FRAME	0x6474e550
 #define PT_GNU_STACK	0x6474e551
 #define PT_GNU_RELRO	0x6474e552
+#define PT_GNU_PROPERTY	0x6474e553
 #define PT_LOSUNW	0x6ffffffa
 #define PT_SUNWBSS	0x6ffffffa
 #define PT_SUNWSTACK	0x6ffffffb
@@ -681,12 +691,27 @@ typedef struct {
 #define NT_ARM_HW_WATCH	0x403
 #define NT_ARM_SYSTEM_CALL	0x404
 #define NT_ARM_SVE	0x405
+#define NT_ARM_PAC_MASK	0x406
+#define NT_ARM_PACA_KEYS	0x407
+#define NT_ARM_PACG_KEYS	0x408
+#define NT_ARM_TAGGED_ADDR_CTRL	0x409
+#define NT_ARM_PAC_ENABLED_KEYS	0x40a
 #define NT_METAG_CBUF	0x500
 #define NT_METAG_RPIPE	0x501
 #define NT_METAG_TLS	0x502
 #define NT_ARC_V2	0x600
 #define NT_VMCOREDD	0x700
+#define NT_MIPS_DSP	0x800
+#define NT_MIPS_FP_MODE	0x801
+#define NT_MIPS_MSA	0x802
+#define NT_RISCV_CSR	0x900
+#define NT_RISCV_VECTOR	0x901
 #define NT_VERSION	1
+#define NT_LOONGARCH_CPUCFG	0xa00
+#define NT_LOONGARCH_CSR	0xa01
+#define NT_LOONGARCH_LSX	0xa02
+#define NT_LOONGARCH_LASX	0xa03
+#define NT_LOONGARCH_LBT	0xa04
 
 
 
@@ -744,7 +769,10 @@ typedef struct {
 #define DT_PREINIT_ARRAY 32
 #define DT_PREINIT_ARRAYSZ 33
 #define DT_SYMTAB_SHNDX	34
-#define	DT_NUM		35
+#define DT_RELRSZ	35
+#define DT_RELR		36
+#define DT_RELRENT	37
+#define	DT_NUM		38
 #define DT_LOOS		0x6000000d
 #define DT_HIOS		0x6ffff000
 #define DT_LOPROC	0x70000000
@@ -1078,6 +1106,7 @@ typedef struct {
 
 #define NT_GNU_BUILD_ID	3
 #define NT_GNU_GOLD_VERSION	4
+#define NT_GNU_PROPERTY_TYPE_0	5
 
 
 
@@ -2619,6 +2648,61 @@ enum
 #define R_ARM_NUM		256
 
 
+#define R_CKCORE_NONE               0
+#define R_CKCORE_ADDR32             1
+#define R_CKCORE_PCRELIMM8BY4       2
+#define R_CKCORE_PCRELIMM11BY2      3
+#define R_CKCORE_PCREL32            5
+#define R_CKCORE_PCRELJSR_IMM11BY2  6
+#define R_CKCORE_RELATIVE           9
+#define R_CKCORE_COPY               10
+#define R_CKCORE_GLOB_DAT           11
+#define R_CKCORE_JUMP_SLOT          12
+#define R_CKCORE_GOTOFF             13
+#define R_CKCORE_GOTPC              14
+#define R_CKCORE_GOT32              15
+#define R_CKCORE_PLT32              16
+#define R_CKCORE_ADDRGOT            17
+#define R_CKCORE_ADDRPLT            18
+#define R_CKCORE_PCREL_IMM26BY2     19
+#define R_CKCORE_PCREL_IMM16BY2     20
+#define R_CKCORE_PCREL_IMM16BY4     21
+#define R_CKCORE_PCREL_IMM10BY2     22
+#define R_CKCORE_PCREL_IMM10BY4     23
+#define R_CKCORE_ADDR_HI16          24
+#define R_CKCORE_ADDR_LO16          25
+#define R_CKCORE_GOTPC_HI16         26
+#define R_CKCORE_GOTPC_LO16         27
+#define R_CKCORE_GOTOFF_HI16        28
+#define R_CKCORE_GOTOFF_LO16        29
+#define R_CKCORE_GOT12              30
+#define R_CKCORE_GOT_HI16           31
+#define R_CKCORE_GOT_LO16           32
+#define R_CKCORE_PLT12              33
+#define R_CKCORE_PLT_HI16           34
+#define R_CKCORE_PLT_LO16           35
+#define R_CKCORE_ADDRGOT_HI16       36
+#define R_CKCORE_ADDRGOT_LO16       37
+#define R_CKCORE_ADDRPLT_HI16       38
+#define R_CKCORE_ADDRPLT_LO16       39
+#define R_CKCORE_PCREL_JSR_IMM26BY2 40
+#define R_CKCORE_TOFFSET_LO16       41
+#define R_CKCORE_DOFFSET_LO16       42
+#define R_CKCORE_PCREL_IMM18BY2     43
+#define R_CKCORE_DOFFSET_IMM18      44
+#define R_CKCORE_DOFFSET_IMM18BY2   45
+#define R_CKCORE_DOFFSET_IMM18BY4   46
+#define R_CKCORE_GOT_IMM18BY4       48
+#define R_CKCORE_PLT_IMM18BY4       49
+#define R_CKCORE_PCREL_IMM7BY4      50
+#define R_CKCORE_TLS_LE32           51
+#define R_CKCORE_TLS_IE32           52
+#define R_CKCORE_TLS_GD32           53
+#define R_CKCORE_TLS_LDM32          54
+#define R_CKCORE_TLS_LDO32          55
+#define R_CKCORE_TLS_DTPMOD32       56
+#define R_CKCORE_TLS_DTPOFF32       57
+#define R_CKCORE_TLS_TPOFF32        58
 
 
 #define EF_IA_64_MASKOS		0x0000000f
@@ -3166,6 +3250,163 @@ enum
 #define R_BPF_NONE		0
 #define R_BPF_MAP_FD		1
 
+#define R_RISCV_NONE            0
+#define R_RISCV_32              1
+#define R_RISCV_64              2
+#define R_RISCV_RELATIVE        3
+#define R_RISCV_COPY            4
+#define R_RISCV_JUMP_SLOT       5
+#define R_RISCV_TLS_DTPMOD32    6
+#define R_RISCV_TLS_DTPMOD64    7
+#define R_RISCV_TLS_DTPREL32    8
+#define R_RISCV_TLS_DTPREL64    9
+#define R_RISCV_TLS_TPREL32     10
+#define R_RISCV_TLS_TPREL64     11
+#define R_RISCV_TLSDESC         12
+
+#define R_RISCV_BRANCH          16
+#define R_RISCV_JAL             17
+#define R_RISCV_CALL            18
+#define R_RISCV_CALL_PLT        19
+#define R_RISCV_GOT_HI20        20
+#define R_RISCV_TLS_GOT_HI20    21
+#define R_RISCV_TLS_GD_HI20     22
+#define R_RISCV_PCREL_HI20      23
+#define R_RISCV_PCREL_LO12_I    24
+#define R_RISCV_PCREL_LO12_S    25
+#define R_RISCV_HI20            26
+#define R_RISCV_LO12_I          27
+#define R_RISCV_LO12_S          28
+#define R_RISCV_TPREL_HI20      29
+#define R_RISCV_TPREL_LO12_I    30
+#define R_RISCV_TPREL_LO12_S    31
+#define R_RISCV_TPREL_ADD       32
+#define R_RISCV_ADD8            33
+#define R_RISCV_ADD16           34
+#define R_RISCV_ADD32           35
+#define R_RISCV_ADD64           36
+#define R_RISCV_SUB8            37
+#define R_RISCV_SUB16           38
+#define R_RISCV_SUB32           39
+#define R_RISCV_SUB64           40
+#define R_RISCV_GOT32_PCREL     41
+#define R_RISCV_ALIGN           43
+#define R_RISCV_RVC_BRANCH      44
+#define R_RISCV_RVC_JUMP        45
+#define R_RISCV_RVC_LUI         46
+#define R_RISCV_RELAX           51
+#define R_RISCV_SUB6            52
+#define R_RISCV_SET6            53
+#define R_RISCV_SET8            54
+#define R_RISCV_SET16           55
+#define R_RISCV_SET32           56
+#define R_RISCV_32_PCREL        57
+#define R_RISCV_IRELATIVE       58
+#define R_RISCV_PLT32           59
+#define R_RISCV_SET_ULEB128     60
+#define R_RISCV_SUB_ULEB128     61
+#define R_RISCV_TLSDESC_HI20    62
+#define R_RISCV_TLSDESC_LOAD_LO12 63
+#define R_RISCV_TLSDESC_ADD_LO12  64
+#define R_RISCV_TLSDESC_CALL    65
+
+#define EF_LARCH_ABI_MODIFIER_MASK    0x07
+#define EF_LARCH_ABI_SOFT_FLOAT       0x01
+#define EF_LARCH_ABI_SINGLE_FLOAT     0x02
+#define EF_LARCH_ABI_DOUBLE_FLOAT     0x03
+#define EF_LARCH_OBJABI_V1            0x40
+
+#define R_LARCH_NONE                        0
+#define R_LARCH_32                          1
+#define R_LARCH_64                          2
+#define R_LARCH_RELATIVE                    3
+#define R_LARCH_COPY                        4
+#define R_LARCH_JUMP_SLOT                   5
+#define R_LARCH_TLS_DTPMOD32                6
+#define R_LARCH_TLS_DTPMOD64                7
+#define R_LARCH_TLS_DTPREL32                8
+#define R_LARCH_TLS_DTPREL64                9
+#define R_LARCH_TLS_TPREL32                 10
+#define R_LARCH_TLS_TPREL64                 11
+#define R_LARCH_IRELATIVE                   12
+#define R_LARCH_TLS_DESC64                  14
+#define R_LARCH_MARK_LA                     20
+#define R_LARCH_MARK_PCREL                  21
+#define R_LARCH_SOP_PUSH_PCREL              22
+#define R_LARCH_SOP_PUSH_ABSOLUTE           23
+#define R_LARCH_SOP_PUSH_DUP                24
+#define R_LARCH_SOP_PUSH_GPREL              25
+#define R_LARCH_SOP_PUSH_TLS_TPREL          26
+#define R_LARCH_SOP_PUSH_TLS_GOT            27
+#define R_LARCH_SOP_PUSH_TLS_GD             28
+#define R_LARCH_SOP_PUSH_PLT_PCREL          29
+#define R_LARCH_SOP_ASSERT                  30
+#define R_LARCH_SOP_NOT                     31
+#define R_LARCH_SOP_SUB                     32
+#define R_LARCH_SOP_SL                      33
+#define R_LARCH_SOP_SR                      34
+#define R_LARCH_SOP_ADD                     35
+#define R_LARCH_SOP_AND                     36
+#define R_LARCH_SOP_IF_ELSE                 37
+#define R_LARCH_SOP_POP_32_S_10_5           38
+#define R_LARCH_SOP_POP_32_U_10_12          39
+#define R_LARCH_SOP_POP_32_S_10_12          40
+#define R_LARCH_SOP_POP_32_S_10_16          41
+#define R_LARCH_SOP_POP_32_S_10_16_S2       42
+#define R_LARCH_SOP_POP_32_S_5_20           43
+#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2   44
+#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2  45
+#define R_LARCH_SOP_POP_32_U                46
+#define R_LARCH_ADD8                        47
+#define R_LARCH_ADD16                       48
+#define R_LARCH_ADD24                       49
+#define R_LARCH_ADD32                       50
+#define R_LARCH_ADD64                       51
+#define R_LARCH_SUB8                        52
+#define R_LARCH_SUB16                       53
+#define R_LARCH_SUB24                       54
+#define R_LARCH_SUB32                       55
+#define R_LARCH_SUB64                       56
+#define R_LARCH_GNU_VTINHERIT               57
+#define R_LARCH_GNU_VTENTRY                 58
+#define R_LARCH_B16                         64
+#define R_LARCH_B21                         65
+#define R_LARCH_B26                         66
+#define R_LARCH_ABS_HI20                    67
+#define R_LARCH_ABS_LO12                    68
+#define R_LARCH_ABS64_LO20                  69
+#define R_LARCH_ABS64_HI12                  70
+#define R_LARCH_PCALA_HI20                  71
+#define R_LARCH_PCALA_LO12                  72
+#define R_LARCH_PCALA64_LO20                73
+#define R_LARCH_PCALA64_HI12                74
+#define R_LARCH_GOT_PC_HI20                 75
+#define R_LARCH_GOT_PC_LO12                 76
+#define R_LARCH_GOT64_PC_LO20               77
+#define R_LARCH_GOT64_PC_HI12               78
+#define R_LARCH_GOT_HI20                    79
+#define R_LARCH_GOT_LO12                    80
+#define R_LARCH_GOT64_LO20                  81
+#define R_LARCH_GOT64_HI12                  82
+#define R_LARCH_TLS_LE_HI20                 83
+#define R_LARCH_TLS_LE_LO12                 84
+#define R_LARCH_TLS_LE64_LO20               85
+#define R_LARCH_TLS_LE64_HI12               86
+#define R_LARCH_TLS_IE_PC_HI20              87
+#define R_LARCH_TLS_IE_PC_LO12              88
+#define R_LARCH_TLS_IE64_PC_LO20            89
+#define R_LARCH_TLS_IE64_PC_HI12            90
+#define R_LARCH_TLS_IE_HI20                 91
+#define R_LARCH_TLS_IE_LO12                 92
+#define R_LARCH_TLS_IE64_LO20               93
+#define R_LARCH_TLS_IE64_HI12               94
+#define R_LARCH_TLS_LD_PC_HI20              95
+#define R_LARCH_TLS_LD_HI20                 96
+#define R_LARCH_TLS_GD_PC_HI20              97
+#define R_LARCH_TLS_GD_HI20                 98
+#define R_LARCH_32_PCREL                    99
+#define R_LARCH_RELAX                       100
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/endian.h b/include/endian.h
index 1bd44451..172c4320 100644
--- a/include/endian.h
+++ b/include/endian.h
@@ -3,25 +3,19 @@
 
 #include <features.h>
 
-#define __LITTLE_ENDIAN 1234
-#define __BIG_ENDIAN 4321
-#define __PDP_ENDIAN 3412
+#define __NEED_uint16_t
+#define __NEED_uint32_t
+#define __NEED_uint64_t
 
-#if defined(__GNUC__) && defined(__BYTE_ORDER__)
-#define __BYTE_ORDER __BYTE_ORDER__
-#else
-#include <bits/endian.h>
-#endif
+#include <bits/alltypes.h>
 
-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define __PDP_ENDIAN 3412
 
 #define BIG_ENDIAN __BIG_ENDIAN
 #define LITTLE_ENDIAN __LITTLE_ENDIAN
 #define PDP_ENDIAN __PDP_ENDIAN
 #define BYTE_ORDER __BYTE_ORDER
 
-#include <stdint.h>
-
 static __inline uint16_t __bswap16(uint16_t __x)
 {
 	return __x<<8 | __x>>8;
@@ -40,43 +34,47 @@ static __inline uint64_t __bswap64(uint64_t __x)
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define htobe16(x) __bswap16(x)
 #define be16toh(x) __bswap16(x)
-#define betoh16(x) __bswap16(x)
 #define htobe32(x) __bswap32(x)
 #define be32toh(x) __bswap32(x)
-#define betoh32(x) __bswap32(x)
 #define htobe64(x) __bswap64(x)
 #define be64toh(x) __bswap64(x)
-#define betoh64(x) __bswap64(x)
 #define htole16(x) (uint16_t)(x)
 #define le16toh(x) (uint16_t)(x)
-#define letoh16(x) (uint16_t)(x)
 #define htole32(x) (uint32_t)(x)
 #define le32toh(x) (uint32_t)(x)
-#define letoh32(x) (uint32_t)(x)
 #define htole64(x) (uint64_t)(x)
 #define le64toh(x) (uint64_t)(x)
-#define letoh64(x) (uint64_t)(x)
 #else
 #define htobe16(x) (uint16_t)(x)
 #define be16toh(x) (uint16_t)(x)
-#define betoh16(x) (uint16_t)(x)
 #define htobe32(x) (uint32_t)(x)
 #define be32toh(x) (uint32_t)(x)
-#define betoh32(x) (uint32_t)(x)
 #define htobe64(x) (uint64_t)(x)
 #define be64toh(x) (uint64_t)(x)
-#define betoh64(x) (uint64_t)(x)
 #define htole16(x) __bswap16(x)
 #define le16toh(x) __bswap16(x)
-#define letoh16(x) __bswap16(x)
 #define htole32(x) __bswap32(x)
 #define le32toh(x) __bswap32(x)
-#define letoh32(x) __bswap32(x)
 #define htole64(x) __bswap64(x)
 #define le64toh(x) __bswap64(x)
-#define letoh64(x) __bswap64(x)
 #endif
 
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define betoh16(x) __bswap16(x)
+#define betoh32(x) __bswap32(x)
+#define betoh64(x) __bswap64(x)
+#define letoh16(x) (uint16_t)(x)
+#define letoh32(x) (uint32_t)(x)
+#define letoh64(x) (uint64_t)(x)
+#else
+#define betoh16(x) (uint16_t)(x)
+#define betoh32(x) (uint32_t)(x)
+#define betoh64(x) (uint64_t)(x)
+#define letoh16(x) __bswap16(x)
+#define letoh32(x) __bswap32(x)
+#define letoh64(x) __bswap64(x)
+#endif
 #endif
 
 #endif
diff --git a/include/fcntl.h b/include/fcntl.h
index 4d91338b..53f98a8b 100644
--- a/include/fcntl.h
+++ b/include/fcntl.h
@@ -36,8 +36,9 @@ int openat(int, const char *, int, ...);
 int posix_fadvise(int, off_t, off_t, int);
 int posix_fallocate(int, off_t, off_t);
 
-#define O_SEARCH  O_PATH
-#define O_EXEC    O_PATH
+#define O_SEARCH   O_PATH
+#define O_EXEC     O_PATH
+#define O_TTY_INIT 0
 
 #define O_ACCMODE (03|O_SEARCH)
 #define O_RDONLY  00
@@ -66,8 +67,10 @@ int posix_fallocate(int, off_t, off_t);
 #define POSIX_FADV_RANDOM     1
 #define POSIX_FADV_SEQUENTIAL 2
 #define POSIX_FADV_WILLNEED   3
+#ifndef POSIX_FADV_DONTNEED
 #define POSIX_FADV_DONTNEED   4
 #define POSIX_FADV_NOREUSE    5
+#endif
 
 #undef SEEK_SET
 #undef SEEK_CUR
@@ -97,6 +100,11 @@ int posix_fallocate(int, off_t, off_t);
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define AT_NO_AUTOMOUNT 0x800
 #define AT_EMPTY_PATH 0x1000
+#define AT_STATX_SYNC_TYPE 0x6000
+#define AT_STATX_SYNC_AS_STAT 0x0000
+#define AT_STATX_FORCE_SYNC 0x2000
+#define AT_STATX_DONT_SYNC 0x4000
+#define AT_RECURSIVE 0x8000
 
 #define FAPPEND O_APPEND
 #define FFSYNC O_SYNC
@@ -126,6 +134,7 @@ int posix_fallocate(int, off_t, off_t);
 #define F_SEAL_SHRINK	0x0002
 #define F_SEAL_GROW	0x0004
 #define F_SEAL_WRITE	0x0008
+#define F_SEAL_FUTURE_WRITE	0x0010
 
 #define F_GET_RW_HINT		1035
 #define F_SET_RW_HINT		1036
@@ -175,7 +184,6 @@ struct f_owner_ex {
 #define SPLICE_F_MORE 4
 #define SPLICE_F_GIFT 8
 int fallocate(int, int, off_t, off_t);
-#define fallocate64 fallocate
 int name_to_handle_at(int, const char *, struct file_handle *, int *, int);
 int open_by_handle_at(int, struct file_handle *, int);
 ssize_t readahead(int, off_t, size_t);
@@ -186,7 +194,7 @@ ssize_t tee(int, int, size_t, unsigned);
 #define loff_t off_t
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define F_GETLK64 F_GETLK
 #define F_SETLK64 F_SETLK
 #define F_SETLKW64 F_SETLKW
@@ -198,6 +206,9 @@ ssize_t tee(int, int, size_t, unsigned);
 #define posix_fadvise64 posix_fadvise
 #define posix_fallocate64 posix_fallocate
 #define off64_t off_t
+#if defined(_GNU_SOURCE)
+#define fallocate64 fallocate
+#endif
 #endif
 
 #ifdef __cplusplus
diff --git a/include/features.h b/include/features.h
index f4d651ef..85cfb72a 100644
--- a/include/features.h
+++ b/include/features.h
@@ -35,4 +35,6 @@
 #define _Noreturn
 #endif
 
+#define __REDIR(x,y) __typeof__(x) x __asm__(#y)
+
 #endif
diff --git a/include/ftw.h b/include/ftw.h
index b15c062a..d0445e8a 100644
--- a/include/ftw.h
+++ b/include/ftw.h
@@ -29,7 +29,7 @@ struct FTW {
 int ftw(const char *, int (*)(const char *, const struct stat *, int), int);
 int nftw(const char *, int (*)(const char *, const struct stat *, int, struct FTW *), int, int);
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define ftw64 ftw
 #define nftw64 nftw
 #endif
diff --git a/include/glob.h b/include/glob.h
index 76f6c1c6..fed06745 100644
--- a/include/glob.h
+++ b/include/glob.h
@@ -31,12 +31,15 @@ void globfree(glob_t *);
 #define GLOB_NOESCAPE 0x40
 #define	GLOB_PERIOD   0x80
 
+#define GLOB_TILDE       0x1000
+#define GLOB_TILDE_CHECK 0x4000
+
 #define GLOB_NOSPACE 1
 #define GLOB_ABORTED 2
 #define GLOB_NOMATCH 3
 #define GLOB_NOSYS   4
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define glob64 glob
 #define globfree64 globfree
 #define glob64_t glob_t
diff --git a/include/limits.h b/include/limits.h
index 02c2139d..53a27b9d 100644
--- a/include/limits.h
+++ b/include/limits.h
@@ -3,9 +3,7 @@
 
 #include <features.h>
 
-/* Most limits are system-specific */
-
-#include <bits/limits.h>
+#include <bits/alltypes.h> /* __LONG_MAX */
 
 /* Support signed or unsigned plain-char */
 
@@ -17,8 +15,6 @@
 #define CHAR_MAX 127
 #endif
 
-/* Some universal constants... */
-
 #define CHAR_BIT 8
 #define SCHAR_MIN (-128)
 #define SCHAR_MAX 127
@@ -30,8 +26,10 @@
 #define INT_MAX  0x7fffffff
 #define UINT_MAX 0xffffffffU
 #define LONG_MIN (-LONG_MAX-1)
+#define LONG_MAX __LONG_MAX
 #define ULONG_MAX (2UL*LONG_MAX+1)
 #define LLONG_MIN (-LLONG_MAX-1)
+#define LLONG_MAX  0x7fffffffffffffffLL
 #define ULLONG_MAX (2ULL*LLONG_MAX+1)
 
 #define MB_LEN_MAX 4
@@ -39,9 +37,13 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 
+#include <bits/limits.h>
+
 #define PIPE_BUF 4096
 #define FILESIZEBITS 64
+#ifndef NAME_MAX
 #define NAME_MAX 255
+#endif
 #define PATH_MAX 4096
 #define NGROUPS_MAX 32
 #define ARG_MAX 131072
@@ -53,6 +55,12 @@
 #define TTY_NAME_MAX 32
 #define HOST_NAME_MAX 255
 
+#if LONG_MAX == 0x7fffffffL
+#define LONG_BIT 32
+#else
+#define LONG_BIT 64
+#endif
+
 /* Implementation choices... */
 
 #define PTHREAD_KEYS_MAX 128
diff --git a/include/locale.h b/include/locale.h
index ce384381..11106fea 100644
--- a/include/locale.h
+++ b/include/locale.h
@@ -7,7 +7,9 @@ extern "C" {
 
 #include <features.h>
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
diff --git a/include/math.h b/include/math.h
index fea34686..14f28ec8 100644
--- a/include/math.h
+++ b/include/math.h
@@ -36,6 +36,18 @@ extern "C" {
 #define FP_SUBNORMAL 3
 #define FP_NORMAL    4
 
+#ifdef __FP_FAST_FMA
+#define FP_FAST_FMA 1
+#endif
+
+#ifdef __FP_FAST_FMAF
+#define FP_FAST_FMAF 1
+#endif
+
+#ifdef __FP_FAST_FMAL
+#define FP_FAST_FMAL 1
+#endif
+
 int __fpclassify(double);
 int __fpclassifyf(float);
 int __fpclassifyl(long double);
diff --git a/include/mqueue.h b/include/mqueue.h
index f5cbe796..0c807ea0 100644
--- a/include/mqueue.h
+++ b/include/mqueue.h
@@ -30,6 +30,11 @@ ssize_t mq_timedreceive(mqd_t, char *__restrict, size_t, unsigned *__restrict, c
 int mq_timedsend(mqd_t, const char *, size_t, unsigned, const struct timespec *);
 int mq_unlink(const char *);
 
+#if _REDIR_TIME64
+__REDIR(mq_timedreceive, __mq_timedreceive_time64);
+__REDIR(mq_timedsend, __mq_timedsend_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/netdb.h b/include/netdb.h
index d096c781..3af065e2 100644
--- a/include/netdb.h
+++ b/include/netdb.h
@@ -44,6 +44,7 @@ struct addrinfo {
 #define EAI_NONAME     -2
 #define EAI_AGAIN      -3
 #define EAI_FAIL       -4
+#define EAI_NODATA     -5
 #define EAI_FAMILY     -6
 #define EAI_SOCKTYPE   -7
 #define EAI_SERVICE    -8
diff --git a/include/netinet/icmp6.h b/include/netinet/icmp6.h
index cf951d91..01269e7d 100644
--- a/include/netinet/icmp6.h
+++ b/include/netinet/icmp6.h
@@ -9,7 +9,6 @@ extern "C" {
 #include <string.h>
 #include <sys/types.h>
 #include <netinet/in.h>
-#include <endian.h>
 
 #define ICMP6_FILTER 1
 
diff --git a/include/netinet/if_ether.h b/include/netinet/if_ether.h
index ecd6c73c..3479f511 100644
--- a/include/netinet/if_ether.h
+++ b/include/netinet/if_ether.h
@@ -58,12 +58,15 @@
 #define ETH_P_ERSPAN	0x88BE
 #define ETH_P_PREAUTH	0x88C7
 #define ETH_P_TIPC	0x88CA
+#define ETH_P_LLDP	0x88CC
+#define ETH_P_MRP	0x88E3
 #define ETH_P_MACSEC	0x88E5
 #define ETH_P_8021AH	0x88E7
 #define ETH_P_MVRP	0x88F5
 #define ETH_P_1588	0x88F7
 #define ETH_P_NCSI	0x88F8
 #define ETH_P_PRP	0x88FB
+#define ETH_P_CFM	0x8902
 #define ETH_P_FCOE	0x8906
 #define ETH_P_TDLS	0x890D
 #define ETH_P_FIP	0x8914
@@ -76,6 +79,7 @@
 #define ETH_P_QINQ2	0x9200
 #define ETH_P_QINQ3	0x9300
 #define ETH_P_EDSA	0xDADA
+#define ETH_P_DSA_8021Q	0xDADB
 #define ETH_P_IFE	0xED3E
 #define ETH_P_AF_IUCV	0xFBFB
 
diff --git a/include/netinet/in.h b/include/netinet/in.h
index 192679a6..fb628b61 100644
--- a/include/netinet/in.h
+++ b/include/netinet/in.h
@@ -48,10 +48,12 @@ struct ipv6_mreq {
 #define INADDR_BROADCAST  ((in_addr_t) 0xffffffff)
 #define INADDR_NONE       ((in_addr_t) 0xffffffff)
 #define INADDR_LOOPBACK   ((in_addr_t) 0x7f000001)
+#define INADDR_DUMMY      ((in_addr_t) 0xc0000008)
 
 #define INADDR_UNSPEC_GROUP     ((in_addr_t) 0xe0000000)
 #define INADDR_ALLHOSTS_GROUP   ((in_addr_t) 0xe0000001)
 #define INADDR_ALLRTRS_GROUP    ((in_addr_t) 0xe0000002)
+#define INADDR_ALLSNOOPERS_GROUP ((in_addr_t) 0xe000006a)
 #define INADDR_MAX_LOCAL_GROUP  ((in_addr_t) 0xe00000ff)
 
 #define IN6ADDR_ANY_INIT      { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
@@ -59,8 +61,6 @@ struct ipv6_mreq {
 
 extern const struct in6_addr in6addr_any, in6addr_loopback;
 
-#undef INET_ADDRSTRLEN
-#undef INET6_ADDRSTRLEN
 #define INET_ADDRSTRLEN  16
 #define INET6_ADDRSTRLEN 46
 
@@ -102,8 +102,10 @@ uint16_t ntohs(uint16_t);
 #define IPPROTO_MH       135
 #define IPPROTO_UDPLITE  136
 #define IPPROTO_MPLS     137
+#define IPPROTO_ETHERNET 143
 #define IPPROTO_RAW      255
-#define IPPROTO_MAX      256
+#define IPPROTO_MPTCP    262
+#define IPPROTO_MAX      263
 
 #define IN6_IS_ADDR_UNSPECIFIED(a) \
         (((uint32_t *) (a))[0] == 0 && ((uint32_t *) (a))[1] == 0 && \
@@ -201,6 +203,7 @@ uint16_t ntohs(uint16_t);
 #define IP_CHECKSUM        23
 #define IP_BIND_ADDRESS_NO_PORT 24
 #define IP_RECVFRAGSIZE    25
+#define IP_RECVERR_RFC4884 26
 #define IP_MULTICAST_IF    32
 #define IP_MULTICAST_TTL   33
 #define IP_MULTICAST_LOOP  34
@@ -335,6 +338,8 @@ struct ip6_mtuinfo {
 #define IPV6_V6ONLY             26
 #define IPV6_JOIN_ANYCAST       27
 #define IPV6_LEAVE_ANYCAST      28
+#define IPV6_MULTICAST_ALL      29
+#define IPV6_ROUTER_ALERT_ISOLATE 30
 #define IPV6_IPSEC_POLICY       34
 #define IPV6_XFRM_POLICY        35
 #define IPV6_HDRINCL            36
diff --git a/include/netinet/ip.h b/include/netinet/ip.h
index d7fa8d5e..0ae132a5 100644
--- a/include/netinet/ip.h
+++ b/include/netinet/ip.h
@@ -7,7 +7,6 @@ extern "C" {
 
 #include <stdint.h>
 #include <netinet/in.h>
-#include <endian.h>
 
 struct timestamp {
 	uint8_t len;
@@ -191,6 +190,8 @@ struct ip_timestamp {
 
 #define	IP_MSS		576
 
+#define __UAPI_DEF_IPHDR	0
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/netinet/ip6.h b/include/netinet/ip6.h
index a4347a53..50c626a6 100644
--- a/include/netinet/ip6.h
+++ b/include/netinet/ip6.h
@@ -7,7 +7,6 @@ extern "C" {
 
 #include <stdint.h>
 #include <netinet/in.h>
-#include <endian.h>
 
 struct ip6_hdr {
 	union {
diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h
index 584af2f5..fad1d844 100644
--- a/include/netinet/tcp.h
+++ b/include/netinet/tcp.h
@@ -38,6 +38,7 @@
 #define TCP_FASTOPEN_NO_COOKIE 34
 #define TCP_ZEROCOPY_RECEIVE   35
 #define TCP_INQ          36
+#define TCP_TX_DELAY     37
 
 #define TCP_CM_INQ TCP_INQ
 
@@ -72,6 +73,15 @@ enum {
 	TCP_NLA_SND_SSTHRESH,
 	TCP_NLA_DELIVERED,
 	TCP_NLA_DELIVERED_CE,
+	TCP_NLA_BYTES_SENT,
+	TCP_NLA_BYTES_RETRANS,
+	TCP_NLA_DSACK_DUPS,
+	TCP_NLA_REORD_SEEN,
+	TCP_NLA_SRTT,
+	TCP_NLA_TIMEOUT_REHASH,
+	TCP_NLA_BYTES_NOTSENT,
+	TCP_NLA_EDT,
+	TCP_NLA_TTL,
 };
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
@@ -92,7 +102,6 @@ enum {
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <stdint.h>
-#include <endian.h>
 
 typedef uint32_t tcp_seq;
 
@@ -176,6 +185,13 @@ struct tcphdr {
 #define TCP_CA_Recovery		3
 #define TCP_CA_Loss		4
 
+enum tcp_fastopen_client_fail {
+	TFO_STATUS_UNSPEC,
+	TFO_COOKIE_UNAVAILABLE,
+	TFO_DATA_NOT_ACKED,
+	TFO_SYN_RETRANSMITTED,
+};
+
 struct tcp_info {
 	uint8_t tcpi_state;
 	uint8_t tcpi_ca_state;
@@ -184,7 +200,7 @@ struct tcp_info {
 	uint8_t tcpi_backoff;
 	uint8_t tcpi_options;
 	uint8_t tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
-	uint8_t tcpi_delivery_rate_app_limited : 1;
+	uint8_t tcpi_delivery_rate_app_limited : 1, tcpi_fastopen_client_fail : 2;
 	uint32_t tcpi_rto;
 	uint32_t tcpi_ato;
 	uint32_t tcpi_snd_mss;
@@ -225,18 +241,25 @@ struct tcp_info {
 	uint64_t tcpi_sndbuf_limited;
 	uint32_t tcpi_delivered;
 	uint32_t tcpi_delivered_ce;
+	uint64_t tcpi_bytes_sent;
+	uint64_t tcpi_bytes_retrans;
+	uint32_t tcpi_dsack_dups;
+	uint32_t tcpi_reord_seen;
+	uint32_t tcpi_rcv_ooopack;
+	uint32_t tcpi_snd_wnd;
 };
 
 #define TCP_MD5SIG_MAXKEYLEN    80
 
-#define TCP_MD5SIG_FLAG_PREFIX  1
+#define TCP_MD5SIG_FLAG_PREFIX  0x1
+#define TCP_MD5SIG_FLAG_IFINDEX 0x2
 
 struct tcp_md5sig {
 	struct sockaddr_storage tcpm_addr;
 	uint8_t tcpm_flags;
 	uint8_t tcpm_prefixlen;
 	uint16_t tcpm_keylen;
-	uint32_t __tcpm_pad;
+	int tcpm_ifindex;
 	uint8_t tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
@@ -260,10 +283,21 @@ struct tcp_repair_window {
 	uint32_t rcv_wup;
 };
 
+#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
+
 struct tcp_zerocopy_receive {
 	uint64_t address;
 	uint32_t length;
 	uint32_t recv_skip_hint;
+	uint32_t inq;
+	int32_t err;
+	uint64_t copybuf_address;
+	int32_t copybuf_len;
+	uint32_t flags;
+	uint64_t msg_control;
+	uint64_t msg_controllen;
+	uint32_t msg_flags;
+	uint32_t reserved;
 };
 
 #endif
diff --git a/include/netinet/udp.h b/include/netinet/udp.h
index 993c3478..40c3f203 100644
--- a/include/netinet/udp.h
+++ b/include/netinet/udp.h
@@ -27,12 +27,15 @@ struct udphdr {
 #define UDP_NO_CHECK6_TX 101
 #define UDP_NO_CHECK6_RX 102
 #define UDP_SEGMENT	103
+#define UDP_GRO		104
 
 #define UDP_ENCAP_ESPINUDP_NON_IKE 1
 #define UDP_ENCAP_ESPINUDP	2
 #define UDP_ENCAP_L2TPINUDP	3
 #define UDP_ENCAP_GTP0		4
 #define UDP_ENCAP_GTP1U		5
+#define UDP_ENCAP_RXRPC		6
+#define TCP_ENCAP_ESPINTCP	7
 
 #define SOL_UDP            17
 
diff --git a/include/netpacket/packet.h b/include/netpacket/packet.h
index f2210ce8..b36e092a 100644
--- a/include/netpacket/packet.h
+++ b/include/netpacket/packet.h
@@ -48,6 +48,7 @@ struct packet_mreq {
 #define PACKET_QDISC_BYPASS		20
 #define PACKET_ROLLOVER_STATS		21
 #define PACKET_FANOUT_DATA		22
+#define PACKET_IGNORE_OUTGOING		23
 
 #define PACKET_MR_MULTICAST	0
 #define PACKET_MR_PROMISC	1
diff --git a/include/poll.h b/include/poll.h
index daccc760..272dc34a 100644
--- a/include/poll.h
+++ b/include/poll.h
@@ -36,7 +36,7 @@ struct pollfd {
 
 int poll (struct pollfd *, nfds_t, int);
 
-#ifdef _GNU_SOURCE
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define __NEED_time_t
 #define __NEED_struct_timespec
 #define __NEED_sigset_t
@@ -44,6 +44,12 @@ int poll (struct pollfd *, nfds_t, int);
 int ppoll(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
 #endif
 
+#if _REDIR_TIME64
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+__REDIR(ppoll, __ppoll_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/pthread.h b/include/pthread.h
index e238321b..89fd9ff7 100644
--- a/include/pthread.h
+++ b/include/pthread.h
@@ -74,6 +74,9 @@ extern "C" {
 #define PTHREAD_BARRIER_SERIAL_THREAD (-1)
 
 
+#define PTHREAD_NULL ((pthread_t)0)
+
+
 int pthread_create(pthread_t *__restrict, const pthread_attr_t *__restrict, void *(*)(void *), void *__restrict);
 int pthread_detach(pthread_t);
 _Noreturn void pthread_exit(void *);
@@ -218,12 +221,23 @@ int pthread_getaffinity_np(pthread_t, size_t, struct cpu_set_t *);
 int pthread_setaffinity_np(pthread_t, size_t, const struct cpu_set_t *);
 int pthread_getattr_np(pthread_t, pthread_attr_t *);
 int pthread_setname_np(pthread_t, const char *);
+int pthread_getname_np(pthread_t, char *, size_t);
 int pthread_getattr_default_np(pthread_attr_t *);
 int pthread_setattr_default_np(const pthread_attr_t *);
 int pthread_tryjoin_np(pthread_t, void **);
 int pthread_timedjoin_np(pthread_t, void **, const struct timespec *);
 #endif
 
+#if _REDIR_TIME64
+__REDIR(pthread_mutex_timedlock, __pthread_mutex_timedlock_time64);
+__REDIR(pthread_cond_timedwait, __pthread_cond_timedwait_time64);
+__REDIR(pthread_rwlock_timedrdlock, __pthread_rwlock_timedrdlock_time64);
+__REDIR(pthread_rwlock_timedwrlock, __pthread_rwlock_timedwrlock_time64);
+#ifdef _GNU_SOURCE
+__REDIR(pthread_timedjoin_np, __pthread_timedjoin_np_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sched.h b/include/sched.h
index 05d40b1e..8c3b53f0 100644
--- a/include/sched.h
+++ b/include/sched.h
@@ -18,10 +18,16 @@ extern "C" {
 
 struct sched_param {
 	int sched_priority;
-	int sched_ss_low_priority;
-	struct timespec sched_ss_repl_period;
-	struct timespec sched_ss_init_budget;
-	int sched_ss_max_repl;
+	int __reserved1;
+#if _REDIR_TIME64
+	long __reserved2[4];
+#else
+	struct {
+		time_t __reserved1;
+		long __reserved2;
+	} __reserved2[2];
+#endif
+	int __reserved3;
 };
 
 int    sched_get_priority_max(int);
@@ -43,10 +49,12 @@ int     sched_yield(void);
 
 #ifdef _GNU_SOURCE
 #define CSIGNAL		0x000000ff
+#define CLONE_NEWTIME	0x00000080
 #define CLONE_VM	0x00000100
 #define CLONE_FS	0x00000200
 #define CLONE_FILES	0x00000400
 #define CLONE_SIGHAND	0x00000800
+#define CLONE_PIDFD	0x00001000
 #define CLONE_PTRACE	0x00002000
 #define CLONE_VFORK	0x00004000
 #define CLONE_PARENT	0x00008000
@@ -70,11 +78,10 @@ int clone (int (*)(void *), void *, int, void *, ...);
 int unshare(int);
 int setns(int, int);
 
-void *memcpy(void *__restrict, const void *__restrict, size_t);
-int memcmp(const void *, const void *, size_t);
-void *memset (void *, int, size_t);
-void *calloc(size_t, size_t);
-void free(void *);
+int (memcmp)(const void *, const void *, size_t);
+void *(memset)(void *, int, size_t);
+void *(calloc)(size_t, size_t);
+void (free)(void *);
 
 typedef struct cpu_set_t { unsigned long __bits[128/sizeof(long)]; } cpu_set_t;
 int __sched_cpucount(size_t, const cpu_set_t *);
@@ -108,15 +115,15 @@ __CPU_op_func_S(XOR, ^)
 #define CPU_XOR_S(a,b,c,d) __CPU_XOR_S(a,b,c,d)
 
 #define CPU_COUNT_S(size,set) __sched_cpucount(size,set)
-#define CPU_ZERO_S(size,set) memset(set,0,size)
-#define CPU_EQUAL_S(size,set1,set2) (!memcmp(set1,set2,size))
+#define CPU_ZERO_S(size,set) (memset)(set,0,size)
+#define CPU_EQUAL_S(size,set1,set2) (!(memcmp)(set1,set2,size))
 
 #define CPU_ALLOC_SIZE(n) (sizeof(long) * ( (n)/(8*sizeof(long)) \
 	+ ((n)%(8*sizeof(long)) + 8*sizeof(long)-1)/(8*sizeof(long)) ) )
-#define CPU_ALLOC(n) ((cpu_set_t *)calloc(1,CPU_ALLOC_SIZE(n)))
-#define CPU_FREE(set) free(set)
+#define CPU_ALLOC(n) ((cpu_set_t *)(calloc)(1,CPU_ALLOC_SIZE(n)))
+#define CPU_FREE(set) (free)(set)
 
-#define CPU_SETSIZE 128
+#define CPU_SETSIZE 1024
 
 #define CPU_SET(i, set) CPU_SET_S(i,sizeof(cpu_set_t),set)
 #define CPU_CLR(i, set) CPU_CLR_S(i,sizeof(cpu_set_t),set)
@@ -130,6 +137,10 @@ __CPU_op_func_S(XOR, ^)
 
 #endif
 
+#if _REDIR_TIME64
+__REDIR(sched_rr_get_interval, __sched_rr_get_interval_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/semaphore.h b/include/semaphore.h
index 277c47d6..3690f496 100644
--- a/include/semaphore.h
+++ b/include/semaphore.h
@@ -29,6 +29,10 @@ int    sem_trywait(sem_t *);
 int    sem_unlink(const char *);
 int    sem_wait(sem_t *);
 
+#if _REDIR_TIME64
+__REDIR(sem_timedwait, __sem_timedwait_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/setjmp.h b/include/setjmp.h
index 2d43abf8..1976af23 100644
--- a/include/setjmp.h
+++ b/include/setjmp.h
@@ -15,25 +15,33 @@ typedef struct __jmp_buf_tag {
 	unsigned long __ss[128/sizeof(long)];
 } jmp_buf[1];
 
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+#define __setjmp_attr __attribute__((__returns_twice__))
+#else
+#define __setjmp_attr
+#endif
+
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
  || defined(_BSD_SOURCE)
 typedef jmp_buf sigjmp_buf;
-int sigsetjmp (sigjmp_buf, int);
+int sigsetjmp (sigjmp_buf, int) __setjmp_attr;
 _Noreturn void siglongjmp (sigjmp_buf, int);
 #endif
 
 #if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
  || defined(_BSD_SOURCE)
-int _setjmp (jmp_buf);
+int _setjmp (jmp_buf) __setjmp_attr;
 _Noreturn void _longjmp (jmp_buf, int);
 #endif
 
-int setjmp (jmp_buf);
+int setjmp (jmp_buf) __setjmp_attr;
 _Noreturn void longjmp (jmp_buf, int);
 
 #define setjmp setjmp
 
+#undef __setjmp_attr
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/shadow.h b/include/shadow.h
index 2b1be413..a9d6940f 100644
--- a/include/shadow.h
+++ b/include/shadow.h
@@ -28,7 +28,6 @@ void setspent(void);
 void endspent(void);
 struct spwd *getspent(void);
 struct spwd *fgetspent(FILE *);
-struct spwd *sgetspent(const char *);
 int putspent(const struct spwd *, FILE *);
 
 struct spwd *getspnam(const char *);
diff --git a/include/signal.h b/include/signal.h
index 5c48cb83..c347f861 100644
--- a/include/signal.h
+++ b/include/signal.h
@@ -75,6 +75,8 @@ typedef struct sigaltstack stack_t;
 #define SEGV_ACCERR 2
 #define SEGV_BNDERR 3
 #define SEGV_PKUERR 4
+#define SEGV_MTEAERR 8
+#define SEGV_MTESERR 9
 
 #define BUS_ADRALN 1
 #define BUS_ADRERR 2
@@ -176,18 +178,31 @@ struct sigaction {
 #define sa_handler   __sa_handler.sa_handler
 #define sa_sigaction __sa_handler.sa_sigaction
 
+#define SA_UNSUPPORTED 0x00000400
+#define SA_EXPOSE_TAGBITS 0x00000800
+
 struct sigevent {
 	union sigval sigev_value;
 	int sigev_signo;
 	int sigev_notify;
-	void (*sigev_notify_function)(union sigval);
-	pthread_attr_t *sigev_notify_attributes;
-	char __pad[56-3*sizeof(long)];
+	union {
+		char __pad[64 - 2*sizeof(int) - sizeof(union sigval)];
+		pid_t sigev_notify_thread_id;
+		struct {
+			void (*sigev_notify_function)(union sigval);
+			pthread_attr_t *sigev_notify_attributes;
+		} __sev_thread;
+	} __sev_fields;
 };
 
+#define sigev_notify_thread_id __sev_fields.sigev_notify_thread_id
+#define sigev_notify_function __sev_fields.__sev_thread.sigev_notify_function
+#define sigev_notify_attributes __sev_fields.__sev_thread.sigev_notify_attributes
+
 #define SIGEV_SIGNAL 0
 #define SIGEV_NONE 1
 #define SIGEV_THREAD 2
+#define SIGEV_THREAD_ID 4
 
 int __libc_current_sigrtmin(void);
 int __libc_current_sigrtmax(void);
@@ -249,6 +264,9 @@ void (*sigset(int, void (*)(int)))(int);
 #if defined(_BSD_SOURCE) || defined(_GNU_SOURCE)
 #define NSIG _NSIG
 typedef void (*sig_t)(int);
+
+#define SYS_SECCOMP 1
+#define SYS_USER_DISPATCH 2
 #endif
 
 #ifdef _GNU_SOURCE
@@ -271,6 +289,14 @@ typedef int sig_atomic_t;
 void (*signal(int, void (*)(int)))(int);
 int raise(int);
 
+#if _REDIR_TIME64
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
+ || defined(_BSD_SOURCE)
+__REDIR(sigtimedwait, __sigtimedwait_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/spawn.h b/include/spawn.h
index c9bd1939..8eb73e00 100644
--- a/include/spawn.h
+++ b/include/spawn.h
@@ -71,6 +71,11 @@ int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *__restrict, int
 int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *, int);
 int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *, int, int);
 
+#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE)
+int posix_spawn_file_actions_addchdir_np(posix_spawn_file_actions_t *__restrict, const char *__restrict);
+int posix_spawn_file_actions_addfchdir_np(posix_spawn_file_actions_t *, int);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/stdc-predef.h b/include/stdc-predef.h
index f8cd4b89..642bad2d 100644
--- a/include/stdc-predef.h
+++ b/include/stdc-predef.h
@@ -7,4 +7,12 @@
 #define __STDC_IEC_559__ 1
 #endif
 
+#if !defined(__STDC_UTF_16__)
+#define __STDC_UTF_16__ 1
+#endif
+
+#if !defined(__STDC_UTF_32__)
+#define __STDC_UTF_32__ 1
+#endif
+
 #endif
diff --git a/include/stddef.h b/include/stddef.h
index bd753853..f25b8639 100644
--- a/include/stddef.h
+++ b/include/stddef.h
@@ -1,7 +1,9 @@
 #ifndef _STDDEF_H
 #define _STDDEF_H
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
diff --git a/include/stdio.h b/include/stdio.h
index afadd912..4ea4c170 100644
--- a/include/stdio.h
+++ b/include/stdio.h
@@ -11,6 +11,10 @@ extern "C" {
 #define __NEED___isoc_va_list
 #define __NEED_size_t
 
+#if __STDC_VERSION__ < 201112L
+#define __NEED_struct__IO_FILE
+#endif
+
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
  || defined(_BSD_SOURCE)
@@ -21,7 +25,9 @@ extern "C" {
 
 #include <bits/alltypes.h>
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
@@ -152,6 +158,13 @@ char *ctermid(char *);
 #define L_ctermid 20
 #endif
 
+#if defined(_GNU_SOURCE)
+#define RENAME_NOREPLACE (1 << 0)
+#define RENAME_EXCHANGE  (1 << 1)
+#define RENAME_WHITEOUT  (1 << 2)
+
+int renameat2(int, const char *, int, const char *, unsigned);
+#endif
 
 #if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
  || defined(_BSD_SOURCE)
@@ -199,7 +212,7 @@ typedef struct _IO_cookie_io_functions_t {
 FILE *fopencookie(void *, const char *, cookie_io_functions_t);
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define tmpfile64 tmpfile
 #define fopen64 fopen
 #define freopen64 freopen
diff --git a/include/stdlib.h b/include/stdlib.h
index 42ca8336..475190bf 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -7,7 +7,9 @@ extern "C" {
 
 #include <features.h>
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
@@ -93,7 +95,7 @@ size_t __ctype_get_mb_cur_max(void);
 #define WTERMSIG(s) ((s) & 0x7f)
 #define WSTOPSIG(s) WEXITSTATUS(s)
 #define WIFEXITED(s) (!WTERMSIG(s))
-#define WIFSTOPPED(s) ((short)((((s)&0xffff)*0x10001)>>8) > 0x7f00)
+#define WIFSTOPPED(s) ((short)((((s)&0xffff)*0x10001U)>>8) > 0x7f00)
 #define WIFSIGNALED(s) (((s)&0xffff)-1U < 0xffu)
 
 int posix_memalign (void **, size_t, size_t);
@@ -145,6 +147,8 @@ int getloadavg(double *, int);
 int clearenv(void);
 #define WCOREDUMP(s) ((s) & 0x80)
 #define WIFCONTINUED(s) ((s) == 0xffff)
+void *reallocarray (void *, size_t, size_t);
+void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
 #endif
 
 #ifdef _GNU_SOURCE
@@ -152,13 +156,14 @@ int ptsname_r(int, char *, size_t);
 char *ecvt(double, int, int *, int *);
 char *fcvt(double, int, int *, int *);
 char *gcvt(double, int, char *);
+char *secure_getenv(const char *);
 struct __locale_struct;
 float strtof_l(const char *__restrict, char **__restrict, struct __locale_struct *);
 double strtod_l(const char *__restrict, char **__restrict, struct __locale_struct *);
 long double strtold_l(const char *__restrict, char **__restrict, struct __locale_struct *);
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define mkstemp64 mkstemp
 #define mkostemp64 mkostemp
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
diff --git a/include/string.h b/include/string.h
index 795a2abc..83e2b946 100644
--- a/include/string.h
+++ b/include/string.h
@@ -7,7 +7,9 @@ extern "C" {
 
 #include <features.h>
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
@@ -71,6 +73,7 @@ char *strsignal(int);
 char *strerror_l (int, locale_t);
 int strcoll_l (const char *, const char *, locale_t);
 size_t strxfrm_l (char *__restrict, const char *__restrict, size_t, locale_t);
+void *memmem(const void *, size_t, const void *, size_t);
 #endif
 
 #if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
@@ -90,12 +93,8 @@ void explicit_bzero (void *, size_t);
 int strverscmp (const char *, const char *);
 char *strchrnul(const char *, int);
 char *strcasestr(const char *, const char *);
-void *memmem(const void *, size_t, const void *, size_t);
 void *memrchr(const void *, int, size_t);
 void *mempcpy(void *, const void *, size_t);
-#ifndef __cplusplus
-char *basename();
-#endif
 #endif
 
 #ifdef __cplusplus
diff --git a/include/strings.h b/include/strings.h
index db0960b4..b7a5ea08 100644
--- a/include/strings.h
+++ b/include/strings.h
@@ -5,6 +5,7 @@
 extern "C" {
 #endif
 
+#include <features.h>
 
 #define __NEED_size_t
 #define __NEED_locale_t
diff --git a/include/sys/acct.h b/include/sys/acct.h
index 9b0ba36f..fae9d050 100644
--- a/include/sys/acct.h
+++ b/include/sys/acct.h
@@ -6,7 +6,6 @@ extern "C" {
 #endif
 
 #include <features.h>
-#include <endian.h>
 #include <time.h>
 #include <stdint.h>
 
diff --git a/include/sys/epoll.h b/include/sys/epoll.h
index ac81a841..5f975c4a 100644
--- a/include/sys/epoll.h
+++ b/include/sys/epoll.h
@@ -7,6 +7,7 @@ extern "C" {
 
 #include <stdint.h>
 #include <sys/types.h>
+#include <sys/ioctl.h>
 #include <fcntl.h>
 
 #define __NEED_sigset_t
@@ -54,6 +55,17 @@ __attribute__ ((__packed__))
 #endif
 ;
 
+struct epoll_params {
+	uint32_t busy_poll_usecs;
+	uint16_t busy_poll_budget;
+	uint8_t prefer_busy_poll;
+
+	uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
 
 int epoll_create(int);
 int epoll_create1(int);
diff --git a/include/sys/fanotify.h b/include/sys/fanotify.h
index daab76b1..10e5f15e 100644
--- a/include/sys/fanotify.h
+++ b/include/sys/fanotify.h
@@ -5,6 +5,8 @@
 extern "C" {
 #endif
 
+#include <sys/statfs.h>
+
 struct fanotify_event_metadata {
 	unsigned event_len;
 	unsigned char vers;
@@ -19,6 +21,18 @@ struct fanotify_event_metadata {
 	int pid;
 };
 
+struct fanotify_event_info_header {
+	unsigned char info_type;
+	unsigned char pad;
+	unsigned short len;
+};
+
+struct fanotify_event_info_fid {
+	struct fanotify_event_info_header hdr;
+	fsid_t fsid;
+	unsigned char handle[];
+};
+
 struct fanotify_response {
 	int fd;
 	unsigned response;
@@ -26,15 +40,26 @@ struct fanotify_response {
 
 #define FAN_ACCESS 0x01
 #define FAN_MODIFY 0x02
+#define FAN_ATTRIB 0x04
 #define FAN_CLOSE_WRITE 0x08
 #define FAN_CLOSE_NOWRITE 0x10
 #define FAN_OPEN 0x20
+#define FAN_MOVED_FROM 0x40
+#define FAN_MOVED_TO 0x80
+#define FAN_CREATE 0x100
+#define FAN_DELETE 0x200
+#define FAN_DELETE_SELF 0x400
+#define FAN_MOVE_SELF 0x800
+#define FAN_OPEN_EXEC 0x1000
 #define FAN_Q_OVERFLOW 0x4000
 #define FAN_OPEN_PERM 0x10000
 #define FAN_ACCESS_PERM 0x20000
-#define FAN_ONDIR 0x40000000
+#define FAN_OPEN_EXEC_PERM 0x40000
+#define FAN_DIR_MODIFY 0x00080000
 #define FAN_EVENT_ON_CHILD 0x08000000
+#define FAN_ONDIR 0x40000000
 #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE)
+#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO)
 #define FAN_CLOEXEC 0x01
 #define FAN_NONBLOCK 0x02
 #define FAN_CLASS_NOTIF 0
@@ -43,22 +68,35 @@ struct fanotify_response {
 #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | FAN_CLASS_PRE_CONTENT)
 #define FAN_UNLIMITED_QUEUE 0x10
 #define FAN_UNLIMITED_MARKS 0x20
+#define FAN_ENABLE_AUDIT 0x40
+#define FAN_REPORT_TID 0x100
+#define FAN_REPORT_FID 0x200
+#define FAN_REPORT_DIR_FID 0x00000400
+#define FAN_REPORT_NAME 0x00000800
+#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
 #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
 #define FAN_MARK_ADD 0x01
 #define FAN_MARK_REMOVE 0x02
 #define FAN_MARK_DONT_FOLLOW 0x04
 #define FAN_MARK_ONLYDIR 0x08
-#define FAN_MARK_MOUNT 0x10
 #define FAN_MARK_IGNORED_MASK 0x20
 #define FAN_MARK_IGNORED_SURV_MODIFY 0x40
 #define FAN_MARK_FLUSH 0x80
+#define FAN_MARK_INODE 0x00
+#define FAN_MARK_MOUNT 0x10
+#define FAN_MARK_FILESYSTEM 0x100
+#define FAN_MARK_TYPE_MASK (FAN_MARK_INODE | FAN_MARK_MOUNT | FAN_MARK_FILESYSTEM)
 #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_DONT_FOLLOW | FAN_MARK_ONLYDIR | FAN_MARK_MOUNT | FAN_MARK_IGNORED_MASK | FAN_MARK_IGNORED_SURV_MODIFY | FAN_MARK_FLUSH)
 #define FAN_ALL_EVENTS (FAN_ACCESS | FAN_MODIFY | FAN_CLOSE | FAN_OPEN)
 #define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM)
 #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_Q_OVERFLOW)
 #define FANOTIFY_METADATA_VERSION 3
+#define FAN_EVENT_INFO_TYPE_FID 1
+#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+#define FAN_EVENT_INFO_TYPE_DFID 3
 #define FAN_ALLOW 0x01
 #define FAN_DENY 0x02
+#define FAN_AUDIT 0x10
 #define FAN_NOFD -1
 #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
 #define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, (struct fanotify_event_metadata*)(((char *)(meta)) + (meta)->event_len))
diff --git a/include/sys/inotify.h b/include/sys/inotify.h
index 46638cac..69b58631 100644
--- a/include/sys/inotify.h
+++ b/include/sys/inotify.h
@@ -40,6 +40,7 @@ struct inotify_event {
 #define IN_ONLYDIR       0x01000000
 #define IN_DONT_FOLLOW   0x02000000
 #define IN_EXCL_UNLINK   0x04000000
+#define IN_MASK_CREATE   0x10000000
 #define IN_MASK_ADD      0x20000000
 
 #define IN_ISDIR         0x40000000
diff --git a/include/sys/ioctl.h b/include/sys/ioctl.h
index d0415b3d..a9a2346e 100644
--- a/include/sys/ioctl.h
+++ b/include/sys/ioctl.h
@@ -4,8 +4,114 @@
 extern "C" {
 #endif
 
+#define __NEED_struct_winsize
+
+#include <bits/alltypes.h>
 #include <bits/ioctl.h>
 
+#define N_TTY           0
+#define N_SLIP          1
+#define N_MOUSE         2
+#define N_PPP           3
+#define N_STRIP         4
+#define N_AX25          5
+#define N_X25           6
+#define N_6PACK         7
+#define N_MASC          8
+#define N_R3964         9
+#define N_PROFIBUS_FDL  10
+#define N_IRDA          11
+#define N_SMSBLOCK      12
+#define N_HDLC          13
+#define N_SYNC_PPP      14
+#define N_HCI           15
+#define N_GIGASET_M101  16
+#define N_SLCAN         17
+#define N_PPS           18
+#define N_V253          19
+#define N_CAIF          20
+#define N_GSM0710       21
+#define N_TI_WL         22
+#define N_TRACESINK     23
+#define N_TRACEROUTER   24
+#define N_NCI           25
+#define N_SPEAKUP       26
+#define N_NULL          27
+
+#define TIOCPKT_DATA       0
+#define TIOCPKT_FLUSHREAD  1
+#define TIOCPKT_FLUSHWRITE 2
+#define TIOCPKT_STOP       4
+#define TIOCPKT_START      8
+#define TIOCPKT_NOSTOP    16
+#define TIOCPKT_DOSTOP    32
+#define TIOCPKT_IOCTL     64
+
+#define TIOCSER_TEMT 1
+
+#define SIOCADDRT          0x890B
+#define SIOCDELRT          0x890C
+#define SIOCRTMSG          0x890D
+
+#define SIOCGIFNAME        0x8910
+#define SIOCSIFLINK        0x8911
+#define SIOCGIFCONF        0x8912
+#define SIOCGIFFLAGS       0x8913
+#define SIOCSIFFLAGS       0x8914
+#define SIOCGIFADDR        0x8915
+#define SIOCSIFADDR        0x8916
+#define SIOCGIFDSTADDR     0x8917
+#define SIOCSIFDSTADDR     0x8918
+#define SIOCGIFBRDADDR     0x8919
+#define SIOCSIFBRDADDR     0x891a
+#define SIOCGIFNETMASK     0x891b
+#define SIOCSIFNETMASK     0x891c
+#define SIOCGIFMETRIC      0x891d
+#define SIOCSIFMETRIC      0x891e
+#define SIOCGIFMEM         0x891f
+#define SIOCSIFMEM         0x8920
+#define SIOCGIFMTU         0x8921
+#define SIOCSIFMTU         0x8922
+#define SIOCSIFNAME        0x8923
+#define SIOCSIFHWADDR      0x8924
+#define SIOCGIFENCAP       0x8925
+#define SIOCSIFENCAP       0x8926
+#define SIOCGIFHWADDR      0x8927
+#define SIOCGIFSLAVE       0x8929
+#define SIOCSIFSLAVE       0x8930
+#define SIOCADDMULTI       0x8931
+#define SIOCDELMULTI       0x8932
+#define SIOCGIFINDEX       0x8933
+#define SIOGIFINDEX        SIOCGIFINDEX
+#define SIOCSIFPFLAGS      0x8934
+#define SIOCGIFPFLAGS      0x8935
+#define SIOCDIFADDR        0x8936
+#define SIOCSIFHWBROADCAST 0x8937
+#define SIOCGIFCOUNT       0x8938
+
+#define SIOCGIFBR          0x8940
+#define SIOCSIFBR          0x8941
+
+#define SIOCGIFTXQLEN      0x8942
+#define SIOCSIFTXQLEN      0x8943
+
+#define SIOCDARP           0x8953
+#define SIOCGARP           0x8954
+#define SIOCSARP           0x8955
+
+#define SIOCDRARP          0x8960
+#define SIOCGRARP          0x8961
+#define SIOCSRARP          0x8962
+
+#define SIOCGIFMAP         0x8970
+#define SIOCSIFMAP         0x8971
+
+#define SIOCADDDLCI        0x8980
+#define SIOCDELDLCI        0x8981
+
+#define SIOCDEVPRIVATE     0x89F0
+#define SIOCPROTOPRIVATE   0x89E0
+
 int ioctl (int, int, ...);
 
 #ifdef __cplusplus
diff --git a/include/sys/ipc.h b/include/sys/ipc.h
index c5a39819..9e366b7b 100644
--- a/include/sys/ipc.h
+++ b/include/sys/ipc.h
@@ -22,6 +22,7 @@ extern "C" {
 #endif
 
 #include <bits/ipc.h>
+#include <bits/ipcstat.h>
 
 #define IPC_CREAT  01000
 #define IPC_EXCL   02000
@@ -29,7 +30,6 @@ extern "C" {
 
 #define IPC_RMID 0
 #define IPC_SET  1
-#define IPC_STAT 2
 #define IPC_INFO 3
 
 #define IPC_PRIVATE ((key_t) 0)
diff --git a/include/sys/membarrier.h b/include/sys/membarrier.h
new file mode 100644
index 00000000..11193eda
--- /dev/null
+++ b/include/sys/membarrier.h
@@ -0,0 +1,21 @@
+#ifndef _SYS_MEMBARRIER_H
+#define _SYS_MEMBARRIER_H
+
+#define MEMBARRIER_CMD_QUERY 0
+#define MEMBARRIER_CMD_GLOBAL 1
+#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 2
+#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 4
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 8
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 16
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 32
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 64
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ 128
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ 256
+
+#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL
+
+#define MEMBARRIER_CMD_FLAG_CPU 1
+
+int membarrier(int, int);
+
+#endif
diff --git a/include/sys/mman.h b/include/sys/mman.h
index 99d02a2e..3d5d0f9c 100644
--- a/include/sys/mman.h
+++ b/include/sys/mman.h
@@ -40,13 +40,16 @@ extern "C" {
 
 #define MAP_HUGE_SHIFT 26
 #define MAP_HUGE_MASK  0x3f
+#define MAP_HUGE_16KB  (14 << 26)
 #define MAP_HUGE_64KB  (16 << 26)
 #define MAP_HUGE_512KB (19 << 26)
 #define MAP_HUGE_1MB   (20 << 26)
 #define MAP_HUGE_2MB   (21 << 26)
 #define MAP_HUGE_8MB   (23 << 26)
 #define MAP_HUGE_16MB  (24 << 26)
+#define MAP_HUGE_32MB  (25 << 26)
 #define MAP_HUGE_256MB (28 << 26)
+#define MAP_HUGE_512MB (29 << 26)
 #define MAP_HUGE_1GB   (30 << 26)
 #define MAP_HUGE_2GB   (31 << 26)
 #define MAP_HUGE_16GB  (34U << 26)
@@ -90,6 +93,8 @@ extern "C" {
 #define MADV_DODUMP      17
 #define MADV_WIPEONFORK  18
 #define MADV_KEEPONFORK  19
+#define MADV_COLD        20
+#define MADV_PAGEOUT     21
 #define MADV_HWPOISON    100
 #define MADV_SOFT_OFFLINE 101
 #endif
@@ -97,6 +102,7 @@ extern "C" {
 #ifdef _GNU_SOURCE
 #define MREMAP_MAYMOVE 1
 #define MREMAP_FIXED 2
+#define MREMAP_DONTUNMAP 4
 
 #define MLOCK_ONFAULT 0x01
 
@@ -135,7 +141,7 @@ int mincore (void *, size_t, unsigned char *);
 int shm_open (const char *, int, mode_t);
 int shm_unlink (const char *);
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define mmap64 mmap
 #define off64_t off_t
 #endif
diff --git a/include/sys/mount.h b/include/sys/mount.h
index 57a89c09..09bd6e9d 100644
--- a/include/sys/mount.h
+++ b/include/sys/mount.h
@@ -31,6 +31,7 @@ extern "C" {
 #define MS_REMOUNT     32
 #define MS_MANDLOCK    64
 #define MS_DIRSYNC     128
+#define MS_NOSYMFOLLOW 256
 #define MS_NOATIME     1024
 #define MS_NODIRATIME  2048
 #define MS_BIND        4096
diff --git a/include/sys/msg.h b/include/sys/msg.h
index be6afc34..db5c62a4 100644
--- a/include/sys/msg.h
+++ b/include/sys/msg.h
@@ -25,9 +25,9 @@ typedef unsigned long msglen_t;
 #define MSG_NOERROR 010000
 #define MSG_EXCEPT  020000
 
-#define MSG_STAT 11
+#define MSG_STAT (11 | (IPC_STAT & 0x100))
 #define MSG_INFO 12
-#define MSG_STAT_ANY 13
+#define MSG_STAT_ANY (13 | (IPC_STAT & 0x100))
 
 struct msginfo {
 	int msgpool, msgmap, msgmax, msgmnb, msgmni, msgssz, msgtql;
diff --git a/include/sys/personality.h b/include/sys/personality.h
index 31d43dfe..411dc475 100644
--- a/include/sys/personality.h
+++ b/include/sys/personality.h
@@ -5,7 +5,9 @@
 extern "C" {
 #endif
 
+#define UNAME26            0x0020000
 #define ADDR_NO_RANDOMIZE  0x0040000
+#define FDPIC_FUNCPTRS     0x0080000
 #define MMAP_PAGE_ZERO     0x0100000
 #define ADDR_COMPAT_LAYOUT 0x0200000
 #define READ_IMPLIES_EXEC  0x0400000
@@ -17,6 +19,7 @@ extern "C" {
 
 #define PER_LINUX 0
 #define PER_LINUX_32BIT ADDR_LIMIT_32BIT
+#define PER_LINUX_FDPIC FDPIC_FUNCPTRS
 #define PER_SVR4 (1 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO)
 #define PER_SVR3 (2 | STICKY_TIMEOUTS | SHORT_INODE)
 #define PER_SCOSVR3 (3 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE)
diff --git a/include/sys/prctl.h b/include/sys/prctl.h
index af76408c..087a75c9 100644
--- a/include/sys/prctl.h
+++ b/include/sys/prctl.h
@@ -139,11 +139,43 @@ struct prctl_mm_map {
 #define PR_GET_SPECULATION_CTRL 52
 #define PR_SET_SPECULATION_CTRL 53
 #define PR_SPEC_STORE_BYPASS 0
+#define PR_SPEC_INDIRECT_BRANCH 1
 #define PR_SPEC_NOT_AFFECTED 0
 #define PR_SPEC_PRCTL (1UL << 0)
 #define PR_SPEC_ENABLE (1UL << 1)
 #define PR_SPEC_DISABLE (1UL << 2)
 #define PR_SPEC_FORCE_DISABLE (1UL << 3)
+#define PR_SPEC_DISABLE_NOEXEC (1UL << 4)
+
+#define PR_PAC_RESET_KEYS       54
+#define PR_PAC_APIAKEY (1UL << 0)
+#define PR_PAC_APIBKEY (1UL << 1)
+#define PR_PAC_APDAKEY (1UL << 2)
+#define PR_PAC_APDBKEY (1UL << 3)
+#define PR_PAC_APGAKEY (1UL << 4)
+
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_GET_TAGGED_ADDR_CTRL 56
+#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+#define PR_MTE_TCF_SHIFT 1
+#define PR_MTE_TCF_NONE  (0UL << 1)
+#define PR_MTE_TCF_SYNC  (1UL << 1)
+#define PR_MTE_TCF_ASYNC (2UL << 1)
+#define PR_MTE_TCF_MASK  (3UL << 1)
+#define PR_MTE_TAG_SHIFT 3
+#define PR_MTE_TAG_MASK  (0xffffUL << 3)
+
+#define PR_SET_IO_FLUSHER 57
+#define PR_GET_IO_FLUSHER 58
+
+#define PR_SET_SYSCALL_USER_DISPATCH 59
+#define PR_SYS_DISPATCH_OFF 0
+#define PR_SYS_DISPATCH_ON 1
+#define SYSCALL_DISPATCH_FILTER_ALLOW 0
+#define SYSCALL_DISPATCH_FILTER_BLOCK 1
+
+#define PR_PAC_SET_ENABLED_KEYS 60
+#define PR_PAC_GET_ENABLED_KEYS 61
 
 int prctl (int, ...);
 
diff --git a/include/sys/procfs.h b/include/sys/procfs.h
index e23bf1ad..38e58c16 100644
--- a/include/sys/procfs.h
+++ b/include/sys/procfs.h
@@ -23,10 +23,9 @@ struct elf_prstatus {
 	pid_t pr_ppid;
 	pid_t pr_pgrp;
 	pid_t pr_sid;
-	struct timeval pr_utime;
-	struct timeval pr_stime;
-	struct timeval pr_cutime;
-	struct timeval pr_cstime;
+	struct {
+		long tv_sec, tv_usec;
+	} pr_utime, pr_stime, pr_cutime, pr_cstime;
 	elf_gregset_t pr_reg;
 	int pr_fpvalid;
 };
diff --git a/include/sys/ptrace.h b/include/sys/ptrace.h
index 229e1f3d..c72e3c06 100644
--- a/include/sys/ptrace.h
+++ b/include/sys/ptrace.h
@@ -41,6 +41,8 @@ extern "C" {
 #define PTRACE_SETSIGMASK 0x420b
 #define PTRACE_SECCOMP_GET_FILTER 0x420c
 #define PTRACE_SECCOMP_GET_METADATA 0x420d
+#define PTRACE_GET_SYSCALL_INFO 0x420e
+#define PTRACE_GET_RSEQ_CONFIGURATION	0x420f
 
 #define PT_READ_I PTRACE_PEEKTEXT
 #define PT_READ_D PTRACE_PEEKDATA
@@ -88,6 +90,11 @@ extern "C" {
 
 #define PTRACE_PEEKSIGINFO_SHARED 1
 
+#define PTRACE_SYSCALL_INFO_NONE 0
+#define PTRACE_SYSCALL_INFO_ENTRY 1
+#define PTRACE_SYSCALL_INFO_EXIT 2
+#define PTRACE_SYSCALL_INFO_SECCOMP 3
+
 #include <bits/ptrace.h>
 
 struct __ptrace_peeksiginfo_args {
@@ -101,6 +108,37 @@ struct __ptrace_seccomp_metadata {
 	uint64_t flags;
 };
 
+struct __ptrace_syscall_info {
+	uint8_t op;
+	uint8_t __pad[3];
+	uint32_t arch;
+	uint64_t instruction_pointer;
+	uint64_t stack_pointer;
+	union {
+		struct {
+			uint64_t nr;
+			uint64_t args[6];
+		} entry;
+		struct {
+			int64_t rval;
+			uint8_t is_error;
+		} exit;
+		struct {
+			uint64_t nr;
+			uint64_t args[6];
+			uint32_t ret_data;
+		} seccomp;
+	};
+};
+
+struct __ptrace_rseq_configuration {
+	uint64_t rseq_abi_pointer;
+	uint32_t rseq_abi_size;
+	uint32_t signature;
+	uint32_t flags;
+	uint32_t pad;
+};
+
 long ptrace(int, ...);
 
 #ifdef __cplusplus
diff --git a/include/sys/random.h b/include/sys/random.h
index 4ee7bf2c..59e40ab8 100644
--- a/include/sys/random.h
+++ b/include/sys/random.h
@@ -10,6 +10,7 @@ extern "C" {
 
 #define GRND_NONBLOCK	0x0001
 #define GRND_RANDOM	0x0002
+#define GRND_INSECURE	0x0004
 
 ssize_t getrandom(void *, size_t, unsigned);
 
diff --git a/include/sys/reg.h b/include/sys/reg.h
index b47452d0..0272e137 100644
--- a/include/sys/reg.h
+++ b/include/sys/reg.h
@@ -4,6 +4,15 @@
 #include <limits.h>
 #include <unistd.h>
 
+#include <bits/alltypes.h>
+
+#undef __WORDSIZE
+#if __LONG_MAX == 0x7fffffffL
+#define __WORDSIZE 32
+#else
+#define __WORDSIZE 64
+#endif
+
 #include <bits/reg.h>
 
 #endif
diff --git a/include/sys/resource.h b/include/sys/resource.h
index 70d793d5..e8bfbe1f 100644
--- a/include/sys/resource.h
+++ b/include/sys/resource.h
@@ -90,11 +90,12 @@ int prlimit(pid_t, int, const struct rlimit *, struct rlimit *);
 #define RLIMIT_MSGQUEUE 12
 #define RLIMIT_NICE    13
 #define RLIMIT_RTPRIO  14
-#define RLIMIT_NLIMITS 15
+#define RLIMIT_RTTIME  15
+#define RLIMIT_NLIMITS 16
 
 #define RLIM_NLIMITS RLIMIT_NLIMITS
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define RLIM64_INFINITY RLIM_INFINITY
 #define RLIM64_SAVED_CUR RLIM_SAVED_CUR
 #define RLIM64_SAVED_MAX RLIM_SAVED_MAX
@@ -104,6 +105,10 @@ int prlimit(pid_t, int, const struct rlimit *, struct rlimit *);
 #define rlim64_t rlim_t
 #endif
 
+#if _REDIR_TIME64
+__REDIR(getrusage, __getrusage_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/select.h b/include/sys/select.h
index d34cbf10..b3bab1d5 100644
--- a/include/sys/select.h
+++ b/include/sys/select.h
@@ -35,6 +35,11 @@ int pselect (int, fd_set *__restrict, fd_set *__restrict, fd_set *__restrict, co
 #define NFDBITS (8*(int)sizeof(long))
 #endif
 
+#if _REDIR_TIME64
+__REDIR(select, __select_time64);
+__REDIR(pselect, __pselect_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/sem.h b/include/sys/sem.h
index 61cdb83d..a747784e 100644
--- a/include/sys/sem.h
+++ b/include/sys/sem.h
@@ -25,15 +25,13 @@ extern "C" {
 #define SETVAL		16
 #define SETALL		17
 
-#include <endian.h>
-
 #include <bits/sem.h>
 
 #define _SEM_SEMUN_UNDEFINED 1
 
-#define SEM_STAT 18
+#define SEM_STAT (18 | (IPC_STAT & 0x100))
 #define SEM_INFO 19
-#define SEM_STAT_ANY 20
+#define SEM_STAT_ANY (20 | (IPC_STAT & 0x100))
 
 struct  seminfo {
 	int semmap;
@@ -62,6 +60,12 @@ int semop(int, struct sembuf *, size_t);
 int semtimedop(int, struct sembuf *, size_t, const struct timespec *);
 #endif
 
+#if _REDIR_TIME64
+#ifdef _GNU_SOURCE
+__REDIR(semtimedop, __semtimedop_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/sendfile.h b/include/sys/sendfile.h
index e7570d8e..253a041b 100644
--- a/include/sys/sendfile.h
+++ b/include/sys/sendfile.h
@@ -10,7 +10,7 @@ extern "C" {
 
 ssize_t sendfile(int, int, off_t *, size_t);
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define sendfile64 sendfile
 #define off64_t off_t
 #endif
diff --git a/include/sys/shm.h b/include/sys/shm.h
index 662fde59..fd708cab 100644
--- a/include/sys/shm.h
+++ b/include/sys/shm.h
@@ -33,9 +33,9 @@ extern "C" {
 
 #define SHM_LOCK 11
 #define SHM_UNLOCK 12
-#define SHM_STAT 13
+#define SHM_STAT (13 | (IPC_STAT & 0x100))
 #define SHM_INFO 14
-#define SHM_STAT_ANY 15
+#define SHM_STAT_ANY (15 | (IPC_STAT & 0x100))
 #define SHM_DEST 01000
 #define SHM_LOCKED 02000
 #define SHM_HUGETLB 04000
@@ -49,7 +49,9 @@ extern "C" {
 #define SHM_HUGE_2MB   (21 << 26)
 #define SHM_HUGE_8MB   (23 << 26)
 #define SHM_HUGE_16MB  (24 << 26)
+#define SHM_HUGE_32MB  (25 << 26)
 #define SHM_HUGE_256MB (28 << 26)
+#define SHM_HUGE_512MB (29 << 26)
 #define SHM_HUGE_1GB   (30 << 26)
 #define SHM_HUGE_2GB   (31 << 26)
 #define SHM_HUGE_16GB  (34U << 26)
diff --git a/include/sys/socket.h b/include/sys/socket.h
index 1dec4b00..6dc1e40a 100644
--- a/include/sys/socket.h
+++ b/include/sys/socket.h
@@ -19,6 +19,40 @@ extern "C" {
 
 #include <bits/socket.h>
 
+struct msghdr {
+	void *msg_name;
+	socklen_t msg_namelen;
+	struct iovec *msg_iov;
+#if __LONG_MAX > 0x7fffffff && __BYTE_ORDER == __BIG_ENDIAN
+	int __pad1;
+#endif
+	int msg_iovlen;
+#if __LONG_MAX > 0x7fffffff && __BYTE_ORDER == __LITTLE_ENDIAN
+	int __pad1;
+#endif
+	void *msg_control;
+#if __LONG_MAX > 0x7fffffff && __BYTE_ORDER == __BIG_ENDIAN
+	int __pad2;
+#endif
+	socklen_t msg_controllen;
+#if __LONG_MAX > 0x7fffffff && __BYTE_ORDER == __LITTLE_ENDIAN
+	int __pad2;
+#endif
+	int msg_flags;
+};
+
+struct cmsghdr {
+#if __LONG_MAX > 0x7fffffff && __BYTE_ORDER == __BIG_ENDIAN
+	int __pad1;
+#endif
+	socklen_t cmsg_len;
+#if __LONG_MAX > 0x7fffffff && __BYTE_ORDER == __LITTLE_ENDIAN
+	int __pad1;
+#endif
+	int cmsg_level;
+	int cmsg_type;
+};
+
 #ifdef _GNU_SOURCE
 struct ucred {
 	pid_t pid;
@@ -182,8 +216,6 @@ struct linger {
 #define SO_PEERCRED     17
 #define SO_RCVLOWAT     18
 #define SO_SNDLOWAT     19
-#define SO_RCVTIMEO     20
-#define SO_SNDTIMEO     21
 #define SO_ACCEPTCONN   30
 #define SO_PEERSEC      31
 #define SO_SNDBUFFORCE  32
@@ -192,6 +224,28 @@ struct linger {
 #define SO_DOMAIN       39
 #endif
 
+#ifndef SO_RCVTIMEO
+#if __LONG_MAX == 0x7fffffff
+#define SO_RCVTIMEO     66
+#define SO_SNDTIMEO     67
+#else
+#define SO_RCVTIMEO     20
+#define SO_SNDTIMEO     21
+#endif
+#endif
+
+#ifndef SO_TIMESTAMP
+#if __LONG_MAX == 0x7fffffff
+#define SO_TIMESTAMP    63
+#define SO_TIMESTAMPNS  64
+#define SO_TIMESTAMPING 65
+#else
+#define SO_TIMESTAMP    29
+#define SO_TIMESTAMPNS  35
+#define SO_TIMESTAMPING 37
+#endif
+#endif
+
 #define SO_SECURITY_AUTHENTICATION              22
 #define SO_SECURITY_ENCRYPTION_TRANSPORT        23
 #define SO_SECURITY_ENCRYPTION_NETWORK          24
@@ -203,14 +257,10 @@ struct linger {
 #define SO_GET_FILTER           SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
-#define SO_TIMESTAMP            29
 #define SCM_TIMESTAMP           SO_TIMESTAMP
-
 #define SO_PASSSEC              34
-#define SO_TIMESTAMPNS          35
 #define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
 #define SO_MARK                 36
-#define SO_TIMESTAMPING         37
 #define SCM_TIMESTAMPING        SO_TIMESTAMPING
 #define SO_RXQ_OVFL             40
 #define SO_WIFI_STATUS          41
@@ -235,6 +285,12 @@ struct linger {
 #define SCM_TIMESTAMPING_PKTINFO 58
 #define SO_PEERGROUPS           59
 #define SO_ZEROCOPY             60
+#define SO_TXTIME               61
+#define SCM_TXTIME              SO_TXTIME
+#define SO_BINDTOIFINDEX        62
+#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_PREFER_BUSY_POLL     69
+#define SO_BUSY_POLL_BUDGET     70
 
 #ifndef SOL_SOCKET
 #define SOL_SOCKET      1
@@ -347,6 +403,12 @@ int setsockopt (int, int, int, const void *, socklen_t);
 
 int sockatmark (int);
 
+#if _REDIR_TIME64
+#ifdef _GNU_SOURCE
+__REDIR(recvmmsg, __recvmmsg_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/stat.h b/include/sys/stat.h
index 9d096624..c924ce2f 100644
--- a/include/sys/stat.h
+++ b/include/sys/stat.h
@@ -18,6 +18,13 @@ extern "C" {
 #define __NEED_blkcnt_t
 #define __NEED_struct_timespec
 
+#ifdef _GNU_SOURCE
+#define __NEED_int64_t
+#define __NEED_uint64_t
+#define __NEED_uint32_t
+#define __NEED_uint16_t
+#endif
+
 #include <bits/alltypes.h>
 
 #include <bits/stat.h>
@@ -98,7 +105,72 @@ int lchmod(const char *, mode_t);
 #define S_IEXEC S_IXUSR
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_GNU_SOURCE)
+#define STATX_TYPE 1U
+#define STATX_MODE 2U
+#define STATX_NLINK 4U
+#define STATX_UID 8U
+#define STATX_GID 0x10U
+#define STATX_ATIME 0x20U
+#define STATX_MTIME 0x40U
+#define STATX_CTIME 0x80U
+#define STATX_INO 0x100U
+#define STATX_SIZE 0x200U
+#define STATX_BLOCKS 0x400U
+#define STATX_BASIC_STATS 0x7ffU
+#define STATX_BTIME 0x800U
+#define STATX_ALL 0xfffU
+#define STATX_MNT_ID 0x1000U
+#define STATX_DIOALIGN 0x2000U
+#define STATX_MNT_ID_UNIQUE 0x4000U
+
+#define STATX_ATTR_COMPRESSED 0x4
+#define STATX_ATTR_IMMUTABLE 0x10
+#define STATX_ATTR_APPEND 0x20
+#define STATX_ATTR_NODUMP 0x40
+#define STATX_ATTR_ENCRYPTED 0x800
+#define STATX_ATTR_AUTOMOUNT 0x1000
+#define STATX_ATTR_MOUNT_ROOT 0x2000
+#define STATX_ATTR_VERITY 0x100000
+#define STATX_ATTR_DAX 0x200000
+
+struct statx_timestamp {
+	int64_t tv_sec;
+	uint32_t tv_nsec, __pad;
+};
+
+struct statx {
+	uint32_t stx_mask;
+	uint32_t stx_blksize;
+	uint64_t stx_attributes;
+	uint32_t stx_nlink;
+	uint32_t stx_uid;
+	uint32_t stx_gid;
+	uint16_t stx_mode;
+	uint16_t __pad0[1];
+	uint64_t stx_ino;
+	uint64_t stx_size;
+	uint64_t stx_blocks;
+	uint64_t stx_attributes_mask;
+	struct statx_timestamp stx_atime;
+	struct statx_timestamp stx_btime;
+	struct statx_timestamp stx_ctime;
+	struct statx_timestamp stx_mtime;
+	uint32_t stx_rdev_major;
+	uint32_t stx_rdev_minor;
+	uint32_t stx_dev_major;
+	uint32_t stx_dev_minor;
+	uint64_t stx_mnt_id;
+	uint32_t stx_dio_mem_align;
+	uint32_t stx_dio_offset_align;
+	uint64_t stx_subvol;
+	uint64_t __pad1[11];
+};
+
+int statx(int, const char *__restrict, int, unsigned, struct statx *__restrict);
+#endif
+
+#if defined(_LARGEFILE64_SOURCE)
 #define stat64 stat
 #define fstat64 fstat
 #define lstat64 lstat
@@ -110,6 +182,15 @@ int lchmod(const char *, mode_t);
 #define off64_t off_t
 #endif
 
+#if _REDIR_TIME64
+__REDIR(stat, __stat_time64);
+__REDIR(fstat, __fstat_time64);
+__REDIR(lstat, __lstat_time64);
+__REDIR(fstatat, __fstatat_time64);
+__REDIR(futimens, __futimens_time64);
+__REDIR(utimensat, __utimensat_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/statfs.h b/include/sys/statfs.h
index 6f4c6230..7a2e11cd 100644
--- a/include/sys/statfs.h
+++ b/include/sys/statfs.h
@@ -18,7 +18,7 @@ typedef struct __fsid_t {
 int statfs (const char *, struct statfs *);
 int fstatfs (int, struct statfs *);
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define statfs64 statfs
 #define fstatfs64 fstatfs
 #define fsblkcnt64_t fsblkcnt_t
diff --git a/include/sys/statvfs.h b/include/sys/statvfs.h
index ef07d684..71d9d1f9 100644
--- a/include/sys/statvfs.h
+++ b/include/sys/statvfs.h
@@ -11,8 +11,6 @@ extern "C" {
 #define __NEED_fsfilcnt_t
 #include <bits/alltypes.h>
 
-#include <endian.h>
-
 struct statvfs {
 	unsigned long f_bsize, f_frsize;
 	fsblkcnt_t f_blocks, f_bfree, f_bavail;
@@ -25,7 +23,8 @@ struct statvfs {
 	unsigned long f_fsid;
 #endif
 	unsigned long f_flag, f_namemax;
-	int __reserved[6];
+	unsigned int f_type;
+	int __reserved[5];
 };
 
 int statvfs (const char *__restrict, struct statvfs *__restrict);
@@ -44,7 +43,7 @@ int fstatvfs (int, struct statvfs *);
 #define ST_NODIRATIME  2048
 #define ST_RELATIME    4096
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define statvfs64 statvfs
 #define fstatvfs64 fstatvfs
 #define fsblkcnt64_t fsblkcnt_t
diff --git a/include/sys/time.h b/include/sys/time.h
index c5cab814..cdc67ef6 100644
--- a/include/sys/time.h
+++ b/include/sys/time.h
@@ -56,6 +56,20 @@ int adjtime (const struct timeval *, struct timeval *);
 	(void)0 )
 #endif
 
+#if _REDIR_TIME64
+__REDIR(gettimeofday, __gettimeofday_time64);
+__REDIR(getitimer, __getitimer_time64);
+__REDIR(setitimer, __setitimer_time64);
+__REDIR(utimes, __utimes_time64);
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+__REDIR(futimes, __futimes_time64);
+__REDIR(futimesat, __futimesat_time64);
+__REDIR(lutimes, __lutimes_time64);
+__REDIR(settimeofday, __settimeofday_time64);
+__REDIR(adjtime, __adjtime64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/timeb.h b/include/sys/timeb.h
index 108c1f5c..628239b7 100644
--- a/include/sys/timeb.h
+++ b/include/sys/timeb.h
@@ -4,6 +4,8 @@
 extern "C" {
 #endif
 
+#include <features.h>
+
 #define __NEED_time_t
 
 #include <bits/alltypes.h>
@@ -16,6 +18,10 @@ struct timeb {
 
 int ftime(struct timeb *);
 
+#if _REDIR_TIME64
+__REDIR(ftime, __ftime64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/timerfd.h b/include/sys/timerfd.h
index 2794d36a..1b832cdd 100644
--- a/include/sys/timerfd.h
+++ b/include/sys/timerfd.h
@@ -20,6 +20,11 @@ int timerfd_create(int, int);
 int timerfd_settime(int, int, const struct itimerspec *, struct itimerspec *);
 int timerfd_gettime(int, struct itimerspec *);
 
+#if _REDIR_TIME64
+__REDIR(timerfd_settime, __timerfd_settime64);
+__REDIR(timerfd_gettime, __timerfd_gettime64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/timex.h b/include/sys/timex.h
index 2e688880..8b417e1b 100644
--- a/include/sys/timex.h
+++ b/include/sys/timex.h
@@ -91,6 +91,11 @@ struct timex {
 int adjtimex(struct timex *);
 int clock_adjtime(clockid_t, struct timex *);
 
+#if _REDIR_TIME64
+__REDIR(adjtimex, __adjtimex_time64);
+__REDIR(clock_adjtime, __clock_adjtime64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sys/ttydefaults.h b/include/sys/ttydefaults.h
index d251b715..edb55bc4 100644
--- a/include/sys/ttydefaults.h
+++ b/include/sys/ttydefaults.h
@@ -6,16 +6,11 @@
 #define TTYDEF_LFLAG (ECHO | ICANON | ISIG | IEXTEN | ECHOE|ECHOKE|ECHOCTL)
 #define TTYDEF_CFLAG (CREAD | CS7 | PARENB | HUPCL)
 #define TTYDEF_SPEED (B9600)
-#define CTRL(x) (x&037)
+#define CTRL(x) ((x)&037)
 #define CEOF CTRL('d')
 
-#ifdef _POSIX_VDISABLE
-#define CEOL _POSIX_VDISABLE
-#define CSTATUS _POSIX_VDISABLE
-#else
 #define CEOL '\0'
 #define CSTATUS '\0'
-#endif
 
 #define CERASE 0177
 #define CINTR CTRL('c')
diff --git a/include/sys/types.h b/include/sys/types.h
index 75e489c5..3363374f 100644
--- a/include/sys/types.h
+++ b/include/sys/types.h
@@ -69,10 +69,9 @@ typedef long long quad_t;
 typedef unsigned long long u_quad_t;
 #include <endian.h>
 #include <sys/select.h>
-#include <sys/sysmacros.h>
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define blkcnt64_t blkcnt_t
 #define fsblkcnt64_t fsblkcnt_t
 #define fsfilcnt64_t fsfilcnt_t
diff --git a/include/sys/uio.h b/include/sys/uio.h
index 00f73a2f..5e99c7fa 100644
--- a/include/sys/uio.h
+++ b/include/sys/uio.h
@@ -29,7 +29,7 @@ ssize_t writev (int, const struct iovec *, int);
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 ssize_t preadv (int, const struct iovec *, int, off_t);
 ssize_t pwritev (int, const struct iovec *, int, off_t);
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define preadv64 preadv
 #define pwritev64 pwritev
 #define off64_t off_t
@@ -39,6 +39,14 @@ ssize_t pwritev (int, const struct iovec *, int, off_t);
 #ifdef _GNU_SOURCE
 ssize_t process_vm_writev(pid_t, const struct iovec *, unsigned long, const struct iovec *, unsigned long, unsigned long);
 ssize_t process_vm_readv(pid_t, const struct iovec *, unsigned long, const struct iovec *, unsigned long, unsigned long);
+ssize_t preadv2 (int, const struct iovec *, int, off_t, int);
+ssize_t pwritev2 (int, const struct iovec *, int, off_t, int);
+#define RWF_HIPRI 0x00000001
+#define RWF_DSYNC 0x00000002
+#define RWF_SYNC 0x00000004
+#define RWF_NOWAIT 0x00000008
+#define RWF_APPEND 0x00000010
+#define RWF_NOAPPEND 0x00000020
 #endif
 
 #ifdef __cplusplus
diff --git a/include/sys/user.h b/include/sys/user.h
index 96a03400..511caba3 100644
--- a/include/sys/user.h
+++ b/include/sys/user.h
@@ -8,6 +8,15 @@ extern "C" {
 #include <stdint.h>
 #include <unistd.h>
 
+#include <bits/alltypes.h>
+
+#undef __WORDSIZE
+#if __LONG_MAX == 0x7fffffffL
+#define __WORDSIZE 32
+#else
+#define __WORDSIZE 64
+#endif
+
 #include <bits/user.h>
 
 #ifdef __cplusplus
diff --git a/include/sys/wait.h b/include/sys/wait.h
index 50c5c709..8ced671b 100644
--- a/include/sys/wait.h
+++ b/include/sys/wait.h
@@ -13,7 +13,8 @@ extern "C" {
 typedef enum {
 	P_ALL = 0,
 	P_PID = 1,
-	P_PGID = 2
+	P_PGID = 2,
+	P_PIDFD = 3
 } idtype_t;
 
 pid_t wait (int *);
@@ -49,10 +50,17 @@ pid_t wait4 (pid_t, int *, int, struct rusage *);
 #define WSTOPSIG(s) WEXITSTATUS(s)
 #define WCOREDUMP(s) ((s) & 0x80)
 #define WIFEXITED(s) (!WTERMSIG(s))
-#define WIFSTOPPED(s) ((short)((((s)&0xffff)*0x10001)>>8) > 0x7f00)
+#define WIFSTOPPED(s) ((short)((((s)&0xffff)*0x10001U)>>8) > 0x7f00)
 #define WIFSIGNALED(s) (((s)&0xffff)-1U < 0xffu)
 #define WIFCONTINUED(s) ((s) == 0xffff)
 
+#if _REDIR_TIME64
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+__REDIR(wait3, __wait3_time64);
+__REDIR(wait4, __wait4_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/syslog.h b/include/syslog.h
index 5b4d2964..57599e07 100644
--- a/include/syslog.h
+++ b/include/syslog.h
@@ -18,7 +18,7 @@ extern "C" {
 
 #define LOG_PRIMASK 7
 #define LOG_PRI(p) ((p)&LOG_PRIMASK)
-#define	LOG_MAKEPRI(f, p) (((f)<<3)|(p))
+#define	LOG_MAKEPRI(f, p) ((f)|(p))
 
 #define LOG_MASK(p) (1<<(p))
 #define LOG_UPTO(p) ((1<<((p)+1))-1)
diff --git a/include/tar.h b/include/tar.h
index 2eba66ec..be589842 100644
--- a/include/tar.h
+++ b/include/tar.h
@@ -1,13 +1,9 @@
 #ifndef	_TAR_H
 #define	_TAR_H
 
-#include <features.h>
-
 #define TSUID   04000
 #define TSGID   02000
-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define TSVTX   01000
-#endif
 #define TUREAD  00400
 #define TUWRITE 00200
 #define TUEXEC  00100
diff --git a/include/termios.h b/include/termios.h
index d73c780d..cbb53301 100644
--- a/include/termios.h
+++ b/include/termios.h
@@ -8,6 +8,7 @@ extern "C" {
 #include <features.h>
 
 #define __NEED_pid_t
+#define __NEED_struct_winsize
 
 #include <bits/alltypes.h>
 
@@ -27,6 +28,9 @@ int cfsetispeed (struct termios *, speed_t);
 int tcgetattr (int, struct termios *);
 int tcsetattr (int, int, const struct termios *);
 
+int tcgetwinsize (int, struct winsize *);
+int tcsetwinsize (int, const struct winsize *);
+
 int tcsendbreak (int, int);
 int tcdrain (int);
 int tcflush (int, int);
diff --git a/include/threads.h b/include/threads.h
index 8122b3b1..52ec3100 100644
--- a/include/threads.h
+++ b/include/threads.h
@@ -80,6 +80,12 @@ void tss_delete(tss_t);
 int tss_set(tss_t, void *);
 void *tss_get(tss_t);
 
+#if _REDIR_TIME64
+__REDIR(thrd_sleep, __thrd_sleep_time64);
+__REDIR(mtx_timedlock, __mtx_timedlock_time64);
+__REDIR(cnd_timedwait, __cnd_timedwait_time64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/time.h b/include/time.h
index 672b3fc3..3d948372 100644
--- a/include/time.h
+++ b/include/time.h
@@ -7,7 +7,9 @@ extern "C" {
 
 #include <features.h>
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
@@ -130,6 +132,34 @@ int stime(const time_t *);
 time_t timegm(struct tm *);
 #endif
 
+#if _REDIR_TIME64
+__REDIR(time, __time64);
+__REDIR(difftime, __difftime64);
+__REDIR(mktime, __mktime64);
+__REDIR(gmtime, __gmtime64);
+__REDIR(localtime, __localtime64);
+__REDIR(ctime, __ctime64);
+__REDIR(timespec_get, __timespec_get_time64);
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
+ || defined(_BSD_SOURCE)
+__REDIR(gmtime_r, __gmtime64_r);
+__REDIR(localtime_r, __localtime64_r);
+__REDIR(ctime_r, __ctime64_r);
+__REDIR(nanosleep, __nanosleep_time64);
+__REDIR(clock_getres, __clock_getres_time64);
+__REDIR(clock_gettime, __clock_gettime64);
+__REDIR(clock_settime, __clock_settime64);
+__REDIR(clock_nanosleep, __clock_nanosleep_time64);
+__REDIR(timer_settime, __timer_settime64);
+__REDIR(timer_gettime, __timer_gettime64);
+#endif
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+__REDIR(stime, __stime64);
+__REDIR(timegm, __timegm_time64);
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/ucontext.h b/include/ucontext.h
index 3bb776ed..0f757125 100644
--- a/include/ucontext.h
+++ b/include/ucontext.h
@@ -15,7 +15,7 @@ extern "C" {
 struct __ucontext;
 
 int  getcontext(struct __ucontext *);
-void makecontext(struct __ucontext *, void (*)(void), int, ...);
+void makecontext(struct __ucontext *, void (*)(), int, ...);
 int  setcontext(const struct __ucontext *);
 int  swapcontext(struct __ucontext *, const struct __ucontext *);
 
diff --git a/include/unistd.h b/include/unistd.h
index 9485da7a..42b0e82b 100644
--- a/include/unistd.h
+++ b/include/unistd.h
@@ -14,8 +14,12 @@ extern "C" {
 #define SEEK_SET 0
 #define SEEK_CUR 1
 #define SEEK_END 2
+#define SEEK_DATA 3
+#define SEEK_HOLE 4
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
@@ -82,6 +86,7 @@ unsigned sleep(unsigned);
 int pause(void);
 
 pid_t fork(void);
+pid_t _Fork(void);
 int execve(const char *, char *const [], char *const []);
 int execv(const char *, char *const []);
 int execle(const char *, const char *, ...);
@@ -176,6 +181,7 @@ long syscall(long, ...);
 int execvpe(const char *, char *const [], char *const []);
 int issetugid(void);
 int getentropy(void *, size_t);
+extern int optreset;
 #endif
 
 #ifdef _GNU_SOURCE
@@ -188,9 +194,11 @@ char *get_current_dir_name(void);
 int syncfs(int);
 int euidaccess(const char *, int);
 int eaccess(const char *, int);
+ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, unsigned);
+pid_t gettid(void);
 #endif
 
-#if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
+#if defined(_LARGEFILE64_SOURCE)
 #define lseek64 lseek
 #define pread64 pread
 #define pwrite64 pwrite
@@ -249,7 +257,13 @@ int eaccess(const char *, int);
 
 #define _POSIX2_C_BIND          _POSIX_VERSION
 
-#include <bits/posix.h>
+#if __LONG_MAX == 0x7fffffffL
+#define _POSIX_V6_ILP32_OFFBIG  1
+#define _POSIX_V7_ILP32_OFFBIG  1
+#else
+#define _POSIX_V6_LP64_OFF64  1
+#define _POSIX_V7_LP64_OFF64  1
+#endif
 
 
 
@@ -417,6 +431,8 @@ int eaccess(const char *, int);
 #define _SC_XOPEN_STREAMS	246
 #define _SC_THREAD_ROBUST_PRIO_INHERIT	247
 #define _SC_THREAD_ROBUST_PRIO_PROTECT	248
+#define _SC_MINSIGSTKSZ	249
+#define _SC_SIGSTKSZ	250
 
 #define _CS_PATH	0
 #define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS	1
@@ -459,6 +475,8 @@ int eaccess(const char *, int);
 #define _CS_POSIX_V7_LPBIG_OFFBIG_LINTFLAGS	1147
 #define _CS_V6_ENV	1148
 #define _CS_V7_ENV	1149
+#define _CS_POSIX_V7_THREADS_CFLAGS	1150
+#define _CS_POSIX_V7_THREADS_LDFLAGS	1151
 
 #ifdef __cplusplus
 }
diff --git a/include/utime.h b/include/utime.h
index dd5ff927..5755bd53 100644
--- a/include/utime.h
+++ b/include/utime.h
@@ -5,6 +5,8 @@
 extern "C" {
 #endif
 
+#include <features.h>
+
 #define __NEED_time_t
 
 #include <bits/alltypes.h>
@@ -16,6 +18,10 @@ struct utimbuf {
 
 int utime (const char *, const struct utimbuf *);
 
+#if _REDIR_TIME64
+__REDIR(utime, __utime64);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/utmpx.h b/include/utmpx.h
index 0429014d..b293f427 100644
--- a/include/utmpx.h
+++ b/include/utmpx.h
@@ -16,6 +16,7 @@ extern "C" {
 
 struct utmpx {
 	short ut_type;
+	short __ut_pad1;
 	pid_t ut_pid;
 	char ut_line[32];
 	char ut_id[4];
@@ -25,7 +26,11 @@ struct utmpx {
 		short __e_termination;
 		short __e_exit;
 	} ut_exit;
-	long ut_session;
+#if __BYTE_ORDER == 1234
+	int ut_session, __ut_pad2;
+#else
+	int __ut_pad2, ut_session;
+#endif
 	struct timeval ut_tv;
 	unsigned ut_addr_v6[4];
 	char __unused[20];
diff --git a/include/wchar.h b/include/wchar.h
index 369b1e9f..ed5d774d 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -14,6 +14,10 @@ extern "C" {
 #define __NEED_wint_t
 #define __NEED_mbstate_t
 
+#if __STDC_VERSION__ < 201112L
+#define __NEED_struct__IO_FILE
+#endif
+
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define __NEED_locale_t
@@ -34,7 +38,9 @@ extern "C" {
 #define WCHAR_MIN (-1-0x7fffffff+L'\0')
 #endif
 
-#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__cplusplus)
 #define NULL 0L
 #else
 #define NULL ((void*)0)
diff --git a/ldso/dlstart.c b/ldso/dlstart.c
index 20d50f2c..259f5e18 100644
--- a/ldso/dlstart.c
+++ b/ldso/dlstart.c
@@ -140,6 +140,21 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
 		size_t *rel_addr = (void *)(base + rel[0]);
 		*rel_addr = base + rel[2];
 	}
+
+	rel = (void *)(base+dyn[DT_RELR]);
+	rel_size = dyn[DT_RELRSZ];
+	size_t *relr_addr = 0;
+	for (; rel_size; rel++, rel_size-=sizeof(size_t)) {
+		if ((rel[0]&1) == 0) {
+			relr_addr = (void *)(base + rel[0]);
+			*relr_addr++ += base;
+		} else {
+			for (size_t i=0, bitmap=rel[0]; bitmap>>=1; i++)
+				if (bitmap&1)
+					relr_addr[i] += base;
+			relr_addr += 8*sizeof(size_t)-1;
+		}
+	}
 #endif
 
 	stage2_func dls2;
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index ec921dfd..715948f4 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1,5 +1,5 @@
 #define _GNU_SOURCE
-#include <stdio.h>
+#define SYSCALL_NO_TLS 1
 #include <stdlib.h>
 #include <stdarg.h>
 #include <stddef.h>
@@ -17,16 +17,36 @@
 #include <pthread.h>
 #include <ctype.h>
 #include <dlfcn.h>
+#include <semaphore.h>
+#include <sys/membarrier.h>
 #include "pthread_impl.h"
+#include "fork_impl.h"
 #include "libc.h"
 #include "dynlink.h"
-#include "malloc_impl.h"
 
-static void error(const char *, ...);
+static size_t ldso_page_size;
+/* libc.h may have defined a macro for dynamic PAGE_SIZE already, but
+ * PAGESIZE is only defined if it's constant for the arch. */
+#ifndef PAGESIZE
+#undef PAGE_SIZE
+#define PAGE_SIZE ldso_page_size
+#endif
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc __libc_realloc
+#define free __libc_free
+
+static void error_impl(const char *, ...);
+static void error_noop(const char *, ...);
+static void (*error)(const char *, ...) = error_noop;
 
 #define MAXP2(a,b) (-(-(a)&-(b)))
 #define ALIGN(x,y) ((x)+(y)-1 & -(y))
 
+#define container_of(p,t,m) ((t*)((char *)(p)-offsetof(t,m)))
+#define countof(a) ((sizeof (a))/(sizeof (a)[0]))
+
 struct debug {
 	int ver;
 	void *head;
@@ -67,14 +87,19 @@ struct dso {
 	char relocated;
 	char constructed;
 	char kernel_mapped;
+	char mark;
+	char bfs_built;
+	char runtime_loaded;
 	struct dso **deps, *needed_by;
+	size_t ndeps_direct;
+	size_t next_dep;
+	pthread_t ctor_visitor;
 	char *rpath_orig, *rpath;
 	struct tls_module tls;
 	size_t tls_id;
 	size_t relro_start, relro_end;
 	uintptr_t *new_dtv;
 	unsigned char *new_tls;
-	volatile int new_dtv_idx, new_tls_idx;
 	struct td_index *td_index;
 	struct dso *fini_next;
 	char *shortname;
@@ -96,6 +121,8 @@ struct symdef {
 	struct dso *dso;
 };
 
+typedef void (*stage3_func)(size_t *, size_t *);
+
 static struct builtin_tls {
 	char c;
 	struct pthread pt;
@@ -114,19 +141,26 @@ static int runtime;
 static int ldd_mode;
 static int ldso_fail;
 static int noload;
+static int shutting_down;
 static jmp_buf *rtld_fail;
 static pthread_rwlock_t lock;
 static struct debug debug;
 static struct tls_module *tls_tail;
 static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
 static size_t static_tls_cnt;
-static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
+static pthread_mutex_t init_fini_lock;
+static pthread_cond_t ctor_cond;
+static struct dso *builtin_deps[2];
+static struct dso *const no_deps[1];
+static struct dso *builtin_ctor_queue[4];
+static struct dso **main_ctor_queue;
 static struct fdpic_loadmap *app_loadmap;
 static struct fdpic_dummy_loadmap app_dummy_loadmap;
-static struct dso *const nodeps_dummy;
 
 struct debug *_dl_debug_addr = &debug;
 
+extern weak hidden char __ehdr_start[];
+
 extern hidden int __malloc_replaced;
 
 hidden void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0;
@@ -167,8 +201,14 @@ static void *laddr_pg(const struct dso *p, size_t v)
 	}
 	return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr);
 }
-#define fpaddr(p, v) ((void (*)())&(struct funcdesc){ \
-	laddr(p, v), (p)->got })
+static void (*fdbarrier(void *p))()
+{
+	void (*fd)();
+	__asm__("" : "=r"(fd) : "0"(p));
+	return fd;
+}
+#define fpaddr(p, v) fdbarrier((&(struct funcdesc){ \
+	laddr(p, v), (p)->got }))
 #else
 #define laddr(p, v) (void *)((p)->base + (v))
 #define laddr_pg(p, v) laddr(p, v)
@@ -180,7 +220,8 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	size_t i;
 	for (i=0; i<cnt; i++) a[i] = 0;
 	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
-		a[0] |= 1UL<<v[0];
+		if (v[0] < 8*sizeof(long))
+			a[0] |= 1UL<<v[0];
 		a[v[0]] = v[1];
 	}
 }
@@ -267,12 +308,16 @@ static Sym *gnu_lookup_filtered(uint32_t h1, uint32_t *hashtab, struct dso *dso,
 #define ARCH_SYM_REJECT_UND(s) 0
 #endif
 
-static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
+#if defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline struct symdef find_sym2(struct dso *dso, const char *s, int need_def, int use_deps)
 {
 	uint32_t h = 0, gh = gnu_hash(s), gho = gh / (8*sizeof(size_t)), *ght;
 	size_t ghm = 1ul << gh % (8*sizeof(size_t));
 	struct symdef def = {0};
-	for (; dso; dso=dso->syms_next) {
+	struct dso **deps = use_deps ? dso->deps : 0;
+	for (; dso; dso=use_deps ? *deps++ : dso->syms_next) {
 		Sym *sym;
 		if ((ght = dso->ghashtab)) {
 			sym = gnu_lookup_filtered(gh, ght, dso, s, gho, ghm);
@@ -297,6 +342,40 @@ static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 	return def;
 }
 
+static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
+{
+	return find_sym2(dso, s, need_def, 0);
+}
+
+static struct symdef get_lfs64(const char *name)
+{
+	const char *p;
+	static const char lfs64_list[] =
+		"aio_cancel\0aio_error\0aio_fsync\0aio_read\0aio_return\0"
+		"aio_suspend\0aio_write\0alphasort\0creat\0fallocate\0"
+		"fgetpos\0fopen\0freopen\0fseeko\0fsetpos\0fstat\0"
+		"fstatat\0fstatfs\0fstatvfs\0ftello\0ftruncate\0ftw\0"
+		"getdents\0getrlimit\0glob\0globfree\0lio_listio\0"
+		"lockf\0lseek\0lstat\0mkostemp\0mkostemps\0mkstemp\0"
+		"mkstemps\0mmap\0nftw\0open\0openat\0posix_fadvise\0"
+		"posix_fallocate\0pread\0preadv\0prlimit\0pwrite\0"
+		"pwritev\0readdir\0scandir\0sendfile\0setrlimit\0"
+		"stat\0statfs\0statvfs\0tmpfile\0truncate\0versionsort\0"
+		"__fxstat\0__fxstatat\0__lxstat\0__xstat\0";
+	if (!strcmp(name, "readdir64_r"))
+		return find_sym(&ldso, "readdir_r", 1);
+	size_t l = strnlen(name, 18);
+	if (l<2 || name[l-2]!='6' || name[l-1]!='4' || name[l])
+		goto nomatch;
+	for (p=lfs64_list; *p; p++) {
+		if (!strncmp(name, p, l-2) && !p[l-2])
+			return find_sym(&ldso, p, 1);
+		while (*p) p++;
+	}
+nomatch:
+	return (struct symdef){ 0 };
+}
+
 static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
 {
 	unsigned char *base = dso->base;
@@ -347,9 +426,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			sym = syms + sym_index;
 			name = strings + sym->st_name;
 			ctx = type==REL_COPY ? head->syms_next : head;
-			def = (sym->st_info&0xf) == STT_SECTION
+			def = (sym->st_info>>4) == STB_LOCAL
 				? (struct symdef){ .dso = dso, .sym = sym }
 				: find_sym(ctx, name, type==REL_PLT);
+			if (!def.sym) def = get_lfs64(name);
 			if (!def.sym && (sym->st_shndx != SHN_UNDEF
 			    || sym->st_info>>4 != STB_WEAK)) {
 				if (dso->lazy && (type==REL_PLT || type==REL_GOT)) {
@@ -374,7 +454,7 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 		tls_val = def.sym ? def.sym->st_value : 0;
 
 		if ((type == REL_TPOFF || type == REL_TPOFF_NEG)
-		    && runtime && def.dso->tls_id > static_tls_cnt) {
+		    && def.dso->tls_id > static_tls_cnt) {
 			error("Error relocating %s: %s: initial-exec TLS "
 				"resolves to dynamic definition in %s",
 				dso->name, name, def.dso->name);
@@ -382,8 +462,6 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 		}
 
 		switch(type) {
-		case REL_NONE:
-			break;
 		case REL_OFFSET:
 			addend -= (size_t)reloc_addr;
 		case REL_SYMBOLIC:
@@ -391,6 +469,9 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 		case REL_PLT:
 			*reloc_addr = sym_val + addend;
 			break;
+		case REL_USYMBOLIC:
+			memcpy(reloc_addr, &(size_t){sym_val + addend}, sizeof(size_t));
+			break;
 		case REL_RELATIVE:
 			*reloc_addr = (size_t)base + addend;
 			break;
@@ -433,8 +514,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			break;
 #endif
 		case REL_TLSDESC:
-			if (stride<3) addend = reloc_addr[1];
-			if (runtime && def.dso->tls_id > static_tls_cnt) {
+			if (stride<3) addend = reloc_addr[!TLSDESC_BACKWARDS];
+			if (def.dso->tls_id > static_tls_cnt) {
 				struct td_index *new = malloc(sizeof *new);
 				if (!new) {
 					error(
@@ -458,13 +539,13 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 					+ addend;
 #endif
 			}
-#ifdef TLSDESC_BACKWARDS
 			/* Some archs (32-bit ARM at least) invert the order of
 			 * the descriptor members. Fix them up here. */
-			size_t tmp = reloc_addr[0];
-			reloc_addr[0] = reloc_addr[1];
-			reloc_addr[1] = tmp;
-#endif
+			if (TLSDESC_BACKWARDS) {
+				size_t tmp = reloc_addr[0];
+				reloc_addr[0] = reloc_addr[1];
+				reloc_addr[1] = tmp;
+			}
 			break;
 		default:
 			error("Error relocating %s: unsupported relocation type %d",
@@ -475,6 +556,24 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	}
 }
 
+static void do_relr_relocs(struct dso *dso, size_t *relr, size_t relr_size)
+{
+	if (dso == &ldso) return; /* self-relocation was done in _dlstart */
+	unsigned char *base = dso->base;
+	size_t *reloc_addr;
+	for (; relr_size; relr++, relr_size-=sizeof(size_t))
+		if ((relr[0]&1) == 0) {
+			reloc_addr = laddr(dso, relr[0]);
+			*reloc_addr++ += (size_t)base;
+		} else {
+			int i = 0;
+			for (size_t bitmap=relr[0]; (bitmap>>=1); i++)
+				if (bitmap&1)
+					reloc_addr[i] += (size_t)base;
+			reloc_addr += 8*sizeof(size_t)-1;
+		}
+}
+
 static void redo_lazy_relocs()
 {
 	struct dso *p = lazy_head, *next;
@@ -517,16 +616,32 @@ static void reclaim_gaps(struct dso *dso)
 	for (; phcnt--; ph=(void *)((char *)ph+dso->phentsize)) {
 		if (ph->p_type!=PT_LOAD) continue;
 		if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
+		if (ph->p_memsz == 0) continue;
 		reclaim(dso, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
 		reclaim(dso, ph->p_vaddr+ph->p_memsz,
 			ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE);
 	}
 }
 
+static ssize_t read_loop(int fd, void *p, size_t n)
+{
+	for (size_t i=0; i<n; ) {
+		ssize_t l = read(fd, (char *)p+i, n-i);
+		if (l<0) {
+			if (errno==EINTR) continue;
+			else return -1;
+		}
+		if (l==0) return i;
+		i += l;
+	}
+	return n;
+}
+
 static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
 {
 	static int no_map_fixed;
 	char *q;
+	if (!n) return p;
 	if (!no_map_fixed) {
 		q = mmap(p, n, prot, flags|MAP_FIXED, fd, off);
 		if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL)
@@ -813,7 +928,7 @@ static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
 		case ENOENT:
 		case ENOTDIR:
 		case EACCES:
-			break;
+			return 0;
 		default:
 			return -1;
 		}
@@ -904,7 +1019,7 @@ static void *dl_mmap(size_t n)
 #else
 	p = (void *)__syscall(SYS_mmap, 0, n, prot, flags, -1, 0);
 #endif
-	return p == MAP_FAILED ? 0 : p;
+	return (unsigned long)p > -4096UL ? 0 : p;
 }
 
 static void makefuncdescs(struct dso *p)
@@ -1027,13 +1142,17 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 				snprintf(etc_ldso_path, sizeof etc_ldso_path,
 					"%.*s/etc/ld-musl-" LDSO_ARCH ".path",
 					(int)prefix_len, prefix);
-				FILE *f = fopen(etc_ldso_path, "rbe");
-				if (f) {
-					if (getdelim(&sys_path, (size_t[1]){0}, 0, f) <= 0) {
+				fd = open(etc_ldso_path, O_RDONLY|O_CLOEXEC);
+				if (fd>=0) {
+					size_t n = 0;
+					if (!fstat(fd, &st)) n = st.st_size;
+					if ((sys_path = malloc(n+1)))
+						sys_path[n] = 0;
+					if (!sys_path || read_loop(fd, sys_path, n)<0) {
 						free(sys_path);
 						sys_path = "";
 					}
-					fclose(f);
+					close(fd);
 				} else if (errno != ENOENT) {
 					sys_path = "";
 				}
@@ -1101,6 +1220,7 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 	p->ino = st.st_ino;
 	p->needed_by = needed_by;
 	p->name = p->buf;
+	p->runtime_loaded = runtime;
 	strcpy(p->name, pathname);
 	/* Add a shortname only if name arg was not an explicit pathname. */
 	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
@@ -1108,9 +1228,9 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 		p->tls_id = ++tls_cnt;
 		tls_align = MAXP2(tls_align, p->tls.align);
 #ifdef TLS_ABOVE_TP
-		p->tls.offset = tls_offset + ( (tls_align-1) &
-			-(tls_offset + (uintptr_t)p->tls.image) );
-		tls_offset += p->tls.size;
+		p->tls.offset = tls_offset + ( (p->tls.align-1) &
+			(-tls_offset + (uintptr_t)p->tls.image) );
+		tls_offset = p->tls.offset + p->tls.size;
 #else
 		tls_offset += p->tls.size + p->tls.align - 1;
 		tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
@@ -1136,30 +1256,99 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
 	return p;
 }
 
+static void load_direct_deps(struct dso *p)
+{
+	size_t i, cnt=0;
+
+	if (p->deps) return;
+	/* For head, all preloads are direct pseudo-dependencies.
+	 * Count and include them now to avoid realloc later. */
+	if (p==head) for (struct dso *q=p->next; q; q=q->next)
+		cnt++;
+	for (i=0; p->dynv[i]; i+=2)
+		if (p->dynv[i] == DT_NEEDED) cnt++;
+	/* Use builtin buffer for apps with no external deps, to
+	 * preserve property of no runtime failure paths. */
+	p->deps = (p==head && cnt<2) ? builtin_deps :
+		calloc(cnt+1, sizeof *p->deps);
+	if (!p->deps) {
+		error("Error loading dependencies for %s", p->name);
+		if (runtime) longjmp(*rtld_fail, 1);
+	}
+	cnt=0;
+	if (p==head) for (struct dso *q=p->next; q; q=q->next)
+		p->deps[cnt++] = q;
+	for (i=0; p->dynv[i]; i+=2) {
+		if (p->dynv[i] != DT_NEEDED) continue;
+		struct dso *dep = load_library(p->strings + p->dynv[i+1], p);
+		if (!dep) {
+			error("Error loading shared library %s: %m (needed by %s)",
+				p->strings + p->dynv[i+1], p->name);
+			if (runtime) longjmp(*rtld_fail, 1);
+			continue;
+		}
+		p->deps[cnt++] = dep;
+	}
+	p->deps[cnt] = 0;
+	p->ndeps_direct = cnt;
+}
+
 static void load_deps(struct dso *p)
 {
-	size_t i, ndeps=0;
-	struct dso ***deps = &p->deps, **tmp, *dep;
-	for (; p; p=p->next) {
-		for (i=0; p->dynv[i]; i+=2) {
-			if (p->dynv[i] != DT_NEEDED) continue;
-			dep = load_library(p->strings + p->dynv[i+1], p);
-			if (!dep) {
-				error("Error loading shared library %s: %m (needed by %s)",
-					p->strings + p->dynv[i+1], p->name);
-				if (runtime) longjmp(*rtld_fail, 1);
-				continue;
-			}
-			if (runtime) {
-				tmp = realloc(*deps, sizeof(*tmp)*(ndeps+2));
-				if (!tmp) longjmp(*rtld_fail, 1);
-				tmp[ndeps++] = dep;
-				tmp[ndeps] = 0;
-				*deps = tmp;
-			}
+	if (p->deps) return;
+	for (; p; p=p->next)
+		load_direct_deps(p);
+}
+
+static void extend_bfs_deps(struct dso *p)
+{
+	size_t i, j, cnt, ndeps_all;
+	struct dso **tmp;
+
+	/* Can't use realloc if the original p->deps was allocated at
+	 * program entry and malloc has been replaced, or if it's
+	 * the builtin non-allocated trivial main program deps array. */
+	int no_realloc = (__malloc_replaced && !p->runtime_loaded)
+		|| p->deps == builtin_deps;
+
+	if (p->bfs_built) return;
+	ndeps_all = p->ndeps_direct;
+
+	/* Mark existing (direct) deps so they won't be duplicated. */
+	for (i=0; p->deps[i]; i++)
+		p->deps[i]->mark = 1;
+
+	/* For each dependency already in the list, copy its list of direct
+	 * dependencies to the list, excluding any items already in the
+	 * list. Note that the list this loop iterates over will grow during
+	 * the loop, but since duplicates are excluded, growth is bounded. */
+	for (i=0; p->deps[i]; i++) {
+		struct dso *dep = p->deps[i];
+		for (j=cnt=0; j<dep->ndeps_direct; j++)
+			if (!dep->deps[j]->mark) cnt++;
+		tmp = no_realloc ? 
+			malloc(sizeof(*tmp) * (ndeps_all+cnt+1)) :
+			realloc(p->deps, sizeof(*tmp) * (ndeps_all+cnt+1));
+		if (!tmp) {
+			error("Error recording dependencies for %s", p->name);
+			if (runtime) longjmp(*rtld_fail, 1);
+			continue;
 		}
+		if (no_realloc) {
+			memcpy(tmp, p->deps, sizeof(*tmp) * (ndeps_all+1));
+			no_realloc = 0;
+		}
+		p->deps = tmp;
+		for (j=0; j<dep->ndeps_direct; j++) {
+			if (dep->deps[j]->mark) continue;
+			dep->deps[j]->mark = 1;
+			p->deps[ndeps_all++] = dep->deps[j];
+		}
+		p->deps[ndeps_all] = 0;
 	}
-	if (!*deps) *deps = (struct dso **)&nodeps_dummy;
+	p->bfs_built = 1;
+	for (p=head; p; p=p->next)
+		p->mark = 0;
 }
 
 static void load_preload(char *s)
@@ -1228,13 +1417,17 @@ static void reloc_all(struct dso *p)
 			2+(dyn[DT_PLTREL]==DT_RELA));
 		do_relocs(p, laddr(p, dyn[DT_REL]), dyn[DT_RELSZ], 2);
 		do_relocs(p, laddr(p, dyn[DT_RELA]), dyn[DT_RELASZ], 3);
-
-		if (head != &ldso && p->relro_start != p->relro_end &&
-		    mprotect(laddr(p, p->relro_start), p->relro_end-p->relro_start, PROT_READ)
-		    && errno != ENOSYS) {
-			error("Error relocating %s: RELRO protection failed: %m",
-				p->name);
-			if (runtime) longjmp(*rtld_fail, 1);
+		if (!DL_FDPIC)
+			do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
+
+		if (head != &ldso && p->relro_start != p->relro_end) {
+			long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
+				p->relro_end-p->relro_start, PROT_READ);
+			if (ret != 0 && ret != -ENOSYS) {
+				error("Error relocating %s: RELRO protection failed: %m",
+					p->name);
+				if (runtime) longjmp(*rtld_fail, 1);
+			}
 		}
 
 		p->relocated = 1;
@@ -1275,7 +1468,18 @@ void __libc_exit_fini()
 {
 	struct dso *p;
 	size_t dyn[DYN_CNT];
+	pthread_t self = __pthread_self();
+
+	/* Take both locks before setting shutting_down, so that
+	 * either lock is sufficient to read its value. The lock
+	 * order matches that in dlopen to avoid deadlock. */
+	pthread_rwlock_wrlock(&lock);
+	pthread_mutex_lock(&init_fini_lock);
+	shutting_down = 1;
+	pthread_rwlock_unlock(&lock);
 	for (p=fini_head; p; p=p->fini_next) {
+		while (p->ctor_visitor && p->ctor_visitor!=self)
+			pthread_cond_wait(&ctor_cond, &init_fini_lock);
 		if (!p->constructed) continue;
 		decode_vec(p->dynv, dyn, DYN_CNT);
 		if (dyn[0] & (1<<DT_FINI_ARRAY)) {
@@ -1290,22 +1494,108 @@ void __libc_exit_fini()
 	}
 }
 
-static void do_init_fini(struct dso *p)
+void __ldso_atfork(int who)
 {
-	size_t dyn[DYN_CNT];
-	int need_locking = libc.threads_minus_1;
-	/* Allow recursive calls that arise when a library calls
-	 * dlopen from one of its constructors, but block any
-	 * other threads until all ctors have finished. */
-	if (need_locking) pthread_mutex_lock(&init_fini_lock);
-	for (; p; p=p->prev) {
-		if (p->constructed) continue;
-		p->constructed = 1;
+	if (who<0) {
+		pthread_rwlock_wrlock(&lock);
+		pthread_mutex_lock(&init_fini_lock);
+	} else {
+		pthread_mutex_unlock(&init_fini_lock);
+		pthread_rwlock_unlock(&lock);
+	}
+}
+
+static struct dso **queue_ctors(struct dso *dso)
+{
+	size_t cnt, qpos, spos, i;
+	struct dso *p, **queue, **stack;
+
+	if (ldd_mode) return 0;
+
+	/* Bound on queue size is the total number of indirect deps.
+	 * If a bfs deps list was built, we can use it. Otherwise,
+	 * bound by the total number of DSOs, which is always safe and
+	 * is reasonable we use it (for main app at startup). */
+	if (dso->bfs_built) {
+		for (cnt=0; dso->deps[cnt]; cnt++)
+			dso->deps[cnt]->mark = 0;
+		cnt++; /* self, not included in deps */
+	} else {
+		for (cnt=0, p=head; p; cnt++, p=p->next)
+			p->mark = 0;
+	}
+	cnt++; /* termination slot */
+	if (dso==head && cnt <= countof(builtin_ctor_queue))
+		queue = builtin_ctor_queue;
+	else
+		queue = calloc(cnt, sizeof *queue);
+
+	if (!queue) {
+		error("Error allocating constructor queue: %m\n");
+		if (runtime) longjmp(*rtld_fail, 1);
+		return 0;
+	}
+
+	/* Opposite ends of the allocated buffer serve as an output queue
+	 * and a working stack. Setup initial stack with just the argument
+	 * dso and initial queue empty... */
+	stack = queue;
+	qpos = 0;
+	spos = cnt;
+	stack[--spos] = dso;
+	dso->next_dep = 0;
+	dso->mark = 1;
+
+	/* Then perform pseudo-DFS sort, but ignoring circular deps. */
+	while (spos<cnt) {
+		p = stack[spos++];
+		while (p->next_dep < p->ndeps_direct) {
+			if (p->deps[p->next_dep]->mark) {
+				p->next_dep++;
+			} else {
+				stack[--spos] = p;
+				p = p->deps[p->next_dep];
+				p->next_dep = 0;
+				p->mark = 1;
+			}
+		}
+		queue[qpos++] = p;
+	}
+	queue[qpos] = 0;
+	for (i=0; i<qpos; i++) queue[i]->mark = 0;
+	for (i=0; i<qpos; i++)
+		if (queue[i]->ctor_visitor && queue[i]->ctor_visitor->tid < 0) {
+			error("State of %s is inconsistent due to multithreaded fork\n",
+				queue[i]->name);
+			free(queue);
+			if (runtime) longjmp(*rtld_fail, 1);
+		}
+
+	return queue;
+}
+
+static void do_init_fini(struct dso **queue)
+{
+	struct dso *p;
+	size_t dyn[DYN_CNT], i;
+	pthread_t self = __pthread_self();
+
+	pthread_mutex_lock(&init_fini_lock);
+	for (i=0; (p=queue[i]); i++) {
+		while ((p->ctor_visitor && p->ctor_visitor!=self) || shutting_down)
+			pthread_cond_wait(&ctor_cond, &init_fini_lock);
+		if (p->ctor_visitor || p->constructed)
+			continue;
+		p->ctor_visitor = self;
+		
 		decode_vec(p->dynv, dyn, DYN_CNT);
 		if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
 			p->fini_next = fini_head;
 			fini_head = p;
 		}
+
+		pthread_mutex_unlock(&init_fini_lock);
+
 #ifndef NO_LEGACY_INITFINI
 		if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
 			fpaddr(p, dyn[DT_INIT])();
@@ -1315,17 +1605,21 @@ static void do_init_fini(struct dso *p)
 			size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
 			while (n--) ((void (*)(void))*fn++)();
 		}
-		if (!need_locking && libc.threads_minus_1) {
-			need_locking = 1;
-			pthread_mutex_lock(&init_fini_lock);
-		}
+
+		pthread_mutex_lock(&init_fini_lock);
+		p->ctor_visitor = 0;
+		p->constructed = 1;
+		pthread_cond_broadcast(&ctor_cond);
 	}
-	if (need_locking) pthread_mutex_unlock(&init_fini_lock);
+	pthread_mutex_unlock(&init_fini_lock);
 }
 
 void __libc_start_init(void)
 {
-	do_init_fini(tail);
+	do_init_fini(main_ctor_queue);
+	if (!__malloc_replaced && main_ctor_queue != builtin_ctor_queue)
+		free(main_ctor_queue);
+	main_ctor_queue = 0;
 }
 
 static void dl_debug_state(void)
@@ -1338,48 +1632,6 @@ void __init_tls(size_t *auxv)
 {
 }
 
-hidden void *__tls_get_new(tls_mod_off_t *v)
-{
-	pthread_t self = __pthread_self();
-
-	/* Block signals to make accessing new TLS async-signal-safe */
-	sigset_t set;
-	__block_all_sigs(&set);
-	if (v[0] <= self->dtv[0]) {
-		__restore_sigs(&set);
-		return (void *)(self->dtv[v[0]] + v[1]);
-	}
-
-	/* This is safe without any locks held because, if the caller
-	 * is able to request the Nth entry of the DTV, the DSO list
-	 * must be valid at least that far out and it was synchronized
-	 * at program startup or by an already-completed call to dlopen. */
-	struct dso *p;
-	for (p=head; p->tls_id != v[0]; p=p->next);
-
-	/* Get new DTV space from new DSO */
-	uintptr_t *newdtv = p->new_dtv +
-		(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
-	memcpy(newdtv, self->dtv, (self->dtv[0]+1) * sizeof(uintptr_t));
-	newdtv[0] = v[0];
-	self->dtv = self->dtv_copy = newdtv;
-
-	/* Get new TLS memory from all new DSOs up to the requested one */
-	unsigned char *mem;
-	for (p=head; ; p=p->next) {
-		if (!p->tls_id || self->dtv[p->tls_id]) continue;
-		mem = p->new_tls + (p->tls.size + p->tls.align)
-			* a_fetch_add(&p->new_tls_idx,1);
-		mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
-			& (p->tls.align-1);
-		self->dtv[p->tls_id] = (uintptr_t)mem + DTP_OFFSET;
-		memcpy(mem, p->tls.image, p->tls.len);
-		if (p->tls_id == v[0]) break;
-	}
-	__restore_sigs(&set);
-	return mem + v[1] + DTP_OFFSET;
-}
-
 static void update_tls_size()
 {
 	libc.tls_cnt = tls_cnt;
@@ -1392,6 +1644,56 @@ static void update_tls_size()
 	tls_align);
 }
 
+static void install_new_tls(void)
+{
+	sigset_t set;
+	pthread_t self = __pthread_self(), td;
+	struct dso *dtv_provider = container_of(tls_tail, struct dso, tls);
+	uintptr_t (*newdtv)[tls_cnt+1] = (void *)dtv_provider->new_dtv;
+	struct dso *p;
+	size_t i, j;
+	size_t old_cnt = self->dtv[0];
+
+	__block_app_sigs(&set);
+	__tl_lock();
+	/* Copy existing dtv contents from all existing threads. */
+	for (i=0, td=self; !i || td!=self; i++, td=td->next) {
+		memcpy(newdtv+i, td->dtv,
+			(old_cnt+1)*sizeof(uintptr_t));
+		newdtv[i][0] = tls_cnt;
+	}
+	/* Install new dtls into the enlarged, uninstalled dtv copies. */
+	for (p=head; ; p=p->next) {
+		if (p->tls_id <= old_cnt) continue;
+		unsigned char *mem = p->new_tls;
+		for (j=0; j<i; j++) {
+			unsigned char *new = mem;
+			new += ((uintptr_t)p->tls.image - (uintptr_t)mem)
+				& (p->tls.align-1);
+			memcpy(new, p->tls.image, p->tls.len);
+			newdtv[j][p->tls_id] =
+				(uintptr_t)new + DTP_OFFSET;
+			mem += p->tls.size + p->tls.align;
+		}
+		if (p->tls_id == tls_cnt) break;
+	}
+
+	/* Broadcast barrier to ensure contents of new dtv is visible
+	 * if the new dtv pointer is. The __membarrier function has a
+	 * fallback emulation using signals for kernels that lack the
+	 * feature at the syscall level. */
+
+	__membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
+
+	/* Install new dtv for each thread. */
+	for (j=0, td=self; !j || td!=self; j++, td=td->next) {
+		td->dtv = newdtv[j];
+	}
+
+	__tl_unlock();
+	__restore_sigs(&set);
+}
+
 /* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
  * following stage 2 and stage 3 functions via primitive symbolic lookup
  * since it does not have access to their addresses to begin with. */
@@ -1405,13 +1707,14 @@ static void update_tls_size()
 
 hidden void __dls2(unsigned char *base, size_t *sp)
 {
+	size_t *auxv;
+	for (auxv=sp+1+*sp+1; *auxv; auxv++);
+	auxv++;
 	if (DL_FDPIC) {
 		void *p1 = (void *)sp[-2];
 		void *p2 = (void *)sp[-1];
 		if (!p1) {
-			size_t *auxv, aux[AUX_CNT];
-			for (auxv=sp+1+*sp+1; *auxv; auxv++);
-			auxv++;
+			size_t aux[AUX_CNT];
 			decode_vec(auxv, aux, AUX_CNT);
 			if (aux[AT_BASE]) ldso.base = (void *)aux[AT_BASE];
 			else ldso.base = (void *)(aux[AT_PHDR] & -4096);
@@ -1422,11 +1725,12 @@ hidden void __dls2(unsigned char *base, size_t *sp)
 	} else {
 		ldso.base = base;
 	}
-	Ehdr *ehdr = (void *)ldso.base;
+	Ehdr *ehdr = __ehdr_start ? (void *)__ehdr_start : (void *)ldso.base;
 	ldso.name = ldso.shortname = "libc.so";
 	ldso.phnum = ehdr->e_phnum;
 	ldso.phdr = laddr(&ldso, ehdr->e_phoff);
 	ldso.phentsize = ehdr->e_phentsize;
+	search_vec(auxv, &ldso_page_size, AT_PAGESZ);
 	kernel_mapped_dso(&ldso);
 	decode_dyn(&ldso);
 
@@ -1457,8 +1761,8 @@ hidden void __dls2(unsigned char *base, size_t *sp)
 	 * symbolically as a barrier against moving the address
 	 * load across the above relocation processing. */
 	struct symdef dls2b_def = find_sym(&ldso, "__dls2b", 0);
-	if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls2b_def.sym-ldso.syms])(sp);
-	else ((stage3_func)laddr(&ldso, dls2b_def.sym->st_value))(sp);
+	if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls2b_def.sym-ldso.syms])(sp, auxv);
+	else ((stage3_func)laddr(&ldso, dls2b_def.sym->st_value))(sp, auxv);
 }
 
 /* Stage 2b sets up a valid thread pointer, which requires relocations
@@ -1467,11 +1771,13 @@ hidden void __dls2(unsigned char *base, size_t *sp)
  * so that loads of the thread pointer and &errno can be pure/const and
  * thereby hoistable. */
 
-_Noreturn void __dls2b(size_t *sp)
+void __dls2b(size_t *sp, size_t *auxv)
 {
 	/* Setup early thread pointer in builtin_tls for ldso/libc itself to
 	 * use during dynamic linking. If possible it will also serve as the
 	 * thread pointer at runtime. */
+	search_vec(auxv, &__hwcap, AT_HWCAP);
+	libc.auxv = auxv;
 	libc.tls_size = sizeof builtin_tls;
 	libc.tls_align = tls_align;
 	if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
@@ -1479,8 +1785,8 @@ _Noreturn void __dls2b(size_t *sp)
 	}
 
 	struct symdef dls3_def = find_sym(&ldso, "__dls3", 0);
-	if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls3_def.sym-ldso.syms])(sp);
-	else ((stage3_func)laddr(&ldso, dls3_def.sym->st_value))(sp);
+	if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls3_def.sym-ldso.syms])(sp, auxv);
+	else ((stage3_func)laddr(&ldso, dls3_def.sym->st_value))(sp, auxv);
 }
 
 /* Stage 3 of the dynamic linker is called with the dynamic linker/libc
@@ -1488,10 +1794,10 @@ _Noreturn void __dls2b(size_t *sp)
  * process dependencies and relocations for the main application and
  * transfer control to its entry point. */
 
-_Noreturn void __dls3(size_t *sp)
+void __dls3(size_t *sp, size_t *auxv)
 {
 	static struct dso app, vdso;
-	size_t aux[AUX_CNT], *auxv;
+	size_t aux[AUX_CNT];
 	size_t i;
 	char *env_preload=0;
 	char *replace_argv0=0;
@@ -1504,10 +1810,9 @@ _Noreturn void __dls3(size_t *sp)
 	/* Find aux vector just past environ[] and use it to initialize
 	 * global data that may be needed before we can make syscalls. */
 	__environ = envp;
-	for (i=argc+1; argv[i]; i++);
-	libc.auxv = auxv = (void *)(argv+i+1);
 	decode_vec(auxv, aux, AUX_CNT);
-	__hwcap = aux[AT_HWCAP];
+	search_vec(auxv, &__sysinfo, AT_SYSINFO);
+	__pthread_self()->sysinfo = __sysinfo;
 	libc.page_size = aux[AT_PAGESZ];
 	libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
 		|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
@@ -1518,6 +1823,9 @@ _Noreturn void __dls3(size_t *sp)
 		env_preload = getenv("LD_PRELOAD");
 	}
 
+	/* Activate error handler function */
+	error = error_impl;
+
 	/* If the main program was already loaded by the kernel,
 	 * AT_PHDR will point to some location other than the dynamic
 	 * linker's program headers. */
@@ -1593,7 +1901,7 @@ _Noreturn void __dls3(size_t *sp)
 			dprintf(2, "%s: cannot load %s: %s\n", ldname, argv[0], strerror(errno));
 			_exit(1);
 		}
-		Ehdr *ehdr = (void *)map_library(fd, &app);
+		Ehdr *ehdr = map_library(fd, &app);
 		if (!ehdr) {
 			dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
 			_exit(1);
@@ -1617,10 +1925,9 @@ _Noreturn void __dls3(size_t *sp)
 		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
 		app.tls.offset = GAP_ABOVE_TP;
-		app.tls.offset += -GAP_ABOVE_TP & (app.tls.align-1);
-		tls_offset = app.tls.offset + app.tls.size
-			+ ( -((uintptr_t)app.tls.image + app.tls.size)
-			& (app.tls.align-1) );
+		app.tls.offset += (-GAP_ABOVE_TP + (uintptr_t)app.tls.image)
+			& (app.tls.align-1);
+		tls_offset = app.tls.offset + app.tls.size;
 #else
 		tls_offset = app.tls.offset = app.tls.size
 			+ ( -((uintptr_t)app.tls.image + app.tls.size)
@@ -1650,6 +1957,7 @@ _Noreturn void __dls3(size_t *sp)
 	reclaim_gaps(&ldso);
 
 	/* Load preload/needed libraries, add symbols to global namespace. */
+	ldso.deps = (struct dso **)no_deps;
 	if (env_preload) load_preload(env_preload);
  	load_deps(&app);
 	for (struct dso *p=head; p; p=p->next)
@@ -1671,6 +1979,7 @@ _Noreturn void __dls3(size_t *sp)
 		vdso.name = "";
 		vdso.shortname = "linux-gate.so.1";
 		vdso.relocated = 1;
+		vdso.deps = (struct dso **)no_deps;
 		decode_dyn(&vdso);
 		vdso.prev = tail;
 		tail->next = &vdso;
@@ -1684,21 +1993,42 @@ _Noreturn void __dls3(size_t *sp)
 			size_t *ptr = (size_t *) app.dynv[i+1];
 			*ptr = (size_t)&debug;
 		}
+		if (app.dynv[i]==DT_DEBUG_INDIRECT_REL) {
+			size_t *ptr = (size_t *)((size_t)&app.dynv[i] + app.dynv[i+1]);
+			*ptr = (size_t)&debug;
+		}
 	}
 
-	/* The main program must be relocated LAST since it may contin
-	 * copy relocations which depend on libraries' relocations. */
-	reloc_all(app.next);
-	reloc_all(&app);
+	/* This must be done before final relocations, since it calls
+	 * malloc, which may be provided by the application. Calling any
+	 * application code prior to the jump to its entry point is not
+	 * valid in our model and does not work with FDPIC, where there
+	 * are additional relocation-like fixups that only the entry point
+	 * code can see to perform. */
+	main_ctor_queue = queue_ctors(&app);
 
+	/* Initial TLS must also be allocated before final relocations
+	 * might result in calloc being a call to application code. */
 	update_tls_size();
+	void *initial_tls = builtin_tls;
 	if (libc.tls_size > sizeof builtin_tls || tls_align > MIN_TLS_ALIGN) {
-		void *initial_tls = calloc(libc.tls_size, 1);
+		initial_tls = calloc(libc.tls_size, 1);
 		if (!initial_tls) {
 			dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
 				argv[0], libc.tls_size);
 			_exit(127);
 		}
+	}
+	static_tls_cnt = tls_cnt;
+
+	/* The main program must be relocated LAST since it may contain
+	 * copy relocations which depend on libraries' relocations. */
+	reloc_all(app.next);
+	reloc_all(&app);
+
+	/* Actual copying to new TLS needs to happen after relocations,
+	 * since the TLS images might have contained relocated addresses. */
+	if (initial_tls != builtin_tls) {
 		if (__init_tp(__copy_tls(initial_tls)) < 0) {
 			a_crash();
 		}
@@ -1712,7 +2042,6 @@ _Noreturn void __dls3(size_t *sp)
 		if (__copy_tls((void*)builtin_tls) != self) a_crash();
 		libc.tls_size = tmp_tls_size;
 	}
-	static_tls_cnt = tls_cnt;
 
 	if (ldso_fail) _exit(127);
 	if (ldd_mode) _exit(0);
@@ -1722,6 +2051,8 @@ _Noreturn void __dls3(size_t *sp)
 	 * possibility of incomplete replacement. */
 	if (find_sym(head, "malloc", 1).dso != &ldso)
 		__malloc_replaced = 1;
+	if (find_sym(head, "aligned_alloc", 1).dso != &ldso)
+		__aligned_alloc_replaced = 1;
 
 	/* Switch to runtime mode: any further failures in the dynamic
 	 * linker are a reportable failure rather than a fatal startup
@@ -1732,7 +2063,7 @@ _Noreturn void __dls3(size_t *sp)
 	debug.bp = dl_debug_state;
 	debug.head = head;
 	debug.base = ldso.base;
-	debug.state = 0;
+	debug.state = RT_CONSISTENT;
 	_dl_debug_state();
 
 	if (replace_argv0) argv[0] = replace_argv0;
@@ -1773,6 +2104,7 @@ void *dlopen(const char *file, int mode)
 	size_t i;
 	int cs;
 	jmp_buf jb;
+	struct dso **volatile ctor_queue = 0;
 
 	if (!file) return head;
 
@@ -1780,7 +2112,14 @@ void *dlopen(const char *file, int mode)
 	pthread_rwlock_wrlock(&lock);
 	__inhibit_ptc();
 
+	debug.state = RT_ADD;
+	_dl_debug_state();
+
 	p = 0;
+	if (shutting_down) {
+		error("Cannot dlopen while program is exiting.");
+		goto end;
+	}
 	orig_tls_tail = tls_tail;
 	orig_tls_cnt = tls_cnt;
 	orig_tls_offset = tls_offset;
@@ -1804,11 +2143,12 @@ void *dlopen(const char *file, int mode)
 			free(p->funcdescs);
 			if (p->rpath != p->rpath_orig)
 				free(p->rpath);
-			if (p->deps != &nodeps_dummy)
-				free(p->deps);
+			free(p->deps);
 			unmap_library(p);
 			free(p);
 		}
+		free(ctor_queue);
+		ctor_queue = 0;
 		if (!orig_tls_tail) libc.tls_head = 0;
 		tls_tail = orig_tls_tail;
 		if (tls_tail) tls_tail->next = 0;
@@ -1831,24 +2171,26 @@ void *dlopen(const char *file, int mode)
 	}
 
 	/* First load handling */
-	int first_load = !p->deps;
-	if (first_load) {
-		load_deps(p);
-		if (!p->relocated && (mode & RTLD_LAZY)) {
-			prepare_lazy(p);
-			for (i=0; p->deps[i]; i++)
-				if (!p->deps[i]->relocated)
-					prepare_lazy(p->deps[i]);
-		}
+	load_deps(p);
+	extend_bfs_deps(p);
+	pthread_mutex_lock(&init_fini_lock);
+	int constructed = p->constructed;
+	pthread_mutex_unlock(&init_fini_lock);
+	if (!constructed) ctor_queue = queue_ctors(p);
+	if (!p->relocated && (mode & RTLD_LAZY)) {
+		prepare_lazy(p);
+		for (i=0; p->deps[i]; i++)
+			if (!p->deps[i]->relocated)
+				prepare_lazy(p->deps[i]);
 	}
-	if (first_load || (mode & RTLD_GLOBAL)) {
+	if (!p->relocated || (mode & RTLD_GLOBAL)) {
 		/* Make new symbols global, at least temporarily, so we can do
 		 * relocations. If not RTLD_GLOBAL, this is reverted below. */
 		add_syms(p);
 		for (i=0; p->deps[i]; i++)
 			add_syms(p->deps[i]);
 	}
-	if (first_load) {
+	if (!p->relocated) {
 		reloc_all(p);
 	}
 
@@ -1864,13 +2206,19 @@ void *dlopen(const char *file, int mode)
 	redo_lazy_relocs();
 
 	update_tls_size();
-	_dl_debug_state();
+	if (tls_cnt != orig_tls_cnt)
+		install_new_tls();
 	orig_tail = tail;
 end:
+	debug.state = RT_CONSISTENT;
+	_dl_debug_state();
 	__release_ptc();
 	if (p) gencnt++;
 	pthread_rwlock_unlock(&lock);
-	if (p) do_init_fini(orig_tail);
+	if (ctor_queue) {
+		do_init_fini(ctor_queue);
+		free(ctor_queue);
+	}
 	pthread_setcancelstate(cs, 0);
 	return p;
 }
@@ -1918,58 +2266,27 @@ static void *addr2dso(size_t a)
 
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
-	size_t i;
-	uint32_t h = 0, gh = 0, *ght;
-	Sym *sym;
-	if (p == head || p == RTLD_DEFAULT || p == RTLD_NEXT) {
-		if (p == RTLD_DEFAULT) {
-			p = head;
-		} else if (p == RTLD_NEXT) {
-			p = addr2dso((size_t)ra);
-			if (!p) p=head;
-			p = p->next;
-		}
-		struct symdef def = find_sym(p, s, 0);
-		if (!def.sym) goto failed;
-		if ((def.sym->st_info&0xf) == STT_TLS)
-			return __tls_get_addr((tls_mod_off_t []){def.dso->tls_id, def.sym->st_value-DTP_OFFSET});
-		if (DL_FDPIC && (def.sym->st_info&0xf) == STT_FUNC)
-			return def.dso->funcdescs + (def.sym - def.dso->syms);
-		return laddr(def.dso, def.sym->st_value);
-	}
-	if (__dl_invalid_handle(p))
+	int use_deps = 0;
+	if (p == head || p == RTLD_DEFAULT) {
+		p = head;
+	} else if (p == RTLD_NEXT) {
+		p = addr2dso((size_t)ra);
+		if (!p) p=head;
+		p = p->next;
+	} else if (__dl_invalid_handle(p)) {
 		return 0;
-	if ((ght = p->ghashtab)) {
-		gh = gnu_hash(s);
-		sym = gnu_lookup(gh, ght, p, s);
-	} else {
-		h = sysv_hash(s);
-		sym = sysv_lookup(s, h, p);
-	}
-	if (sym && (sym->st_info&0xf) == STT_TLS)
-		return __tls_get_addr((tls_mod_off_t []){p->tls_id, sym->st_value-DTP_OFFSET});
-	if (DL_FDPIC && sym && sym->st_shndx && (sym->st_info&0xf) == STT_FUNC)
-		return p->funcdescs + (sym - p->syms);
-	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
-		return laddr(p, sym->st_value);
-	for (i=0; p->deps[i]; i++) {
-		if ((ght = p->deps[i]->ghashtab)) {
-			if (!gh) gh = gnu_hash(s);
-			sym = gnu_lookup(gh, ght, p->deps[i], s);
-		} else {
-			if (!h) h = sysv_hash(s);
-			sym = sysv_lookup(s, h, p->deps[i]);
-		}
-		if (sym && (sym->st_info&0xf) == STT_TLS)
-			return __tls_get_addr((tls_mod_off_t []){p->deps[i]->tls_id, sym->st_value-DTP_OFFSET});
-		if (DL_FDPIC && sym && sym->st_shndx && (sym->st_info&0xf) == STT_FUNC)
-			return p->deps[i]->funcdescs + (sym - p->deps[i]->syms);
-		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
-			return laddr(p->deps[i], sym->st_value);
-	}
-failed:
-	error("Symbol not found: %s", s);
-	return 0;
+	} else
+		use_deps = 1;
+	struct symdef def = find_sym2(p, s, 0, use_deps);
+	if (!def.sym) {
+		error("Symbol not found: %s", s);
+		return 0;
+	}
+	if ((def.sym->st_info&0xf) == STT_TLS)
+		return __tls_get_addr((tls_mod_off_t []){def.dso->tls_id, def.sym->st_value-DTP_OFFSET});
+	if (DL_FDPIC && (def.sym->st_info&0xf) == STT_FUNC)
+		return def.dso->funcdescs + (def.sym - def.dso->syms);
+	return laddr(def.dso, def.sym->st_value);
 }
 
 int dladdr(const void *addr_arg, Dl_info *info)
@@ -2017,7 +2334,7 @@ int dladdr(const void *addr_arg, Dl_info *info)
 		}
 	}
 
-	if (bestsym && besterr > bestsym->st_size-1) {
+	if (best && besterr > bestsym->st_size-1) {
 		best = 0;
 		bestsym = 0;
 	}
@@ -2048,6 +2365,33 @@ hidden void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra
 	return res;
 }
 
+hidden void *__dlsym_redir_time64(void *restrict p, const char *restrict s, void *restrict ra)
+{
+#if _REDIR_TIME64
+	const char *suffix, *suffix2 = "";
+	char redir[36];
+
+	/* Map the symbol name to a time64 version of itself according to the
+	 * pattern used for naming the redirected time64 symbols. */
+	size_t l = strnlen(s, sizeof redir);
+	if (l<4 || l==sizeof redir) goto no_redir;
+	if (s[l-2]=='_' && s[l-1]=='r') {
+		l -= 2;
+		suffix2 = s+l;
+	}
+	if (l<4) goto no_redir;
+	if (!strcmp(s+l-4, "time")) suffix = "64";
+	else suffix = "_time64";
+
+	/* Use the presence of the remapped symbol name in libc to determine
+	 * whether it's one that requires time64 redirection; replace if so. */
+	snprintf(redir, sizeof redir, "__%.*s%s%s", (int)l, s, suffix, suffix2);
+	if (find_sym(&ldso, redir, 1).sym) s = redir;
+no_redir:
+#endif
+	return __dlsym(p, s, ra);
+}
+
 int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data)
 {
 	struct dso *current;
@@ -2061,7 +2405,8 @@ int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void
 		info.dlpi_adds      = gencnt;
 		info.dlpi_subs      = 0;
 		info.dlpi_tls_modid = current->tls_id;
-		info.dlpi_tls_data  = current->tls.image;
+		info.dlpi_tls_data = !current->tls_id ? 0 :
+			__tls_get_addr((tls_mod_off_t[]){current->tls_id,0});
 
 		ret = (callback)(&info, sizeof (info), data);
 
@@ -2074,7 +2419,7 @@ int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void
 	return ret;
 }
 
-static void error(const char *fmt, ...)
+static void error_impl(const char *fmt, ...)
 {
 	va_list ap;
 	va_start(ap, fmt);
@@ -2088,3 +2433,7 @@ static void error(const char *fmt, ...)
 	__dl_vseterr(fmt, ap);
 	va_end(ap);
 }
+
+static void error_noop(const char *fmt, ...)
+{
+}
diff --git a/src/aio/aio.c b/src/aio/aio.c
index dae97cc6..d7e063bf 100644
--- a/src/aio/aio.c
+++ b/src/aio/aio.c
@@ -9,6 +9,12 @@
 #include "syscall.h"
 #include "atomic.h"
 #include "pthread_impl.h"
+#include "aio_impl.h"
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc __libc_realloc
+#define free __libc_free
 
 /* The following is a threads-based implementation of AIO with minimal
  * dependence on implementation details. Most synchronization is
@@ -70,8 +76,14 @@ static struct aio_queue *****map;
 static volatile int aio_fd_cnt;
 volatile int __aio_fut;
 
+static size_t io_thread_stack_size;
+
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
 static struct aio_queue *__aio_get_queue(int fd, int need)
 {
+	sigset_t allmask, origmask;
+	int masked = 0;
 	if (fd < 0) {
 		errno = EBADF;
 		return 0;
@@ -83,7 +95,14 @@ static struct aio_queue *__aio_get_queue(int fd, int need)
 	if ((!map || !map[a] || !map[a][b] || !map[a][b][c] || !(q=map[a][b][c][d])) && need) {
 		pthread_rwlock_unlock(&maplock);
 		if (fcntl(fd, F_GETFD) < 0) return 0;
+		sigfillset(&allmask);
+		masked = 1;
+		pthread_sigmask(SIG_BLOCK, &allmask, &origmask);
 		pthread_rwlock_wrlock(&maplock);
+		if (!io_thread_stack_size) {
+			unsigned long val = __getauxval(AT_MINSIGSTKSZ);
+			io_thread_stack_size = MAX(MINSIGSTKSZ+2048, val+512);
+		}
 		if (!map) map = calloc(sizeof *map, (-1U/2+1)>>24);
 		if (!map) goto out;
 		if (!map[a]) map[a] = calloc(sizeof **map, 256);
@@ -105,6 +124,7 @@ static struct aio_queue *__aio_get_queue(int fd, int need)
 	if (q) pthread_mutex_lock(&q->lock);
 out:
 	pthread_rwlock_unlock(&maplock);
+	if (masked) pthread_sigmask(SIG_SETMASK, &origmask, 0);
 	return q;
 }
 
@@ -259,15 +279,6 @@ static void *io_thread_func(void *ctx)
 	return 0;
 }
 
-static size_t io_thread_stack_size = MINSIGSTKSZ+2048;
-static pthread_once_t init_stack_size_once;
-
-static void init_stack_size()
-{
-	unsigned long val = __getauxval(AT_MINSIGSTKSZ);
-	if (val > MINSIGSTKSZ) io_thread_stack_size = val + 512;
-}
-
 static int submit(struct aiocb *cb, int op)
 {
 	int ret = 0;
@@ -280,6 +291,8 @@ static int submit(struct aiocb *cb, int op)
 
 	if (!q) {
 		if (errno != EBADF) errno = EAGAIN;
+		cb->__ret = -1;
+		cb->__err = errno;
 		return -1;
 	}
 	q->ref++;
@@ -291,7 +304,6 @@ static int submit(struct aiocb *cb, int op)
 		else
 			pthread_attr_init(&a);
 	} else {
-		pthread_once(&init_stack_size_once, init_stack_size);
 		pthread_attr_init(&a);
 		pthread_attr_setstacksize(&a, io_thread_stack_size);
 		pthread_attr_setguardsize(&a, 0);
@@ -303,8 +315,8 @@ static int submit(struct aiocb *cb, int op)
 	if (pthread_create(&td, &a, io_thread_func, &args)) {
 		pthread_mutex_lock(&q->lock);
 		__aio_unref_queue(q);
-		errno = EAGAIN;
-		ret = -1;
+		cb->__err = errno = EAGAIN;
+		cb->__ret = ret = -1;
 	}
 	pthread_sigmask(SIG_SETMASK, &origmask, 0);
 
@@ -390,9 +402,31 @@ int __aio_close(int fd)
 	return fd;
 }
 
-weak_alias(aio_cancel, aio_cancel64);
-weak_alias(aio_error, aio_error64);
-weak_alias(aio_fsync, aio_fsync64);
-weak_alias(aio_read, aio_read64);
-weak_alias(aio_write, aio_write64);
-weak_alias(aio_return, aio_return64);
+void __aio_atfork(int who)
+{
+	if (who<0) {
+		pthread_rwlock_rdlock(&maplock);
+		return;
+	} else if (!who) {
+		pthread_rwlock_unlock(&maplock);
+		return;
+	}
+	aio_fd_cnt = 0;
+	if (pthread_rwlock_tryrdlock(&maplock)) {
+		/* Obtaining lock may fail if _Fork was called nor via
+		 * fork. In this case, no further aio is possible from
+		 * child and we can just null out map so __aio_close
+		 * does not attempt to do anything. */
+		map = 0;
+		return;
+	}
+	if (map) for (int a=0; a<(-1U/2+1)>>24; a++)
+		if (map[a]) for (int b=0; b<256; b++)
+			if (map[a][b]) for (int c=0; c<256; c++)
+				if (map[a][b][c]) for (int d=0; d<256; d++)
+					map[a][b][c][d] = 0;
+	/* Re-initialize the rwlock rather than unlocking since there
+	 * may have been more than one reference on it in the parent.
+	 * We are not a lock holder anyway; the thread in the parent was. */
+	pthread_rwlock_init(&maplock, 0);
+}
diff --git a/src/aio/aio_suspend.c b/src/aio/aio_suspend.c
index 9b24b6af..1f0c9aaa 100644
--- a/src/aio/aio_suspend.c
+++ b/src/aio/aio_suspend.c
@@ -3,12 +3,13 @@
 #include <time.h>
 #include "atomic.h"
 #include "pthread_impl.h"
+#include "aio_impl.h"
 
 int aio_suspend(const struct aiocb *const cbs[], int cnt, const struct timespec *ts)
 {
 	int i, tid = 0, ret, expect = 0;
 	struct timespec at;
-	volatile int dummy_fut, *pfut;
+	volatile int dummy_fut = 0, *pfut;
 	int nzcnt = 0;
 	const struct aiocb *cb = 0;
 
@@ -72,5 +73,3 @@ int aio_suspend(const struct aiocb *const cbs[], int cnt, const struct timespec
 		}
 	}
 }
-
-weak_alias(aio_suspend, aio_suspend64);
diff --git a/src/aio/lio_listio.c b/src/aio/lio_listio.c
index 7b6a03d3..a672812f 100644
--- a/src/aio/lio_listio.c
+++ b/src/aio/lio_listio.c
@@ -113,7 +113,7 @@ int lio_listio(int mode, struct aiocb *restrict const *restrict cbs, int cnt, st
 
 	if (st) {
 		pthread_attr_t a;
-		sigset_t set;
+		sigset_t set, set_old;
 		pthread_t td;
 
 		if (sev->sigev_notify == SIGEV_THREAD) {
@@ -128,16 +128,14 @@ int lio_listio(int mode, struct aiocb *restrict const *restrict cbs, int cnt, st
 		}
 		pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
 		sigfillset(&set);
-		pthread_sigmask(SIG_BLOCK, &set, &set);
+		pthread_sigmask(SIG_BLOCK, &set, &set_old);
 		if (pthread_create(&td, &a, wait_thread, st)) {
 			free(st);
 			errno = EAGAIN;
 			return -1;
 		}
-		pthread_sigmask(SIG_SETMASK, &set, 0);
+		pthread_sigmask(SIG_SETMASK, &set_old, 0);
 	}
 
 	return 0;
 }
-
-weak_alias(lio_listio, lio_listio64);
diff --git a/src/complex/__cexp.c b/src/complex/__cexp.c
index 05ac28c7..003d20af 100644
--- a/src/complex/__cexp.c
+++ b/src/complex/__cexp.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t k = 1799; /* constant for reduction */
 static const double kln2 = 1246.97177782734161156; /* k * ln2 */
diff --git a/src/complex/__cexpf.c b/src/complex/__cexpf.c
index 69b54045..ee5ff2bc 100644
--- a/src/complex/__cexpf.c
+++ b/src/complex/__cexpf.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t k = 235; /* constant for reduction */
 static const float kln2 = 162.88958740F; /* k * ln2 */
diff --git a/src/complex/cabs.c b/src/complex/cabs.c
index f61d364e..c5ad58ab 100644
--- a/src/complex/cabs.c
+++ b/src/complex/cabs.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double cabs(double complex z)
 {
diff --git a/src/complex/cabsf.c b/src/complex/cabsf.c
index 30b25c70..619f28d3 100644
--- a/src/complex/cabsf.c
+++ b/src/complex/cabsf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float cabsf(float complex z)
 {
diff --git a/src/complex/cabsl.c b/src/complex/cabsl.c
index 40a067c1..d37e3f2e 100644
--- a/src/complex/cabsl.c
+++ b/src/complex/cabsl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double cabsl(long double complex z)
diff --git a/src/complex/cacos.c b/src/complex/cacos.c
index 27c35636..c39d257b 100644
--- a/src/complex/cacos.c
+++ b/src/complex/cacos.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
 
diff --git a/src/complex/cacosf.c b/src/complex/cacosf.c
index 11852659..ed8acf0f 100644
--- a/src/complex/cacosf.c
+++ b/src/complex/cacosf.c
@@ -1,9 +1,11 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
+static const float float_pi_2 = M_PI_2;
+
 float complex cacosf(float complex z)
 {
 	z = casinf(z);
-	return CMPLXF((float)M_PI_2 - crealf(z), -cimagf(z));
+	return CMPLXF(float_pi_2 - crealf(z), -cimagf(z));
 }
diff --git a/src/complex/cacosh.c b/src/complex/cacosh.c
index 8c68cb01..55b857ce 100644
--- a/src/complex/cacosh.c
+++ b/src/complex/cacosh.c
@@ -1,9 +1,12 @@
-#include "libm.h"
+#include "complex_impl.h"
 
-/* acosh(z) = i acos(z) */
+/* acosh(z) = ±i acos(z) */
 
 double complex cacosh(double complex z)
 {
+	int zineg = signbit(cimag(z));
+
 	z = cacos(z);
-	return CMPLX(-cimag(z), creal(z));
+	if (zineg) return CMPLX(cimag(z), -creal(z));
+	else       return CMPLX(-cimag(z), creal(z));
 }
diff --git a/src/complex/cacoshf.c b/src/complex/cacoshf.c
index ade01c09..8bd80581 100644
--- a/src/complex/cacoshf.c
+++ b/src/complex/cacoshf.c
@@ -1,7 +1,10 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex cacoshf(float complex z)
 {
+	int zineg = signbit(cimagf(z));
+
 	z = cacosf(z);
-	return CMPLXF(-cimagf(z), crealf(z));
+	if (zineg) return CMPLXF(cimagf(z), -crealf(z));
+	else       return CMPLXF(-cimagf(z), crealf(z));
 }
diff --git a/src/complex/cacoshl.c b/src/complex/cacoshl.c
index 65342557..3a284be9 100644
--- a/src/complex/cacoshl.c
+++ b/src/complex/cacoshl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cacoshl(long double complex z)
@@ -8,7 +8,10 @@ long double complex cacoshl(long double complex z)
 #else
 long double complex cacoshl(long double complex z)
 {
+	int zineg = signbit(cimagl(z));
+
 	z = cacosl(z);
-	return CMPLXL(-cimagl(z), creall(z));
+	if (zineg) return CMPLXL(cimagl(z), -creall(z));
+	else       return CMPLXL(-cimagl(z), creall(z));
 }
 #endif
diff --git a/src/complex/cacosl.c b/src/complex/cacosl.c
index 7fd4a2f6..cc20dcd7 100644
--- a/src/complex/cacosl.c
+++ b/src/complex/cacosl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cacosl(long double complex z)
diff --git a/src/complex/carg.c b/src/complex/carg.c
index d2d1b462..dfe9b97a 100644
--- a/src/complex/carg.c
+++ b/src/complex/carg.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double carg(double complex z)
 {
diff --git a/src/complex/cargf.c b/src/complex/cargf.c
index ce183c4b..9a6c19b6 100644
--- a/src/complex/cargf.c
+++ b/src/complex/cargf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float cargf(float complex z)
 {
diff --git a/src/complex/cargl.c b/src/complex/cargl.c
index e0d50478..88f95f96 100644
--- a/src/complex/cargl.c
+++ b/src/complex/cargl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double cargl(long double complex z)
diff --git a/src/complex/casin.c b/src/complex/casin.c
index 01ed6184..3244bebb 100644
--- a/src/complex/casin.c
+++ b/src/complex/casin.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/casinf.c b/src/complex/casinf.c
index 4fcb76fc..2cda2f08 100644
--- a/src/complex/casinf.c
+++ b/src/complex/casinf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/casinh.c b/src/complex/casinh.c
index b57fe8c4..50bf27ce 100644
--- a/src/complex/casinh.c
+++ b/src/complex/casinh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* asinh(z) = -i asin(i z) */
 
diff --git a/src/complex/casinhf.c b/src/complex/casinhf.c
index a11bf902..93d82e5f 100644
--- a/src/complex/casinhf.c
+++ b/src/complex/casinhf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex casinhf(float complex z)
 {
diff --git a/src/complex/casinhl.c b/src/complex/casinhl.c
index 714f1893..68ba3ddf 100644
--- a/src/complex/casinhl.c
+++ b/src/complex/casinhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex casinhl(long double complex z)
diff --git a/src/complex/casinl.c b/src/complex/casinl.c
index 3b7ceba7..072adc45 100644
--- a/src/complex/casinl.c
+++ b/src/complex/casinl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex casinl(long double complex z)
diff --git a/src/complex/catan.c b/src/complex/catan.c
index 7dc2afeb..b4fe552a 100644
--- a/src/complex/catan.c
+++ b/src/complex/catan.c
@@ -58,30 +58,7 @@
  * 2.9e-17.  See also clog().
  */
 
-#include "libm.h"
-
-#define MAXNUM 1.0e308
-
-static const double DP1 = 3.14159265160560607910E0;
-static const double DP2 = 1.98418714791870343106E-9;
-static const double DP3 = 1.14423774522196636802E-17;
-
-static double _redupi(double x)
-{
-	double t;
-	long i;
-
-	t = x/M_PI;
-	if (t >= 0.0)
-		t += 0.5;
-	else
-		t -= 0.5;
-
-	i = t;  /* the multiple */
-	t = i;
-	t = ((x - t * DP1) - t * DP2) - t * DP3;
-	return t;
-}
+#include "complex_impl.h"
 
 double complex catan(double complex z)
 {
@@ -95,7 +72,7 @@ double complex catan(double complex z)
 	a = 1.0 - x2 - (y * y);
 
 	t = 0.5 * atan2(2.0 * x, a);
-	w = _redupi(t);
+	w = t;
 
 	t = y - 1.0;
 	a = x2 + (t * t);
diff --git a/src/complex/catanf.c b/src/complex/catanf.c
index 8533bde3..faaa907a 100644
--- a/src/complex/catanf.c
+++ b/src/complex/catanf.c
@@ -53,31 +53,7 @@
  *    IEEE      -10,+10     30000        2.3e-6      5.2e-8
  */
 
-#include "libm.h"
-
-#define MAXNUMF 1.0e38F
-
-static const double DP1 = 3.140625;
-static const double DP2 = 9.67502593994140625E-4;
-static const double DP3 = 1.509957990978376432E-7;
-
-static float _redupif(float xx)
-{
-	float x, t;
-	long i;
-
-	x = xx;
-	t = x/(float)M_PI;
-	if (t >= 0.0f)
-		t += 0.5f;
-	else
-		t -= 0.5f;
-
-	i = t;  /* the multiple */
-	t = i;
-	t = ((x - t * DP1) - t * DP2) - t * DP3;
-	return t;
-}
+#include "complex_impl.h"
 
 float complex catanf(float complex z)
 {
@@ -87,29 +63,17 @@ float complex catanf(float complex z)
 	x = crealf(z);
 	y = cimagf(z);
 
-	if ((x == 0.0f) && (y > 1.0f))
-		goto ovrf;
-
 	x2 = x * x;
 	a = 1.0f - x2 - (y * y);
-	if (a == 0.0f)
-		goto ovrf;
 
 	t = 0.5f * atan2f(2.0f * x, a);
-	w = _redupif(t);
+	w = t;
 
 	t = y - 1.0f;
 	a = x2 + (t * t);
-	if (a == 0.0f)
-		goto ovrf;
 
 	t = y + 1.0f;
 	a = (x2 + (t * t))/a;
-	w = w + (0.25f * logf (a)) * I;
-	return w;
-
-ovrf:
-	// FIXME
-	w = MAXNUMF + MAXNUMF * I;
+	w = CMPLXF(w, 0.25f * logf(a));
 	return w;
 }
diff --git a/src/complex/catanh.c b/src/complex/catanh.c
index e248d9b9..c324c7f2 100644
--- a/src/complex/catanh.c
+++ b/src/complex/catanh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* atanh = -i atan(i z) */
 
diff --git a/src/complex/catanhf.c b/src/complex/catanhf.c
index 4a5eb040..b0505f60 100644
--- a/src/complex/catanhf.c
+++ b/src/complex/catanhf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex catanhf(float complex z)
 {
diff --git a/src/complex/catanhl.c b/src/complex/catanhl.c
index a5dd538e..6025c414 100644
--- a/src/complex/catanhl.c
+++ b/src/complex/catanhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex catanhl(long double complex z)
diff --git a/src/complex/catanl.c b/src/complex/catanl.c
index 5ace7704..cd2d2b00 100644
--- a/src/complex/catanl.c
+++ b/src/complex/catanl.c
@@ -59,7 +59,7 @@
 
 #include <complex.h>
 #include <float.h>
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex catanl(long double complex z)
@@ -67,28 +67,6 @@ long double complex catanl(long double complex z)
 	return catan(z);
 }
 #else
-static const long double PIL = 3.141592653589793238462643383279502884197169L;
-static const long double DP1 = 3.14159265358979323829596852490908531763125L;
-static const long double DP2 = 1.6667485837041756656403424829301998703007e-19L;
-static const long double DP3 = 1.8830410776607851167459095484560349402753e-39L;
-
-static long double redupil(long double x)
-{
-	long double t;
-	long i;
-
-	t = x / PIL;
-	if (t >= 0.0L)
-		t += 0.5L;
-	else
-		t -= 0.5L;
-
-	i = t;  /* the multiple */
-	t = i;
-	t = ((x - t * DP1) - t * DP2) - t * DP3;
-	return t;
-}
-
 long double complex catanl(long double complex z)
 {
 	long double complex w;
@@ -97,30 +75,18 @@ long double complex catanl(long double complex z)
 	x = creall(z);
 	y = cimagl(z);
 
-	if ((x == 0.0L) && (y > 1.0L))
-		goto ovrf;
-
 	x2 = x * x;
 	a = 1.0L - x2 - (y * y);
-	if (a == 0.0L)
-		goto ovrf;
 
 	t = atan2l(2.0L * x, a) * 0.5L;
-	w = redupil(t);
+	w = t;
 
 	t = y - 1.0L;
 	a = x2 + (t * t);
-	if (a == 0.0L)
-		goto ovrf;
 
 	t = y + 1.0L;
 	a = (x2 + (t * t)) / a;
-	w = w + (0.25L * logl(a)) * I;
-	return w;
-
-ovrf:
-	// FIXME
-	w = LDBL_MAX + LDBL_MAX * I;
+	w = CMPLXF(w, 0.25L * logl(a));
 	return w;
 }
 #endif
diff --git a/src/complex/ccos.c b/src/complex/ccos.c
index 645aec29..f32e1fad 100644
--- a/src/complex/ccos.c
+++ b/src/complex/ccos.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* cos(z) = cosh(i z) */
 
diff --git a/src/complex/ccosf.c b/src/complex/ccosf.c
index 9a67241f..490be9b3 100644
--- a/src/complex/ccosf.c
+++ b/src/complex/ccosf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex ccosf(float complex z)
 {
diff --git a/src/complex/ccosh.c b/src/complex/ccosh.c
index 401f3c60..c995da7b 100644
--- a/src/complex/ccosh.c
+++ b/src/complex/ccosh.c
@@ -34,7 +34,7 @@
  * These values and the return value were taken from n1124.pdf.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const double huge = 0x1p1023;
 
diff --git a/src/complex/ccoshf.c b/src/complex/ccoshf.c
index 90acfe05..189ce946 100644
--- a/src/complex/ccoshf.c
+++ b/src/complex/ccoshf.c
@@ -28,7 +28,7 @@
  * Hyperbolic cosine of a complex argument.  See s_ccosh.c for details.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const float huge = 0x1p127;
 
diff --git a/src/complex/ccoshl.c b/src/complex/ccoshl.c
index 9b2aed9e..ffb4d8a1 100644
--- a/src/complex/ccoshl.c
+++ b/src/complex/ccoshl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex ccoshl(long double complex z)
diff --git a/src/complex/ccosl.c b/src/complex/ccosl.c
index d787047f..2530006b 100644
--- a/src/complex/ccosl.c
+++ b/src/complex/ccosl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex ccosl(long double complex z)
diff --git a/src/complex/cexp.c b/src/complex/cexp.c
index 5118e00e..7fb489bb 100644
--- a/src/complex/cexp.c
+++ b/src/complex/cexp.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t
 exp_ovfl  = 0x40862e42,  /* high bits of MAX_EXP * ln2 ~= 710 */
diff --git a/src/complex/cexpf.c b/src/complex/cexpf.c
index 1a09964c..00d258f3 100644
--- a/src/complex/cexpf.c
+++ b/src/complex/cexpf.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t
 exp_ovfl  = 0x42b17218,  /* MAX_EXP * ln2 ~= 88.722839355 */
diff --git a/src/complex/cexpl.c b/src/complex/cexpl.c
index a27f85c0..d4df950e 100644
--- a/src/complex/cexpl.c
+++ b/src/complex/cexpl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex cexpl(long double complex z)
diff --git a/src/complex/cimag.c b/src/complex/cimag.c
index 00955641..d6b0e683 100644
--- a/src/complex/cimag.c
+++ b/src/complex/cimag.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double (cimag)(double complex z)
 {
diff --git a/src/complex/cimagf.c b/src/complex/cimagf.c
index f7bcd76e..b7166dcf 100644
--- a/src/complex/cimagf.c
+++ b/src/complex/cimagf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float (cimagf)(float complex z)
 {
diff --git a/src/complex/cimagl.c b/src/complex/cimagl.c
index 9ec24eee..4db77f20 100644
--- a/src/complex/cimagl.c
+++ b/src/complex/cimagl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 long double (cimagl)(long double complex z)
 {
diff --git a/src/complex/clog.c b/src/complex/clog.c
index 12aae9c7..b587c291 100644
--- a/src/complex/clog.c
+++ b/src/complex/clog.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/clogf.c b/src/complex/clogf.c
index e9b32e60..0389d472 100644
--- a/src/complex/clogf.c
+++ b/src/complex/clogf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/clogl.c b/src/complex/clogl.c
index 18f16088..88e83e87 100644
--- a/src/complex/clogl.c
+++ b/src/complex/clogl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex clogl(long double complex z)
diff --git a/src/complex/conj.c b/src/complex/conj.c
index 0b3f5f46..a3b19a4a 100644
--- a/src/complex/conj.c
+++ b/src/complex/conj.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double complex conj(double complex z)
 {
diff --git a/src/complex/conjf.c b/src/complex/conjf.c
index 9af6b2c3..b2195c84 100644
--- a/src/complex/conjf.c
+++ b/src/complex/conjf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex conjf(float complex z)
 {
diff --git a/src/complex/conjl.c b/src/complex/conjl.c
index 67f11b9d..87a4ebec 100644
--- a/src/complex/conjl.c
+++ b/src/complex/conjl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 long double complex conjl(long double complex z)
 {
diff --git a/src/complex/cpow.c b/src/complex/cpow.c
index f863588f..1137d391 100644
--- a/src/complex/cpow.c
+++ b/src/complex/cpow.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */
 
diff --git a/src/complex/cpowf.c b/src/complex/cpowf.c
index 53c65dcb..f3fd4b7b 100644
--- a/src/complex/cpowf.c
+++ b/src/complex/cpowf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex cpowf(float complex z, float complex c)
 {
diff --git a/src/complex/cpowl.c b/src/complex/cpowl.c
index c1a80a7b..be36f046 100644
--- a/src/complex/cpowl.c
+++ b/src/complex/cpowl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cpowl(long double complex z, long double complex c)
diff --git a/src/complex/cproj.c b/src/complex/cproj.c
index 15f358a1..d2b8f5a9 100644
--- a/src/complex/cproj.c
+++ b/src/complex/cproj.c
@@ -1,8 +1,8 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double complex cproj(double complex z)
 {
 	if (isinf(creal(z)) || isinf(cimag(z)))
-		return CMPLX(INFINITY, copysign(0.0, creal(z)));
+		return CMPLX(INFINITY, copysign(0.0, cimag(z)));
 	return z;
 }
diff --git a/src/complex/cprojf.c b/src/complex/cprojf.c
index 653be5e8..15a874bb 100644
--- a/src/complex/cprojf.c
+++ b/src/complex/cprojf.c
@@ -1,8 +1,8 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex cprojf(float complex z)
 {
 	if (isinf(crealf(z)) || isinf(cimagf(z)))
-		return CMPLXF(INFINITY, copysignf(0.0, crealf(z)));
+		return CMPLXF(INFINITY, copysignf(0.0, cimagf(z)));
 	return z;
 }
diff --git a/src/complex/cprojl.c b/src/complex/cprojl.c
index 6731aaa2..531ffa1c 100644
--- a/src/complex/cprojl.c
+++ b/src/complex/cprojl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cprojl(long double complex z)
@@ -9,7 +9,7 @@ long double complex cprojl(long double complex z)
 long double complex cprojl(long double complex z)
 {
 	if (isinf(creall(z)) || isinf(cimagl(z)))
-		return CMPLXL(INFINITY, copysignl(0.0, creall(z)));
+		return CMPLXL(INFINITY, copysignl(0.0, cimagl(z)));
 	return z;
 }
 #endif
diff --git a/src/complex/csin.c b/src/complex/csin.c
index ad8ae67a..535c4bf8 100644
--- a/src/complex/csin.c
+++ b/src/complex/csin.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* sin(z) = -i sinh(i z) */
 
diff --git a/src/complex/csinf.c b/src/complex/csinf.c
index 60b3cbaa..69f5164e 100644
--- a/src/complex/csinf.c
+++ b/src/complex/csinf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex csinf(float complex z)
 {
diff --git a/src/complex/csinh.c b/src/complex/csinh.c
index 0f8035d1..eda0ab59 100644
--- a/src/complex/csinh.c
+++ b/src/complex/csinh.c
@@ -34,7 +34,7 @@
  * These values and the return value were taken from n1124.pdf.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const double huge = 0x1p1023;
 
diff --git a/src/complex/csinhf.c b/src/complex/csinhf.c
index 49697f02..eb1d98c5 100644
--- a/src/complex/csinhf.c
+++ b/src/complex/csinhf.c
@@ -28,7 +28,7 @@
  * Hyperbolic sine of a complex argument z.  See s_csinh.c for details.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const float huge = 0x1p127;
 
diff --git a/src/complex/csinhl.c b/src/complex/csinhl.c
index c566653b..09fd18f9 100644
--- a/src/complex/csinhl.c
+++ b/src/complex/csinhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex csinhl(long double complex z)
diff --git a/src/complex/csinl.c b/src/complex/csinl.c
index 4e9f86c3..90a4eb37 100644
--- a/src/complex/csinl.c
+++ b/src/complex/csinl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex csinl(long double complex z)
diff --git a/src/complex/csqrt.c b/src/complex/csqrt.c
index 8a2ba608..c36de001 100644
--- a/src/complex/csqrt.c
+++ b/src/complex/csqrt.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 /*
  * gcc doesn't implement complex multiplication or division correctly,
diff --git a/src/complex/csqrtf.c b/src/complex/csqrtf.c
index ab5102f0..a6163974 100644
--- a/src/complex/csqrtf.c
+++ b/src/complex/csqrtf.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 /*
  * gcc doesn't implement complex multiplication or division correctly,
diff --git a/src/complex/csqrtl.c b/src/complex/csqrtl.c
index 0600ef3b..22539379 100644
--- a/src/complex/csqrtl.c
+++ b/src/complex/csqrtl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex csqrtl(long double complex z)
diff --git a/src/complex/ctan.c b/src/complex/ctan.c
index c0926374..918717bf 100644
--- a/src/complex/ctan.c
+++ b/src/complex/ctan.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* tan(z) = -i tanh(i z) */
 
diff --git a/src/complex/ctanf.c b/src/complex/ctanf.c
index 009b1921..04c3ff19 100644
--- a/src/complex/ctanf.c
+++ b/src/complex/ctanf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex ctanf(float complex z)
 {
diff --git a/src/complex/ctanh.c b/src/complex/ctanh.c
index 3ba3a899..54004cd7 100644
--- a/src/complex/ctanh.c
+++ b/src/complex/ctanh.c
@@ -63,7 +63,7 @@
  *   precision.  I also handle large x differently.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 double complex ctanh(double complex z)
 {
diff --git a/src/complex/ctanhf.c b/src/complex/ctanhf.c
index 72b76da0..7f422ba7 100644
--- a/src/complex/ctanhf.c
+++ b/src/complex/ctanhf.c
@@ -28,7 +28,7 @@
  * Hyperbolic tangent of a complex argument z.  See s_ctanh.c for details.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex ctanhf(float complex z)
 {
diff --git a/src/complex/ctanhl.c b/src/complex/ctanhl.c
index 89a75d13..45d5862c 100644
--- a/src/complex/ctanhl.c
+++ b/src/complex/ctanhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex ctanhl(long double complex z)
diff --git a/src/complex/ctanl.c b/src/complex/ctanl.c
index ac1c3e0a..4b87420d 100644
--- a/src/complex/ctanl.c
+++ b/src/complex/ctanl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex ctanl(long double complex z)
diff --git a/src/conf/confstr.c b/src/conf/confstr.c
index 02cb1aa2..3d417284 100644
--- a/src/conf/confstr.c
+++ b/src/conf/confstr.c
@@ -7,7 +7,7 @@ size_t confstr(int name, char *buf, size_t len)
 	const char *s = "";
 	if (!name) {
 		s = "/bin:/usr/bin";
-	} else if ((name&~4U)!=1 && name-_CS_POSIX_V6_ILP32_OFF32_CFLAGS>33U) {
+	} else if ((name&~4U)!=1 && name-_CS_POSIX_V6_ILP32_OFF32_CFLAGS>35U) {
 		errno = EINVAL;
 		return 0;
 	}
diff --git a/src/conf/sysconf.c b/src/conf/sysconf.c
index 3baaed32..8dd5c725 100644
--- a/src/conf/sysconf.c
+++ b/src/conf/sysconf.c
@@ -4,6 +4,7 @@
 #include <sys/resource.h>
 #include <signal.h>
 #include <sys/sysinfo.h>
+#include <sys/auxv.h>
 #include "syscall.h"
 #include "libc.h"
 
@@ -19,6 +20,8 @@
 #define JT_AVPHYS_PAGES JT(9)
 #define JT_ZERO JT(10)
 #define JT_DELAYTIMER_MAX JT(11)
+#define JT_MINSIGSTKSZ JT(12)
+#define JT_SIGSTKSZ JT(13)
 
 #define RLIM(x) (-32768|(RLIMIT_ ## x))
 
@@ -165,6 +168,9 @@ long sysconf(int name)
 		[_SC_XOPEN_STREAMS] = JT_ZERO,
 		[_SC_THREAD_ROBUST_PRIO_INHERIT] = -1,
 		[_SC_THREAD_ROBUST_PRIO_PROTECT] = -1,
+
+		[_SC_MINSIGSTKSZ] = JT_MINSIGSTKSZ,
+		[_SC_SIGSTKSZ] = JT_SIGSTKSZ,
 	};
 
 	if (name >= sizeof(values)/sizeof(values[0]) || !values[name]) {
@@ -212,6 +218,18 @@ long sysconf(int name)
 		mem *= si.mem_unit;
 		mem /= PAGE_SIZE;
 		return (mem > LONG_MAX) ? LONG_MAX : mem;
+	case JT_MINSIGSTKSZ & 255:
+	case JT_SIGSTKSZ & 255: ;
+		/* Value from auxv/kernel is only sigfame size. Clamp it
+		 * to at least 1k below arch's traditional MINSIGSTKSZ,
+		 * then add 1k of working space for signal handler. */
+		unsigned long sigframe_sz = __getauxval(AT_MINSIGSTKSZ);
+		if (sigframe_sz < MINSIGSTKSZ - 1024)
+			sigframe_sz = MINSIGSTKSZ - 1024;
+		unsigned val = sigframe_sz + 1024;
+		if (values[name] == JT_SIGSTKSZ)
+			val += SIGSTKSZ - MINSIGSTKSZ;
+		return val;
 	case JT_ZERO & 255:
 		return 0;
 	}
diff --git a/src/crypt/crypt_blowfish.c b/src/crypt/crypt_blowfish.c
index d3f79851..d722607b 100644
--- a/src/crypt/crypt_blowfish.c
+++ b/src/crypt/crypt_blowfish.c
@@ -15,7 +15,7 @@
  * No copyright is claimed, and the software is hereby placed in the public
  * domain.  In case this attempt to disclaim copyright and place the software
  * in the public domain is deemed null and void, then the software is
- * Copyright (c) 1998-2012 Solar Designer and it is hereby released to the
+ * Copyright (c) 1998-2014 Solar Designer and it is hereby released to the
  * general public under the following terms:
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,12 +31,12 @@
  * you place this code and any modifications you make under a license
  * of your choice.
  *
- * This implementation is mostly compatible with OpenBSD's bcrypt.c (prefix
- * "$2a$") by Niels Provos <provos at citi.umich.edu>, and uses some of his
- * ideas.  The password hashing algorithm was designed by David Mazieres
- * <dm at lcs.mit.edu>.  For more information on the level of compatibility,
- * please refer to the comments in BF_set_key() below and to the included
- * crypt(3) man page.
+ * This implementation is fully compatible with OpenBSD's bcrypt.c for prefix
+ * "$2b$", originally by Niels Provos <provos at citi.umich.edu>, and it uses
+ * some of his ideas.  The password hashing algorithm was designed by David
+ * Mazieres <dm at lcs.mit.edu>.  For information on the level of
+ * compatibility for bcrypt hash prefixes other than "$2b$", please refer to
+ * the comments in BF_set_key() below and to the included crypt(3) man page.
  *
  * There's a paper on the algorithm that explains its design decisions:
  *
@@ -533,6 +533,7 @@ static void BF_set_key(const char *key, BF_key expanded, BF_key initial,
  * Valid combinations of settings are:
  *
  * Prefix "$2a$": bug = 0, safety = 0x10000
+ * Prefix "$2b$": bug = 0, safety = 0
  * Prefix "$2x$": bug = 1, safety = 0
  * Prefix "$2y$": bug = 0, safety = 0
  */
@@ -596,12 +597,14 @@ static void BF_set_key(const char *key, BF_key expanded, BF_key initial,
 	initial[0] ^= sign;
 }
 
+static const unsigned char flags_by_subtype[26] = {
+	2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0
+};
+
 static char *BF_crypt(const char *key, const char *setting,
 	char *output, BF_word min)
 {
-	static const unsigned char flags_by_subtype[26] =
-		{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0};
 	struct {
 		BF_ctx ctx;
 		BF_key expanded_key;
@@ -746,9 +749,11 @@ char *__crypt_blowfish(const char *key, const char *setting, char *output)
 {
 	const char *test_key = "8b \xd0\xc1\xd2\xcf\xcc\xd8";
 	const char *test_setting = "$2a$00$abcdefghijklmnopqrstuu";
-	static const char test_hash[2][34] =
-		{"VUrPmXD6q/nVSSp7pNDhCR9071IfIRe\0\x55", /* $2x$ */
-		"i1D709vfamulimlGcq0qq3UvuUasvEa\0\x55"}; /* $2a$, $2y$ */
+	static const char test_hashes[2][34] = {
+		"i1D709vfamulimlGcq0qq3UvuUasvEa\0\x55", /* 'a', 'b', 'y' */
+		"VUrPmXD6q/nVSSp7pNDhCR9071IfIRe\0\x55", /* 'x' */
+	};
+	const char *test_hash = test_hashes[0];
 	char *retval;
 	const char *p;
 	int ok;
@@ -768,8 +773,11 @@ char *__crypt_blowfish(const char *key, const char *setting, char *output)
  * detected by the self-test.
  */
 	memcpy(buf.s, test_setting, sizeof(buf.s));
-	if (retval)
+	if (retval) {
+		unsigned int flags = flags_by_subtype[setting[2] - 'a'];
+		test_hash = test_hashes[flags & 1];
 		buf.s[2] = setting[2];
+	}
 	memset(buf.o, 0x55, sizeof(buf.o));
 	buf.o[sizeof(buf.o) - 1] = 0;
 	p = BF_crypt(test_key, buf.s, buf.o, 1);
@@ -777,7 +785,7 @@ char *__crypt_blowfish(const char *key, const char *setting, char *output)
 	ok = (p == buf.o &&
 	    !memcmp(p, buf.s, 7 + 22) &&
 	    !memcmp(p + (7 + 22),
-	    test_hash[buf.s[2] & 1],
+	    test_hash,
 	    31 + 1 + 1 + 1));
 
 	{
diff --git a/src/ctype/alpha.h b/src/ctype/alpha.h
index 299277c7..4167f387 100644
--- a/src/ctype/alpha.h
+++ b/src/ctype/alpha.h
@@ -8,17 +8,17 @@
 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
 17,17,17,17,17,17,17,63,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,64,65,17,66,67,
-68,69,70,71,72,73,74,17,75,76,77,78,79,80,16,16,16,81,82,83,84,85,86,87,88,89,
-16,90,16,91,92,16,16,17,17,17,93,94,95,16,16,16,16,16,16,16,16,16,16,17,17,17,
-17,96,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,97,16,16,16,16,16,16,
+68,69,70,71,72,73,74,17,75,76,77,78,79,80,81,16,82,83,84,85,86,87,88,89,90,91,
+92,93,16,94,95,96,16,17,17,17,97,98,99,16,16,16,16,16,16,16,16,16,16,17,17,17,
+17,100,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,101,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,17,17,98,99,16,16,16,100,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
-17,17,17,17,17,17,17,101,17,17,102,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,103,
-104,16,16,16,16,16,16,16,16,16,105,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,106,107,108,109,16,16,16,16,16,16,16,16,110,16,16,
-16,16,16,16,16,111,112,16,16,16,16,113,16,16,114,16,16,16,16,16,16,16,16,16,
-16,16,16,16,
+16,16,17,17,102,103,16,16,104,105,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+17,17,17,17,17,17,17,17,17,106,17,17,107,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,
+108,109,16,16,16,16,16,16,16,16,16,110,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,16,16,16,16,16,16,16,111,112,113,114,16,16,16,16,16,16,16,16,115,116,
+117,16,16,16,16,16,118,119,16,16,16,16,120,16,16,121,16,16,16,16,16,16,16,16,
+16,16,16,16,16,
 16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,254,255,255,7,254,
@@ -27,8 +27,8 @@
 255,195,255,3,0,31,80,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,223,188,64,215,255,255,
 251,255,255,255,255,255,255,255,255,255,191,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,3,252,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,254,255,255,255,127,2,254,255,255,
-255,255,0,0,0,0,0,255,191,182,0,255,255,255,7,7,0,0,0,255,7,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,254,255,255,255,127,2,255,255,255,
+255,255,1,0,0,0,0,255,191,182,0,255,255,255,135,7,0,0,0,255,7,255,255,255,255,
 255,255,255,254,255,195,255,255,255,255,255,255,255,255,255,255,255,255,239,
 31,254,225,255,
 159,0,0,255,255,255,255,255,255,0,224,255,255,255,255,255,255,255,255,255,255,
@@ -42,54 +42,55 @@
 255,0,0,239,223,253,255,255,253,239,227,223,29,96,64,207,255,6,0,239,223,253,
 255,255,255,255,231,223,93,240,128,207,255,0,252,236,255,127,252,255,255,251,
 47,127,128,95,255,192,255,12,0,254,255,255,255,255,127,255,7,63,32,255,3,0,0,
-0,0,150,37,240,254,174,236,255,59,95,32,255,243,0,0,0,
+0,0,214,247,255,255,175,255,255,59,95,32,255,243,0,0,0,
 0,1,0,0,0,255,3,0,0,255,254,255,255,255,31,254,255,3,255,255,254,255,255,255,
-31,0,0,0,0,0,0,0,0,255,255,255,255,255,255,127,249,255,3,255,255,231,193,255,
-255,127,64,255,51,255,255,255,255,191,32,255,255,255,255,255,247,255,255,255,
+31,0,0,0,0,0,0,0,0,255,255,255,255,255,255,127,249,255,3,255,255,255,255,255,
+255,255,255,255,63,255,255,255,255,191,32,255,255,255,255,255,247,255,255,255,
 255,255,255,255,255,255,61,127,61,255,255,255,255,255,61,255,255,255,255,61,
 127,61,255,127,255,255,255,255,255,255,255,61,255,255,255,255,255,255,255,255,
-135,0,0,0,0,255,255,0,0,255,255,255,255,255,255,255,255,255,255,63,63,254,255,
+7,0,0,0,0,255,255,0,0,255,255,255,255,255,255,255,255,255,255,63,63,254,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,159,255,255,254,255,255,7,255,255,255,255,255,255,255,255,
 255,199,255,1,255,223,15,0,255,255,15,0,255,255,15,0,255,223,13,0,255,255,255,
 255,255,255,207,255,255,1,128,16,255,3,0,0,0,0,255,3,255,255,255,255,255,255,
-255,255,255,255,255,0,255,255,255,255,255,7,255,255,255,255,255,255,255,255,
+255,255,255,255,255,1,255,255,255,255,255,7,255,255,255,255,255,255,255,255,
 63,
 0,255,255,255,127,255,15,255,1,192,255,255,255,255,63,31,0,255,255,255,255,
 255,15,255,255,255,3,255,3,0,0,0,0,255,255,255,15,255,255,255,255,255,255,255,
 127,254,255,31,0,255,3,255,3,128,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,
 255,239,255,239,15,255,3,0,0,0,0,255,255,255,255,255,243,255,255,255,255,255,
-255,191,255,3,0,255,255,255,255,255,255,63,0,255,227,255,255,255,255,255,63,
-255,1,0,0,0,0,0,0,0,0,0,0,0,222,111,0,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,128,255,31,0,
-255,255,63,63,255,255,255,255,63,63,255,170,255,255,255,63,255,255,255,255,
-255,255,223,95,220,31,207,15,255,31,220,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,128,
-0,0,255,31,0,0,0,0,0,0,0,0,0,0,0,0,132,252,47,62,80,189,255,243,224,67,0,0,
-255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+255,191,255,3,0,255,255,255,255,255,255,127,0,255,227,255,255,255,255,255,63,
+255,1,255,255,255,255,255,231,0,0,0,0,0,222,111,4,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,
+128,255,31,0,255,255,63,63,255,255,255,255,63,63,255,170,255,255,255,63,255,
+255,255,255,255,255,223,95,220,31,207,15,255,31,220,31,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,2,128,0,0,255,31,0,0,0,0,0,0,0,0,0,0,0,0,132,252,47,62,80,189,255,243,
+224,67,0,0,255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,255,255,255,255,255,255,3,0,
 0,255,255,255,255,255,127,255,255,255,255,255,127,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,31,120,12,0,255,255,255,255,191,32,255,
 255,255,255,255,255,255,128,0,0,255,255,127,0,127,127,127,127,127,127,127,127,
 255,255,255,255,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,224,0,0,0,254,3,62,31,254,255,255,255,255,255,255,255,255,255,127,224,254,
-255,255,255,255,255,255,255,255,255,255,247,224,255,255,255,255,127,254,255,
+255,255,255,255,255,255,255,255,255,255,247,224,255,255,255,255,255,254,255,
 255,255,255,255,255,255,255,255,255,127,0,0,255,255,255,7,0,0,0,0,0,0,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,63,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,7,0,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,
 0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,0,
 0,0,0,0,0,0,255,255,255,255,255,63,255,31,255,255,255,15,0,0,255,255,255,255,
 255,127,240,143,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,
 0,128,255,252,255,255,255,255,255,255,255,255,255,255,255,255,249,255,255,255,
-127,255,0,0,0,0,0,0,0,128,255,187,247,255,255,255,0,0,0,255,255,255,255,255,
-255,15,0,255,255,255,255,255,255,255,255,47,0,255,3,0,0,252,40,255,255,255,
-255,255,7,255,255,255,255,7,0,255,255,255,31,255,255,255,255,255,255,247,255,
-0,128,255,3,223,255,255,127,255,255,255,255,255,255,127,0,255,63,255,3,255,
-255,127,196,255,255,255,255,255,255,255,127,5,0,0,56,255,255,60,0,126,126,126,
-0,127,127,255,255,255,255,255,247,63,0,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,7,255,3,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,15,0,255,255,127,248,255,255,255,255,255,
+255,255,255,124,0,0,0,0,0,128,255,191,255,255,255,255,0,0,0,255,255,255,255,
+255,255,15,0,255,255,255,255,255,255,255,255,47,0,255,3,0,0,252,232,255,255,
+255,255,255,7,255,255,255,255,7,0,255,255,255,31,255,255,255,255,255,255,247,
+255,0,128,255,3,255,255,255,127,255,255,255,255,255,255,127,0,255,63,255,3,
+255,255,127,252,255,255,255,255,255,255,255,127,5,0,0,56,255,255,60,0,126,126,
+126,0,127,127,255,255,255,255,255,247,255,0,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,7,255,3,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,15,0,255,255,127,248,255,255,255,255,
+255,
 15,255,255,255,255,255,255,255,255,255,255,255,255,255,63,255,255,255,255,255,
 255,255,255,255,255,255,255,255,3,0,0,0,0,127,0,248,224,255,253,127,95,219,
 255,255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,0,248,255,255,255,
@@ -109,55 +110,63 @@
 0,0,0,0,0,0,0,0,0,0,0,0,63,253,255,255,255,255,191,145,255,255,63,0,255,255,
 127,0,255,255,255,127,0,0,0,0,0,0,0,0,255,255,55,0,255,255,63,0,255,255,255,3,
 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,192,0,0,0,0,0,0,0,0,111,240,239,
-254,255,255,15,0,0,0,0,0,255,255,255,31,255,255,255,31,0,0,0,0,255,254,255,
+254,255,255,63,0,0,0,0,0,255,255,255,31,255,255,255,31,0,0,0,0,255,254,255,
 255,31,0,0,0,255,255,255,255,255,255,63,0,255,255,63,0,255,255,7,0,255,255,3,
 0,0,0,0,0,0,0,0,0,0,0,0,
 0,255,255,255,255,255,255,255,255,255,1,0,0,0,0,0,0,255,255,255,255,255,255,7,
-0,255,255,255,255,255,255,7,0,255,255,255,255,255,255,255,255,63,0,0,0,192,
-255,0,0,252,255,255,255,255,255,255,1,0,0,255,255,255,1,255,3,255,255,255,255,
-255,255,199,255,0,0,255,255,255,255,71,0,255,255,255,255,255,255,255,255,30,0,
-255,23,0,0,0,0,255,255,251,255,255,255,159,64,0,0,0,0,0,0,0,0,127,189,255,191,
-255,1,255,255,255,255,255,255,255,1,255,3,239,159,249,255,255,253,237,227,159,
-25,129,224,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,
-255,255,187,7,255,3,0,0,0,0,255,255,255,255,255,255,255,255,179,0,255,3,0,0,0,
+0,255,255,255,255,255,255,7,0,255,255,255,255,255,0,255,3,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,31,128,0,255,255,63,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,255,255,127,0,255,255,255,255,255,255,255,255,63,0,0,0,
+192,255,0,0,252,255,255,255,255,255,255,1,0,0,255,255,255,1,255,3,255,255,255,
+255,255,255,199,255,112,0,255,255,255,255,71,0,255,255,255,255,255,255,255,
+255,30,0,255,23,0,0,0,0,255,255,251,255,255,255,159,64,0,0,0,0,0,0,0,0,127,
+189,255,191,255,1,255,255,255,255,255,255,255,1,255,3,239,159,249,255,255,253,
+237,227,159,25,129,224,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,
+255,255,255,255,255,187,7,255,131,0,0,0,0,255,255,255,255,255,255,255,255,179,
+0,255,3,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,63,127,0,0,0,63,0,0,
 0,0,255,255,255,255,255,255,255,127,17,0,255,3,0,0,0,0,255,255,255,255,255,
-255,63,0,255,3,0,0,0,0,0,
-0,255,255,255,227,255,7,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,3,
-0,128,255,255,255,255,255,255,231,127,0,0,255,255,255,255,255,255,207,255,255,
-0,0,0,0,0,255,255,255,255,255,255,255,1,255,253,255,255,255,255,127,127,1,0,
-255,3,0,0,252,255,255,255,252,255,255,254,127,0,0,0,0,0,0,0,0,0,127,251,255,
-255,255,255,127,180,203,0,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,
+255,63,1,255,3,0,0,0,0,0,0,255,255,255,231,255,7,255,3,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,
+0,255,255,255,255,255,255,255,255,255,3,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,255,252,255,255,255,255,255,252,26,0,0,0,255,255,255,255,255,255,231,
+127,0,0,255,255,255,255,255,255,255,255,255,32,0,0,0,0,255,255,255,255,255,
+255,255,1,255,253,255,255,255,255,127,127,1,0,255,3,0,0,252,255,255,255,252,
+255,255,254,127,0,0,0,0,0,0,0,0,0,127,251,255,255,255,255,127,180,203,0,255,3,
+191,253,255,255,255,127,123,1,255,3,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,127,0,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,
 0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,127,0,
 0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-255,255,255,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,255,255,255,255,255,255,255,1,255,255,255,127,255,3,0,0,0,0,0,0,0,0,0,0,0,
-0,255,255,255,63,0,0,255,255,255,255,255,255,127,0,15,0,255,3,248,255,255,224,
-255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,31,0,255,
-255,255,255,255,127,0,0,248,255,0,0,0,0,0,0,0,0,3,0,0,0,255,255,255,255,255,
+255,255,255,255,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
+255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
+255,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,
+255,255,255,255,255,255,1,255,255,255,127,255,3,0,0,0,0,0,0,0,0,0,0,0,0,255,
+255,255,63,0,0,255,255,255,255,255,255,0,0,15,0,255,3,248,255,255,224,255,255,
+0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,255,255,255,255,255,255,255,255,255,135,255,255,255,255,255,255,255,128,
+255,255,0,0,0,0,0,0,0,0,11,0,0,0,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,31,0,0,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,7,0,
-255,255,255,127,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,
+255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,7,0,255,255,255,127,0,0,0,0,0,
+0,7,0,240,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,15,255,255,255,255,255,
-255,255,255,255,255,255,255,255,7,255,31,255,1,255,67,0,0,0,0,0,0,0,0,0,0,0,0,
-255,255,255,255,255,255,255,255,255,255,223,255,255,255,255,255,255,255,255,
-223,100,222,255,235,239,255,255,255,255,255,255,255,191,231,223,223,255,255,
-255,123,95,252,253,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,15,255,255,255,255,
+255,255,255,255,255,255,255,255,255,7,255,31,255,1,255,67,0,0,0,0,0,0,0,0,0,0,
+0,0,255,255,255,255,255,255,255,255,255,255,223,255,255,255,255,255,255,255,
+255,223,100,222,255,235,239,255,255,255,255,255,255,
+255,191,231,223,223,255,255,255,123,95,252,253,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,63,255,255,255,253,255,255,247,255,255,255,
-247,255,255,223,255,255,255,223,255,255,127,255,255,255,127,255,255,255,253,
-255,255,255,253,255,255,247,207,255,255,255,255,255,255,127,255,255,249,219,7,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,
-0,0,0,0,0,
-0,255,255,255,255,255,255,255,255,143,0,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,239,255,255,255,150,254,247,10,132,234,150,170,150,247,247,94,255,251,
-255,15,238,251,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,3,255,255,255,3,
-255,255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,255,255,255,
+253,255,255,247,255,255,255,247,255,255,223,255,255,255,223,255,255,127,255,
+255,255,127,255,255,255,253,255,255,255,253,255,255,247,207,255,255,255,255,
+255,255,127,255,255,249,219,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,255,255,255,255,255,31,128,63,255,67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,
+15,255,3,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,31,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,
+143,8,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,239,255,255,255,150,254,247,10,132,234,150,170,150,247,247,94,255,251,255,
+15,238,251,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,3,255,255,255,3,255,
+255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
diff --git a/src/ctype/casemap.h b/src/ctype/casemap.h
new file mode 100644
index 00000000..6ee1209b
--- /dev/null
+++ b/src/ctype/casemap.h
@@ -0,0 +1,297 @@
+static const unsigned char tab[] = {
+	7, 8, 9, 10, 11, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	13, 6, 6, 14, 6, 6, 6, 6, 6, 6, 6, 6, 15, 16, 17, 18,
+	6, 19, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 20, 21, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 22, 23, 6, 6, 6, 24, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 25,
+	6, 6, 6, 6, 26, 6, 6, 6, 6, 6, 6, 6, 27, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 28, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 29, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 30, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36,
+	43, 43, 43, 43, 43, 43, 43, 43, 1, 0, 84, 86, 86, 86, 86, 86,
+	86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 43, 43, 43, 43, 43, 43,
+	43, 7, 43, 43, 91, 86, 86, 86, 86, 86, 86, 86, 74, 86, 86, 5,
+	49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80,
+	36, 80, 121, 49, 80, 49, 80, 49, 56, 80, 49, 80, 49, 80, 49, 80,
+	49, 80, 49, 80, 49, 80, 49, 80, 78, 49, 2, 78, 13, 13, 78, 3,
+	78, 0, 36, 110, 0, 78, 49, 38, 110, 81, 78, 36, 80, 78, 57, 20,
+	129, 27, 29, 29, 83, 49, 80, 49, 80, 13, 49, 80, 49, 80, 49, 80,
+	27, 83, 36, 80, 49, 2, 92, 123, 92, 123, 92, 123, 92, 123, 92, 123,
+	20, 121, 92, 123, 92, 123, 92, 45, 43, 73, 3, 72, 3, 120, 92, 123,
+	20, 0, 150, 10, 1, 43, 40, 6, 6, 0, 42, 6, 42, 42, 43, 7,
+	187, 181, 43, 30, 0, 43, 7, 43, 43, 43, 1, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 1, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 205, 70, 205, 43, 0, 37, 43, 7, 1, 6, 1, 85, 86, 86, 86,
+	86, 86, 85, 86, 86, 2, 36, 129, 129, 129, 129, 129, 21, 129, 129, 129,
+	0, 0, 43, 0, 178, 209, 178, 209, 178, 209, 178, 209, 0, 0, 205, 204,
+	1, 0, 215, 215, 215, 215, 215, 131, 129, 129, 129, 129, 129, 129, 129, 129,
+	129, 129, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 28, 0, 0, 0,
+	0, 0, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 2, 0, 0,
+	49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80,
+	49, 80, 78, 49, 80, 49, 80, 78, 49, 80, 49, 80, 49, 80, 49, 80,
+	49, 80, 49, 80, 49, 80, 49, 2, 135, 166, 135, 166, 135, 166, 135, 166,
+	135, 166, 135, 166, 135, 166, 135, 166, 42, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 0, 0, 0, 84, 86, 86, 86, 86, 86, 86, 86,
+	86, 86, 86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 84, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
+	12, 0, 12, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 7, 42, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 86, 86, 108, 129, 21, 0, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 7, 108, 3, 65, 43, 43, 86, 86, 86, 86, 86, 86,
+	86, 86, 86, 86, 86, 86, 86, 86, 44, 86, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 1,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 12, 108, 0, 0, 0, 0, 0, 6,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37,
+	6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37,
+	6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37,
+	6, 37, 6, 37, 6, 37, 6, 37, 86, 122, 158, 38, 6, 37, 6, 37,
+	6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37,
+	6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 1, 43, 43, 79, 86,
+	86, 44, 43, 127, 86, 86, 57, 43, 43, 85, 86, 86, 43, 43, 79, 86,
+	86, 44, 43, 127, 86, 86, 129, 55, 117, 91, 123, 92, 43, 43, 79, 86,
+	86, 2, 172, 4, 0, 0, 57, 43, 43, 85, 86, 86, 43, 43, 79, 86,
+	86, 44, 43, 43, 86, 86, 50, 19, 129, 87, 0, 111, 129, 126, 201, 215,
+	126, 45, 129, 129, 14, 126, 57, 127, 111, 87, 0, 129, 129, 126, 21, 0,
+	126, 3, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 7, 43,
+	36, 43, 151, 43, 43, 43, 43, 43, 43, 43, 43, 43, 42, 43, 43, 43,
+	43, 43, 86, 86, 86, 86, 86, 128, 129, 129, 129, 129, 57, 187, 42, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 1, 129, 129, 129, 129, 129, 129, 129, 129,
+	129, 129, 129, 129, 129, 129, 129, 201, 172, 172, 172, 172, 172, 172, 172, 172,
+	172, 172, 172, 172, 172, 172, 172, 208, 13, 0, 78, 49, 2, 180, 193, 193,
+	215, 215, 36, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80,
+	49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80,
+	49, 80, 49, 80, 215, 215, 83, 193, 71, 212, 215, 215, 215, 5, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 7, 1, 0, 1, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 78, 49, 80, 49, 80, 49, 80,
+	49, 80, 49, 80, 49, 80, 49, 80, 13, 0, 0, 0, 0, 0, 36, 80,
+	49, 80, 49, 80, 49, 80, 49, 80, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 121, 92, 123, 92, 123, 79, 123, 92, 123, 92, 123,
+	92, 123, 92, 123, 92, 123, 92, 123, 92, 123, 92, 123, 92, 123, 92, 45,
+	43, 43, 121, 20, 92, 123, 92, 45, 121, 42, 92, 39, 92, 123, 92, 123,
+	92, 123, 164, 0, 10, 180, 92, 123, 92, 123, 79, 3, 42, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 1,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 43, 43, 43, 43, 43, 43, 43, 43, 7, 0, 72, 86, 86, 86, 86,
+	86, 86, 86, 86, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 85, 86, 86, 86, 86, 86, 86,
+	86, 86, 86, 86, 86, 86, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 36, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+	43, 43, 7, 0, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 43, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 7, 0, 0,
+	0, 0, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
+	86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 43,
+	43, 43, 43, 43, 43, 43, 43, 43, 86, 86, 86, 86, 86, 86, 86, 86,
+	86, 86, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 86, 86,
+	86, 86, 86, 86, 86, 86, 86, 86, 14, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 85,
+	86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 14, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+static const int rules[] = {
+	0x0, 0x2001, -0x2000, 0x1dbf00, 0x2e700, 0x7900,
+	0x2402, 0x101, -0x100, 0x0, 0x201, -0x200,
+	-0xc6ff, -0xe800, -0x78ff, -0x12c00, 0xc300, 0xd201,
+	0xce01, 0xcd01, 0x4f01, 0xca01, 0xcb01, 0xcf01,
+	0x6100, 0xd301, 0xd101, 0xa300, 0xd501, 0x8200,
+	0xd601, 0xda01, 0xd901, 0xdb01, 0x3800, 0x3,
+	-0x4f00, -0x60ff, -0x37ff, 0x242802, 0x0, 0x101,
+	-0x100, -0xcd00, -0xda00, -0x81ff, 0x2a2b01, -0xa2ff,
+	0x2a2801, 0x2a3f00, -0xc2ff, 0x4501, 0x4701, 0x2a1f00,
+	0x2a1c00, 0x2a1e00, -0xd200, -0xce00, -0xca00, -0xcb00,
+	0xa54f00, 0xa54b00, -0xcf00, 0xa52800, 0xa54400, -0xd100,
+	-0xd300, 0x29f700, 0xa54100, 0x29fd00, -0xd500, -0xd600,
+	0x29e700, 0xa54300, 0xa52a00, -0x4500, -0xd900, -0x4700,
+	-0xdb00, 0xa51500, 0xa51200, 0x4c2402, 0x0, 0x2001,
+	-0x2000, 0x101, -0x100, 0x5400, 0x7401, 0x2601,
+	0x2501, 0x4001, 0x3f01, -0x2600, -0x2500, -0x1f00,
+	-0x4000, -0x3f00, 0x801, -0x3e00, -0x3900, -0x2f00,
+	-0x3600, -0x800, -0x5600, -0x5000, 0x700, -0x7400,
+	-0x3bff, -0x6000, -0x6ff, 0x701a02, 0x101, -0x100,
+	0x2001, -0x2000, 0x5001, 0xf01, -0xf00, 0x0,
+	0x3001, -0x3000, 0x101, -0x100, 0x0, 0xbc000,
+	0x1c6001, 0x0, 0x97d001, 0x801, -0x800, 0x8a0502,
+	0x0, -0xbbfff, -0x186200, 0x89c200, -0x182500, -0x186e00,
+	-0x186d00, -0x186400, -0x186300, -0x185c00, 0x0, 0x8a3800,
+	0x8a0400, 0xee600, 0x101, -0x100, 0x0, -0x3b00,
+	-0x1dbeff, 0x8f1d02, 0x800, -0x7ff, 0x0, 0x5600,
+	-0x55ff, 0x4a00, 0x6400, 0x8000, 0x7000, 0x7e00,
+	0x900, -0x49ff, -0x8ff, -0x1c2500, -0x63ff, -0x6fff,
+	-0x7fff, -0x7dff, 0xac0502, 0x0, 0x1001, -0x1000,
+	0x1c01, 0x101, -0x1d5cff, -0x20beff, -0x2045ff, -0x1c00,
+	0xb10b02, 0x101, -0x100, 0x3001, -0x3000, 0x0,
+	-0x29f6ff, -0xee5ff, -0x29e6ff, -0x2a2b00, -0x2a2800, -0x2a1bff,
+	-0x29fcff, -0x2a1eff, -0x2a1dff, -0x2a3eff, 0x0, -0x1c6000,
+	0x0, 0x101, -0x100, 0xbc0c02, 0x0, 0x101,
+	-0x100, -0xa543ff, 0x3a001, -0x8a03ff, -0xa527ff, 0x3000,
+	-0xa54eff, -0xa54aff, -0xa540ff, -0xa511ff, -0xa529ff, -0xa514ff,
+	-0x2fff, -0xa542ff, -0x8a37ff, 0x0, -0x97d000, -0x3a000,
+	0x0, 0x2001, -0x2000, 0x0, 0x2801, -0x2800,
+	0x0, 0x4001, -0x4000, 0x0, 0x2001, -0x2000,
+	0x0, 0x2001, -0x2000, 0x0, 0x2201, -0x2200,
+};
+static const unsigned char rulebases[] = {
+	0, 6, 39, 81, 111, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	124, 0, 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 131, 142, 146, 151,
+	0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180, 196, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 198, 201, 0, 0, 0, 219, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 222,
+	0, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 231, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 234, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 237, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+static const unsigned char exceptions[][2] = {
+	{ 48, 12 }, { 49, 13 }, { 120, 14 }, { 127, 15 },
+	{ 128, 16 }, { 129, 17 }, { 134, 18 }, { 137, 19 },
+	{ 138, 19 }, { 142, 20 }, { 143, 21 }, { 144, 22 },
+	{ 147, 19 }, { 148, 23 }, { 149, 24 }, { 150, 25 },
+	{ 151, 26 }, { 154, 27 }, { 156, 25 }, { 157, 28 },
+	{ 158, 29 }, { 159, 30 }, { 166, 31 }, { 169, 31 },
+	{ 174, 31 }, { 177, 32 }, { 178, 32 }, { 183, 33 },
+	{ 191, 34 }, { 197, 35 }, { 200, 35 }, { 203, 35 },
+	{ 221, 36 }, { 242, 35 }, { 246, 37 }, { 247, 38 },
+	{ 32, 45 }, { 58, 46 }, { 61, 47 }, { 62, 48 },
+	{ 63, 49 }, { 64, 49 }, { 67, 50 }, { 68, 51 },
+	{ 69, 52 }, { 80, 53 }, { 81, 54 }, { 82, 55 },
+	{ 83, 56 }, { 84, 57 }, { 89, 58 }, { 91, 59 },
+	{ 92, 60 }, { 97, 61 }, { 99, 62 }, { 101, 63 },
+	{ 102, 64 }, { 104, 65 }, { 105, 66 }, { 106, 64 },
+	{ 107, 67 }, { 108, 68 }, { 111, 66 }, { 113, 69 },
+	{ 114, 70 }, { 117, 71 }, { 125, 72 }, { 130, 73 },
+	{ 135, 74 }, { 137, 75 }, { 138, 76 }, { 139, 76 },
+	{ 140, 77 }, { 146, 78 }, { 157, 79 }, { 158, 80 },
+	{ 69, 87 }, { 123, 29 }, { 124, 29 }, { 125, 29 },
+	{ 127, 88 }, { 134, 89 }, { 136, 90 }, { 137, 90 },
+	{ 138, 90 }, { 140, 91 }, { 142, 92 }, { 143, 92 },
+	{ 172, 93 }, { 173, 94 }, { 174, 94 }, { 175, 94 },
+	{ 194, 95 }, { 204, 96 }, { 205, 97 }, { 206, 97 },
+	{ 207, 98 }, { 208, 99 }, { 209, 100 }, { 213, 101 },
+	{ 214, 102 }, { 215, 103 }, { 240, 104 }, { 241, 105 },
+	{ 242, 106 }, { 243, 107 }, { 244, 108 }, { 245, 109 },
+	{ 249, 110 }, { 253, 45 }, { 254, 45 }, { 255, 45 },
+	{ 80, 105 }, { 81, 105 }, { 82, 105 }, { 83, 105 },
+	{ 84, 105 }, { 85, 105 }, { 86, 105 }, { 87, 105 },
+	{ 88, 105 }, { 89, 105 }, { 90, 105 }, { 91, 105 },
+	{ 92, 105 }, { 93, 105 }, { 94, 105 }, { 95, 105 },
+	{ 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 },
+	{ 134, 0 }, { 135, 0 }, { 136, 0 }, { 137, 0 },
+	{ 192, 117 }, { 207, 118 }, { 128, 137 }, { 129, 138 },
+	{ 130, 139 }, { 133, 140 }, { 134, 141 }, { 112, 157 },
+	{ 113, 157 }, { 118, 158 }, { 119, 158 }, { 120, 159 },
+	{ 121, 159 }, { 122, 160 }, { 123, 160 }, { 124, 161 },
+	{ 125, 161 }, { 179, 162 }, { 186, 163 }, { 187, 163 },
+	{ 188, 164 }, { 190, 165 }, { 195, 162 }, { 204, 164 },
+	{ 218, 166 }, { 219, 166 }, { 229, 106 }, { 234, 167 },
+	{ 235, 167 }, { 236, 110 }, { 243, 162 }, { 248, 168 },
+	{ 249, 168 }, { 250, 169 }, { 251, 169 }, { 252, 164 },
+	{ 38, 176 }, { 42, 177 }, { 43, 178 }, { 78, 179 },
+	{ 132, 8 }, { 98, 186 }, { 99, 187 }, { 100, 188 },
+	{ 101, 189 }, { 102, 190 }, { 109, 191 }, { 110, 192 },
+	{ 111, 193 }, { 112, 194 }, { 126, 195 }, { 127, 195 },
+	{ 125, 207 }, { 141, 208 }, { 148, 209 }, { 171, 210 },
+	{ 172, 211 }, { 173, 212 }, { 176, 213 }, { 177, 214 },
+	{ 178, 215 }, { 196, 216 }, { 197, 217 }, { 198, 218 },
+};
diff --git a/src/ctype/nonspacing.h b/src/ctype/nonspacing.h
index 48231e73..7746f3b6 100644
--- a/src/ctype/nonspacing.h
+++ b/src/ctype/nonspacing.h
@@ -1,23 +1,23 @@
-16,16,16,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,16,32,16,16,16,33,34,35,
-36,37,38,39,16,16,40,16,16,16,16,16,16,16,16,16,16,16,41,42,16,16,43,16,16,16,
+16,16,16,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,16,33,16,16,16,34,35,36,
+37,38,39,40,16,16,41,16,16,16,16,16,16,16,16,16,16,16,42,43,16,16,44,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,16,44,16,45,46,47,48,16,16,16,16,16,16,16,16,16,16,
+16,16,16,16,16,16,16,16,16,16,45,16,46,47,48,49,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,16,16,16,16,50,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,51,16,16,52,
+53,16,54,55,56,16,16,16,16,16,16,57,16,16,58,16,59,60,61,62,63,64,65,66,67,68,
+69,70,16,71,72,73,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,74,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,49,16,16,50,
-51,16,52,53,54,16,16,16,16,16,16,55,16,16,16,16,16,56,57,58,59,60,61,62,63,16,
-16,64,16,65,66,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,75,76,16,16,16,77,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,67,68,16,16,16,69,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,70,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,71,72,16,16,16,16,16,16,16,73,16,16,16,16,16,74,16,16,16,16,16,16,16,75,
-76,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,16,16,16,16,78,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,79,80,16,16,16,16,16,16,16,81,16,16,16,16,16,82,83,84,16,16,16,16,16,85,
+86,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
@@ -25,57 +25,67 @@
 0,0,0,0,0,0,0,248,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,254,255,255,255,255,191,182,0,0,0,0,0,0,0,63,0,255,23,0,0,0,0,0,248,255,
 255,0,0,1,0,0,0,0,0,0,0,0,0,0,0,192,191,159,61,0,0,0,128,2,0,0,0,255,255,255,
-7,0,0,0,0,0,0,0,0,0,0,192,255,1,0,0,0,0,0,0,248,15,0,0,0,192,251,239,62,0,0,0,
-0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,255,255,255,255,
+7,0,0,0,0,0,0,0,0,0,0,192,255,1,0,0,0,0,0,0,248,15,32,0,0,192,251,239,62,0,0,
+0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,255,255,255,255,
 255,7,0,0,0,0,0,0,20,254,33,254,0,12,0,0,0,2,0,0,0,0,0,0,16,30,32,0,0,12,0,0,
-0,6,0,0,0,0,0,0,16,134,57,2,0,0,0,35,0,6,0,0,0,0,0,0,16,190,33,0,0,12,0,0,252,
-2,0,0,0,0,0,0,144,30,32,64,0,12,0,0,0,4,0,0,0,0,0,0,0,1,32,0,0,0,0,0,0,1,0,0,
-0,0,0,0,192,193,61,96,0,12,0,0,0,2,0,0,0,0,0,0,144,64,48,0,0,12,0,0,0,3,0,0,0,
-0,0,0,24,30,32,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,4,92,0,0,0,0,0,0,0,0,0,0,0,242,
-7,128,127,0,0,0,0,0,0,0,0,0,0,0,0,242,27,0,63,0,0,0,0,0,0,0,0,0,3,0,0,160,2,0,
-0,0,0,0,0,254,127,223,224,255,254,255,255,255,31,64,0,0,0,0,0,0,0,0,0,0,0,0,
-224,253,102,0,0,0,195,1,0,30,0,100,32,0,32,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,0,
-0,0,28,0,0,0,12,0,0,0,12,0,0,0,0,0,0,0,176,63,64,254,15,32,0,0,0,0,0,120,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,135,1,4,14,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,9,0,0,0,0,0,0,64,127,
-229,31,248,159,0,0,0,0,0,0,255,127,0,0,0,0,0,0,0,0,15,0,0,0,0,0,208,23,4,0,0,
-0,0,248,15,0,3,0,0,0,60,59,0,0,0,0,0,0,64,163,3,0,0,0,0,0,0,240,207,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,255,253,33,16,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,
+64,6,0,0,0,0,0,0,16,134,57,2,0,0,0,35,0,6,0,0,0,0,0,0,16,190,33,0,0,12,0,0,
+252,2,0,0,0,0,0,0,144,30,32,64,0,12,0,0,0,4,0,0,0,0,0,0,0,1,32,0,0,0,0,0,0,17,
+0,0,0,0,0,0,192,193,61,96,0,12,0,0,0,2,0,0,0,0,0,0,144,64,48,0,0,12,0,0,0,3,0,
+0,0,0,0,0,24,30,32,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,4,92,0,0,0,0,0,0,0,0,0,0,0,
+242,7,128,127,0,0,0,0,0,0,0,0,0,0,0,0,242,31,0,63,0,0,0,0,0,0,0,0,0,3,0,0,160,
+2,0,0,0,0,0,0,254,127,223,224,255,254,255,255,255,31,64,0,0,0,0,0,0,0,0,0,0,0,
+0,224,253,102,0,0,0,195,1,0,30,0,100,32,0,32,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,224,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,28,0,0,0,28,0,0,0,12,0,0,0,12,0,0,0,0,0,0,0,176,63,64,254,
+15,32,0,0,0,0,0,120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,135,1,4,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+128,9,0,0,0,0,0,0,64,127,229,31,248,159,0,0,0,0,0,0,255,127,0,0,0,0,0,0,0,0,
+15,0,0,0,0,0,208,23,4,0,0,0,0,248,15,0,3,0,0,0,60,59,0,0,0,0,0,0,64,163,3,0,0,
+0,0,0,0,240,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,255,253,33,16,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,
 251,0,248,0,0,0,124,0,0,0,0,0,0,223,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,
 255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,3,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0,
 0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,128,247,63,0,0,0,192,0,0,0,0,0,0,0,0,0,0,3,0,68,8,0,0,96,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0,255,255,3,0,0,0,0,0,192,63,0,0,128,255,3,0,0,
-0,0,0,7,0,0,0,0,0,200,19,0,0,0,0,32,0,0,0,0,0,0,0,0,126,102,0,8,16,0,0,0,0,0,
-16,0,0,0,0,0,0,157,193,2,0,0,0,0,48,64,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,33,0,0,0,0,0,64,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,255,255,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,110,240,0,0,0,0,0,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,
-0,2,0,0,0,0,0,0,255,127,0,0,0,0,0,0,128,3,0,0,0,0,0,120,38,0,0,0,0,0,0,0,0,7,
-0,0,0,128,239,31,0,0,0,0,0,0,0,8,0,3,0,0,0,0,0,192,127,0,28,0,0,0,0,0,0,0,0,0,
-0,0,128,211,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,248,7,0,0,3,0,0,0,0,
-0,0,16,1,0,0,0,192,31,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,
-92,0,0,0,0,0,0,0,0,0,0,0,0,0,248,133,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,60,176,1,0,0,48,0,0,0,0,0,0,0,0,0,0,248,167,1,0,0,0,0,0,0,
-0,0,0,0,0,0,40,191,0,0,0,0,0,0,0,0,0,0,0,0,224,188,15,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,126,6,0,0,0,0,248,121,128,0,126,14,0,0,0,0,0,252,127,3,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,127,191,0,0,0,0,0,0,0,0,0,0,252,255,255,252,109,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,126,180,191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,0,0,127,
+0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0,255,255,3,128,0,0,0,0,192,63,0,0,128,255,3,0,
+0,0,0,0,7,0,0,0,0,0,200,51,0,0,0,0,32,0,0,
+0,0,0,0,0,0,126,102,0,8,16,0,0,0,0,0,16,0,0,0,0,0,0,157,193,2,0,0,0,0,48,64,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,33,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,0,0,0,
+64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,255,
+255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,110,240,0,
+0,0,0,0,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,0,0,0,0,0,240,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,255,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,255,127,0,0,0,0,0,0,128,
+3,0,0,0,0,0,120,38,0,32,0,0,0,0,0,0,7,0,0,0,128,239,31,0,0,0,0,0,0,0,8,0,3,0,
+0,0,0,0,192,127,0,30,0,0,0,0,0,0,0,0,0,0,0,128,211,64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,128,248,7,0,0,3,0,0,0,0,0,0,24,1,0,0,0,192,31,31,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,255,92,0,0,64,0,0,0,0,0,0,0,0,0,0,248,133,13,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60,176,1,0,0,48,0,0,0,0,0,0,0,0,0,0,
+248,167,1,0,0,0,0,0,0,0,0,0,0,0,0,40,191,0,0,0,0,0,0,0,0,0,0,0,0,224,188,15,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,255,6,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,128,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-96,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,3,248,255,231,15,0,0,
-0,60,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,240,12,1,0,0,0,254,7,0,0,0,0,248,121,128,0,126,14,0,0,0,0,0,252,
+127,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,191,0,0,0,0,0,0,0,0,0,0,252,255,
+255,252,109,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,126,180,191,0,0,0,0,0,0,0,0,0,163,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,255,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,15,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,3,248,255,231,15,0,0,0,60,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
 255,255,255,255,127,248,255,255,255,255,255,31,32,0,16,0,0,248,254,255,0,0,0,
 0,0,0,0,0,0,0,127,255,255,249,219,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,
-0,0,0,0,0,0,0,240,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,0,240,7,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
diff --git a/src/ctype/punct.h b/src/ctype/punct.h
index 7a623940..67929470 100644
--- a/src/ctype/punct.h
+++ b/src/ctype/punct.h
@@ -8,17 +8,17 @@
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,64,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,65,16,16,66,16,67,68,
-69,16,70,71,72,16,73,16,16,74,75,76,77,78,16,79,16,80,81,82,83,84,85,86,87,88,
-16,89,16,90,91,16,16,16,16,16,16,92,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+69,16,70,71,72,16,73,16,16,74,75,76,77,78,16,79,80,81,82,83,84,85,86,87,88,89,
+90,91,16,92,93,94,95,16,16,16,16,96,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,97,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,98,99,16,16,100,101,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,93,94,16,16,16,95,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,96,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,97,98,99,100,16,16,101,102,17,17,103,16,16,16,16,16,16,16,16,16,16,16,16,
-16,104,105,16,16,16,16,106,16,107,108,109,17,17,17,110,111,112,113,16,16,16,
-16,16,
+16,16,16,16,16,16,16,16,102,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+16,16,16,103,104,105,106,16,16,107,108,17,17,109,16,16,16,16,16,16,110,111,16,
+16,16,16,16,112,113,16,16,114,115,116,16,117,118,119,17,17,17,120,121,122,123,
+124,16,16,16,16,
 16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,254,255,0,252,1,0,0,248,1,
@@ -28,25 +28,25 @@
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,252,0,0,0,0,0,230,254,255,255,255,0,64,73,0,0,0,0,0,24,0,255,255,0,216,
 0,0,0,0,0,0,0,1,0,60,0,0,0,0,0,0,0,0,0,0,0,0,16,224,1,30,0,
-96,255,191,0,0,0,0,0,0,255,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,207,3,
-0,0,0,3,0,32,255,127,0,0,0,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,252,0,0,0,0,0,
-0,0,0,0,16,0,32,30,0,48,0,1,0,0,0,0,0,0,0,0,16,0,32,0,0,0,0,252,47,0,0,0,0,0,
-0,0,16,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,32,0,0,0,0,3,224,0,0,0,0,0,0,0,16,
-0,32,0,0,0,0,253,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,255,7,0,0,0,0,0,0,0,0,0,32,0,
-0,0,0,0,255,0,0,0,0,0,0,0,16,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,160,0,127,0,
-0,255,3,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,0,0,0,0,0,0,128,0,128,192,223,0,12,0,0,
-0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,
+96,255,191,0,0,0,0,0,0,255,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,207,
+227,0,0,0,3,0,32,255,127,0,0,0,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,7,252,0,0,0,
+0,0,0,0,0,0,16,0,32,30,0,48,0,1,0,0,0,0,0,0,0,0,16,0,32,0,0,0,0,252,111,0,0,0,
+0,0,0,0,16,0,32,0,0,0,0,64,0,0,0,0,0,0,0,0,16,0,32,0,0,0,0,3,224,0,0,0,0,0,0,
+0,16,0,32,0,0,0,0,253,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,255,7,16,0,0,0,0,0,0,0,0,
+32,0,0,0,0,128,255,16,0,0,0,0,0,0,16,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,160,
+0,127,0,0,255,3,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,0,0,0,0,0,0,128,0,128,192,223,
+0,12,0,0,0,0,0,0,0,0,0,0,0,4,0,31,0,0,0,0,0,
 0,254,255,255,255,0,252,255,255,0,0,0,0,0,0,0,0,252,0,0,0,0,0,0,192,255,223,
-255,7,0,0,0,0,0,0,0,0,0,0,128,6,0,252,0,0,24,62,0,0,128,191,0,204,0,0,0,0,0,0,
-0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,96,255,255,255,31,0,0,255,3,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,96,0,0,1,0,0,24,0,0,0,0,0,0,0,0,0,56,0,0,0,0,16,0,0,0,112,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,254,127,47,0,0,255,3,255,127,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,49,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,196,255,255,255,
+255,7,0,0,0,0,0,0,0,0,0,0,128,6,0,252,0,0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,0,
+0,0,8,0,0,0,0,0,0,0,0,0,0,0,224,255,255,255,31,0,0,255,3,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,96,0,0,1,0,0,24,0,0,0,0,0,0,0,0,0,56,0,0,0,0,16,0,0,0,112,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,254,127,47,0,0,255,3,255,127,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,49,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,196,255,255,255,
 255,0,0,0,192,0,0,0,0,0,0,0,0,1,0,224,159,0,0,0,0,127,63,255,127,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,16,0,16,0,0,252,255,255,255,31,0,0,0,0,0,12,0,0,0,0,0,0,64,0,
-12,240,0,0,0,0,0,0,192,248,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,255,0,255,255,
+12,240,0,0,0,0,0,0,128,248,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,255,0,255,255,
 255,33,144,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,
 127,0,224,251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,160,3,224,0,224,0,
 224,0,96,128,248,255,255,255,252,255,255,255,255,255,127,223,255,241,127,255,
@@ -55,22 +55,23 @@
 255,255,255,255,255,127,0,0,0,255,7,0,0,255,255,255,255,255,255,255,255,255,
 255,63,0,0,0,0,0,0,252,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,207,255,255,255,
-63,255,255,255,255,227,255,253,7,0,0,240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,224,135,3,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,128,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,127,255,255,255,3,0,0,0,0,0,0,
-255,255,255,251,255,255,255,255,255,255,255,255,255,255,15,0,255,255,255,255,
+63,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,135,3,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,127,255,255,255,255,0,
+0,0,0,0,0,255,255,255,251,255,255,255,255,255,255,255,255,255,255,15,0,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,63,0,0,0,255,15,30,255,255,255,1,252,193,224,0,0,0,0,0,0,0,0,0,0,
-0,30,1,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,
-0,0,255,255,255,255,15,0,0,0,255,255,255,127,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,63,0,0,0,255,15,30,255,255,255,1,252,193,224,0,0,0,0,
+0,0,0,0,0,0,0,30,1,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+255,255,0,0,0,0,255,255,255,255,15,0,0,0,255,255,255,127,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,
+255,255,255,
+255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,
 255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,127,0,0,0,
 0,0,0,192,0,224,0,0,0,0,0,0,0,0,0,0,0,128,15,112,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 255,0,255,255,127,0,3,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-68,8,0,0,0,15,255,3,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0,16,192,0,0,255,255,3,23,
-0,0,0,0,0,248,0,0,0,0,8,128,0,0,0,0,0,0,0,0,0,0,8,0,255,63,0,192,32,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,240,0,0,128,59,0,0,0,0,0,0,0,128,2,0,0,192,0,0,67,0,0,0,0,0,
+64,0,0,0,0,15,255,3,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0,16,192,0,0,255,255,3,23,
+0,0,0,0,0,248,0,0,0,0,8,128,0,0,0,0,0,0,0,0,0,0,8,0,255,63,0,192,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,240,0,0,128,3,0,0,0,0,0,0,0,128,2,0,0,192,0,0,67,0,0,0,0,0,
 0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,
 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,2,0,0,0,0,0,0,
@@ -84,46 +85,57 @@
 0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 128,255,0,0,128,255,0,0,0,0,128,255,0,0,0,0,0,0,0,0,0,248,0,0,192,143,0,0,0,
 128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,255,255,252,255,255,255,255,255,0,0,0,0,
-0,0,0,135,255,0,255,1,0,0,0,224,0,0,0,224,0,0,0,0,0,1,0,0,96,248,127,0,0,0,0,
+0,0,0,135,255,1,255,1,0,0,0,224,0,0,0,224,0,0,0,0,0,1,0,0,96,248,127,0,0,0,0,
 0,0,0,0,254,0,0,0,255,0,0,0,255,0,0,0,30,0,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,0,0,0,0,0,0,0,0,0,0,0,
 0,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,192,63,252,255,63,0,0,128,3,0,0,0,0,0,0,254,3,0,0,0,0,0,0,0,
-0,0,0,0,0,0,24,0,15,0,0,0,0,0,56,0,0,0,0,0,0,0,0,0,225,63,0,232,254,255,31,0,
-0,0,0,0,0,0,96,63,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,
-0,16,0,32,0,0,192,31,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,
-248,0,40,0,0,0,0,0,0,0,0,0,0,0,0,76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,128,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,128,14,0,0,0,255,31,
-0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,252,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,224,127,0,0,0,192,255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,192,63,252,255,63,0,0,128,3,0,0,0,0,0,0,254,3,32,0,0,0,0,0,0,0,
+0,0,0,0,0,24,0,15,0,0,0,0,0,56,0,0,0,0,0,0,0,0,0,225,63,0,232,254,255,31,0,0,
+0,0,0,0,0,96,63,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,
+24,0,32,0,0,192,31,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,
+248,0,104,0,0,0,0,0,0,0,0,0,0,0,0,76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,128,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,128,14,0,0,0,255,
+31,0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,8,0,252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,7,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,24,128,255,0,0,0,0,0,
+0,0,0,0,0,223,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,62,0,0,252,255,31,3,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,128,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,128,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,
+255,3,
+128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,0,0,0,0,0,0,0,255,255,48,0,0,248,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,
+255,255,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,176,15,0,0,0,0,0,0,
+0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,63,
+0,255,255,255,255,127,254,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,1,0,0,255,255,255,255,255,255,255,255,
+63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,15,0,255,255,255,255,255,255,
+255,255,255,255,127,0,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,8,0,0,0,8,0,0,32,0,0,0,32,0,0,128,
+0,0,0,128,0,0,0,2,0,0,0,2,0,0,8,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,15,0,248,254,255,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,127,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,0,
+128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,255,127,0,0,0,0,0,0,0,
+0,0,0,0,0,0,112,7,0,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,254,255,255,255,255,255,255,255,31,0,0,0,0,0,0,0,0,0,254,255,
+255,255,255,255,255,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,255,255,255,255,255,
+15,255,255,255,255,255,255,255,255,255,255,255,255,15,0,255,127,254,255,254,
+255,254,255,255,255,63,0,255,31,255,255,255,255,0,0,0,252,0,0,0,28,0,0,0,252,
+255,255,255,31,0,0,0,0,0,0,192,255,255,255,7,0,255,255,255,255,255,15,255,1,3,
+0,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,63,0,255,31,255,7,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,15,0,255,255,255,255,255,255,255,255,255,255,255,1,
+255,15,0,0,255,15,255,255,255,255,255,255,255,0,255,3,255,255,255,255,255,0,
+255,255,255,63,0,0,0,0,0,0,0,0,0,0,255,239,255,255,255,255,255,255,255,255,
+255,255,255,255,123,252,255,255,255,255,231,199,255,255,255,231,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,15,0,255,63,15,7,7,0,63,0,
 0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,7,0,0,0,0,0,0,
-0,24,128,255,0,0,0,0,0,0,0,0,0,0,223,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-128,62,0,0,252,255,31,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,52,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,63,0,0,0,0,0,0,0,128,255,48,0,0,248,3,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,7,0,0,0,0,0,0,0,0,0,0,0,
-0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,176,15,0,0,0,0,0,0,0,0,0,0,0,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,63,0,255,255,255,255,127,254,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,1,0,0,255,255,255,255,255,255,255,255,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,127,0,255,255,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
-0,8,0,0,0,8,0,0,32,0,0,0,32,0,0,128,0,0,0,128,0,0,0,2,0,0,0,2,0,0,8,0,0,0,0,0,
-0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,15,0,
-248,254,255,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,255,127,0,0,0,0,0,0,0,0,
-0,0,0,0,0,112,7,0,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,255,255,255,255,255,15,255,
-255,255,255,255,255,255,255,255,255,255,255,15,0,255,127,254,255,254,255,254,
-255,255,255,63,0,255,31,255,255,255,127,0,0,0,252,0,0,0,12,0,0,0,252,255,255,
-255,31,0,0,0,0,0,0,192,255,255,255,7,0,255,255,255,255,255,15,255,1,3,0,63,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,255,31,
-255,1,255,255,255,255,255,255,255,255,255,255,255,255,255,255,15,0,255,255,
-255,255,255,255,255,255,255,255,31,0,0,0,0,
-0,255,15,255,255,255,255,255,255,255,0,255,3,255,255,255,255,255,0,255,255,
-255,63,0,0,0,0,0,0,0,0,0,0,255,15,255,255,255,255,255,127,255,31,255,255,255,
-15,0,0,255,255,255,0,0,0,0,0,1,0,255,255,127,0,0,0,
diff --git a/src/ctype/towctrans.c b/src/ctype/towctrans.c
index 8f681018..76d13769 100644
--- a/src/ctype/towctrans.c
+++ b/src/ctype/towctrans.c
@@ -1,307 +1,73 @@
-#include <ctype.h>
-#include <stddef.h>
 #include <wctype.h>
 
-#define CASEMAP(u1,u2,l) { (u1), (l)-(u1), (u2)-(u1)+1 }
-#define CASELACE(u1,u2) CASEMAP((u1),(u2),(u1)+1)
+static const unsigned char tab[];
 
-static const struct {
-	unsigned short upper;
-	signed char lower;
-	unsigned char len;
-} casemaps[] = {
-	CASEMAP(0xc0,0xde,0xe0),
+static const unsigned char rulebases[512];
+static const int rules[];
 
-	CASELACE(0x0100,0x012e),
-	CASELACE(0x0132,0x0136),
-	CASELACE(0x0139,0x0147),
-	CASELACE(0x014a,0x0176),
-	CASELACE(0x0179,0x017d),
+static const unsigned char exceptions[][2];
 
-	CASELACE(0x370,0x372),
-	CASEMAP(0x391,0x3a1,0x3b1),
-	CASEMAP(0x3a3,0x3ab,0x3c3),
-	CASEMAP(0x400,0x40f,0x450),
-	CASEMAP(0x410,0x42f,0x430),
+#include "casemap.h"
 
-	CASELACE(0x460,0x480),
-	CASELACE(0x48a,0x4be),
-	CASELACE(0x4c1,0x4cd),
-	CASELACE(0x4d0,0x50e),
-
-	CASELACE(0x514,0x52e),
-	CASEMAP(0x531,0x556,0x561),
-
-	CASELACE(0x01a0,0x01a4),
-	CASELACE(0x01b3,0x01b5),
-	CASELACE(0x01cd,0x01db),
-	CASELACE(0x01de,0x01ee),
-	CASELACE(0x01f8,0x021e),
-	CASELACE(0x0222,0x0232),
-	CASELACE(0x03d8,0x03ee),
-
-	CASELACE(0x1e00,0x1e94),
-	CASELACE(0x1ea0,0x1efe),
-
-	CASEMAP(0x1f08,0x1f0f,0x1f00),
-	CASEMAP(0x1f18,0x1f1d,0x1f10),
-	CASEMAP(0x1f28,0x1f2f,0x1f20),
-	CASEMAP(0x1f38,0x1f3f,0x1f30),
-	CASEMAP(0x1f48,0x1f4d,0x1f40),
-
-	CASEMAP(0x1f68,0x1f6f,0x1f60),
-	CASEMAP(0x1f88,0x1f8f,0x1f80),
-	CASEMAP(0x1f98,0x1f9f,0x1f90),
-	CASEMAP(0x1fa8,0x1faf,0x1fa0),
-	CASEMAP(0x1fb8,0x1fb9,0x1fb0),
-	CASEMAP(0x1fba,0x1fbb,0x1f70),
-	CASEMAP(0x1fc8,0x1fcb,0x1f72),
-	CASEMAP(0x1fd8,0x1fd9,0x1fd0),
-	CASEMAP(0x1fda,0x1fdb,0x1f76),
-	CASEMAP(0x1fe8,0x1fe9,0x1fe0),
-	CASEMAP(0x1fea,0x1feb,0x1f7a),
-	CASEMAP(0x1ff8,0x1ff9,0x1f78),
-	CASEMAP(0x1ffa,0x1ffb,0x1f7c),
-
-	CASEMAP(0x13f0,0x13f5,0x13f8),
-	CASELACE(0xa698,0xa69a),
-	CASELACE(0xa796,0xa79e),
-
-	CASELACE(0x246,0x24e),
-	CASELACE(0x510,0x512),
-	CASEMAP(0x2160,0x216f,0x2170),
-	CASEMAP(0x2c00,0x2c2e,0x2c30),
-	CASELACE(0x2c67,0x2c6b),
-	CASELACE(0x2c80,0x2ce2),
-	CASELACE(0x2ceb,0x2ced),
-
-	CASELACE(0xa640,0xa66c),
-	CASELACE(0xa680,0xa696),
-
-	CASELACE(0xa722,0xa72e),
-	CASELACE(0xa732,0xa76e),
-	CASELACE(0xa779,0xa77b),
-	CASELACE(0xa77e,0xa786),
-
-	CASELACE(0xa790,0xa792),
-	CASELACE(0xa7a0,0xa7a8),
-
-	CASELACE(0xa7b4,0xa7b6),
-
-	CASEMAP(0xff21,0xff3a,0xff41),
-	{ 0,0,0 }
-};
-
-static const unsigned short pairs[][2] = {
-	{ 'I',    0x0131 },
-	{ 'S',    0x017f },
-	{ 0x0130, 'i'    },
-	{ 0x0178, 0x00ff },
-	{ 0x0181, 0x0253 },
-	{ 0x0182, 0x0183 },
-	{ 0x0184, 0x0185 },
-	{ 0x0186, 0x0254 },
-	{ 0x0187, 0x0188 },
-	{ 0x0189, 0x0256 },
-	{ 0x018a, 0x0257 },
-	{ 0x018b, 0x018c },
-	{ 0x018e, 0x01dd },
-	{ 0x018f, 0x0259 },
-	{ 0x0190, 0x025b },
-	{ 0x0191, 0x0192 },
-	{ 0x0193, 0x0260 },
-	{ 0x0194, 0x0263 },
-	{ 0x0196, 0x0269 },
-	{ 0x0197, 0x0268 },
-	{ 0x0198, 0x0199 },
-	{ 0x019c, 0x026f },
-	{ 0x019d, 0x0272 },
-	{ 0x019f, 0x0275 },
-	{ 0x01a6, 0x0280 },
-	{ 0x01a7, 0x01a8 },
-	{ 0x01a9, 0x0283 },
-	{ 0x01ac, 0x01ad },
-	{ 0x01ae, 0x0288 },
-	{ 0x01af, 0x01b0 },
-	{ 0x01b1, 0x028a },
-	{ 0x01b2, 0x028b },
-	{ 0x01b7, 0x0292 },
-	{ 0x01b8, 0x01b9 },
-	{ 0x01bc, 0x01bd },
-	{ 0x01c4, 0x01c6 },
-	{ 0x01c4, 0x01c5 },
-	{ 0x01c5, 0x01c6 },
-	{ 0x01c7, 0x01c9 },
-	{ 0x01c7, 0x01c8 },
-	{ 0x01c8, 0x01c9 },
-	{ 0x01ca, 0x01cc },
-	{ 0x01ca, 0x01cb },
-	{ 0x01cb, 0x01cc },
-	{ 0x01f1, 0x01f3 },
-	{ 0x01f1, 0x01f2 },
-	{ 0x01f2, 0x01f3 },
-	{ 0x01f4, 0x01f5 },
-	{ 0x01f6, 0x0195 },
-	{ 0x01f7, 0x01bf },
-	{ 0x0220, 0x019e },
-	{ 0x0386, 0x03ac },
-	{ 0x0388, 0x03ad },
-	{ 0x0389, 0x03ae },
-	{ 0x038a, 0x03af },
-	{ 0x038c, 0x03cc },
-	{ 0x038e, 0x03cd },
-	{ 0x038f, 0x03ce },
-	{ 0x0399, 0x0345 },
-	{ 0x0399, 0x1fbe },
-	{ 0x03a3, 0x03c2 },
-	{ 0x03f7, 0x03f8 },
-	{ 0x03fa, 0x03fb },
-	{ 0x1e60, 0x1e9b },
-	{ 0x1e9e, 0xdf },
-
-	{ 0x1f59, 0x1f51 },
-	{ 0x1f5b, 0x1f53 },
-	{ 0x1f5d, 0x1f55 },
-	{ 0x1f5f, 0x1f57 },
-	{ 0x1fbc, 0x1fb3 },
-	{ 0x1fcc, 0x1fc3 },
-	{ 0x1fec, 0x1fe5 },
-	{ 0x1ffc, 0x1ff3 },
-
-	{ 0x23a, 0x2c65 },
-	{ 0x23b, 0x23c },
-	{ 0x23d, 0x19a },
-	{ 0x23e, 0x2c66 },
-	{ 0x241, 0x242 },
-	{ 0x243, 0x180 },
-	{ 0x244, 0x289 },
-	{ 0x245, 0x28c },
-	{ 0x3f4, 0x3b8 },
-	{ 0x3f9, 0x3f2 },
-	{ 0x3fd, 0x37b },
-	{ 0x3fe, 0x37c },
-	{ 0x3ff, 0x37d },
-	{ 0x4c0, 0x4cf },
-
-	{ 0x2126, 0x3c9 },
-	{ 0x212a, 'k' },
-	{ 0x212b, 0xe5 },
-	{ 0x2132, 0x214e },
-	{ 0x2183, 0x2184 },
-	{ 0x2c60, 0x2c61 },
-	{ 0x2c62, 0x26b },
-	{ 0x2c63, 0x1d7d },
-	{ 0x2c64, 0x27d },
-	{ 0x2c6d, 0x251 },
-	{ 0x2c6e, 0x271 },
-	{ 0x2c6f, 0x250 },
-	{ 0x2c70, 0x252 },
-	{ 0x2c72, 0x2c73 },
-	{ 0x2c75, 0x2c76 },
-	{ 0x2c7e, 0x23f },
-	{ 0x2c7f, 0x240 },
-	{ 0x2cf2, 0x2cf3 },
-
-	{ 0xa77d, 0x1d79 },
-	{ 0xa78b, 0xa78c },
-	{ 0xa78d, 0x265 },
-	{ 0xa7aa, 0x266 },
-
-	{ 0x10c7, 0x2d27 },
-	{ 0x10cd, 0x2d2d },
-
-	/* bogus greek 'symbol' letters */
-	{ 0x376, 0x377 },
-	{ 0x39c, 0xb5 },
-	{ 0x392, 0x3d0 },
-	{ 0x398, 0x3d1 },
-	{ 0x3a6, 0x3d5 },
-	{ 0x3a0, 0x3d6 },
-	{ 0x39a, 0x3f0 },
-	{ 0x3a1, 0x3f1 },
-	{ 0x395, 0x3f5 },
-	{ 0x3cf, 0x3d7 },
-
-	{ 0xa7ab, 0x25c },
-	{ 0xa7ac, 0x261 },
-	{ 0xa7ad, 0x26c },
-	{ 0xa7ae, 0x26a },
-	{ 0xa7b0, 0x29e },
-	{ 0xa7b1, 0x287 },
-	{ 0xa7b2, 0x29d },
-	{ 0xa7b3, 0xab53 },
-
-	/* special cyrillic lowercase forms */
-	{ 0x412, 0x1c80 },
-	{ 0x414, 0x1c81 },
-	{ 0x41e, 0x1c82 },
-	{ 0x421, 0x1c83 },
-	{ 0x422, 0x1c84 },
-	{ 0x422, 0x1c85 },
-	{ 0x42a, 0x1c86 },
-	{ 0x462, 0x1c87 },
-	{ 0xa64a, 0x1c88 },
-
-	{ 0,0 }
-};
-
-
-static wchar_t __towcase(wchar_t wc, int lower)
+static int casemap(unsigned c, int dir)
 {
-	int i;
-	int lmul = 2*lower-1;
-	int lmask = lower-1;
-	/* no letters with case in these large ranges */
-	if (!iswalpha(wc)
-	 || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
-	 || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
-	 || (unsigned)wc - 0xa800 <= 0xab52-0xa800
-	 || (unsigned)wc - 0xabc0 <= 0xfeff-0xabc0)
-		return wc;
-	/* special case because the diff between upper/lower is too big */
-	if (lower && (unsigned)wc - 0x10a0 < 0x2e)
-		if (wc>0x10c5 && wc != 0x10c7 && wc != 0x10cd) return wc;
-		else return wc + 0x2d00 - 0x10a0;
-	if (!lower && (unsigned)wc - 0x2d00 < 0x26)
-		if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
-		else return wc + 0x10a0 - 0x2d00;
-	if (lower && (unsigned)wc - 0x13a0 < 0x50)
-		return wc + 0xab70 - 0x13a0;
-	if (!lower && (unsigned)wc - 0xab70 < 0x50)
-		return wc + 0x13a0 - 0xab70;
-	for (i=0; casemaps[i].len; i++) {
-		int base = casemaps[i].upper + (lmask & casemaps[i].lower);
-		if ((unsigned)wc-base < casemaps[i].len) {
-			if (casemaps[i].lower == 1)
-				return wc + lower - ((wc-casemaps[i].upper)&1);
-			return wc + lmul*casemaps[i].lower;
+	unsigned b, x, y, v, rt, xb, xn;
+	int r, rd, c0 = c;
+
+	if (c >= 0x20000) return c;
+
+	b = c>>8;
+	c &= 255;
+	x = c/3;
+	y = c%3;
+
+	/* lookup entry in two-level base-6 table */
+	v = tab[tab[b]*86+x];
+	static const int mt[] = { 2048, 342, 57 };
+	v = (v*mt[y]>>11)%6;
+
+	/* use the bit vector out of the tables as an index into
+	 * a block-specific set of rules and decode the rule into
+	 * a type and a case-mapping delta. */
+	r = rules[rulebases[b]+v];
+	rt = r & 255;
+	rd = r >> 8;
+
+	/* rules 0/1 are simple lower/upper case with a delta.
+	 * apply according to desired mapping direction. */
+	if (rt < 2) return c0 + (rd & -(rt^dir));
+
+	/* binary search. endpoints of the binary search for
+	 * this block are stored in the rule delta field. */
+	xn = rd & 0xff;
+	xb = (unsigned)rd >> 8;
+	while (xn) {
+		unsigned try = exceptions[xb+xn/2][0];
+		if (try == c) {
+			r = rules[exceptions[xb+xn/2][1]];
+			rt = r & 255;
+			rd = r >> 8;
+			if (rt < 2) return c0 + (rd & -(rt^dir));
+			/* Hard-coded for the four exceptional titlecase */
+			return c0 + (dir ? -1 : 1);
+		} else if (try > c) {
+			xn /= 2;
+		} else {
+			xb += xn/2;
+			xn -= xn/2;
 		}
 	}
-	for (i=0; pairs[i][1-lower]; i++) {
-		if (pairs[i][1-lower] == wc)
-			return pairs[i][lower];
-	}
-	if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
-		return wc - 0x28 + 0x50*lower;
-	if ((unsigned)wc - (0x104d8 - 0x28*lower) < 0x24)
-		return wc - 0x28 + 0x50*lower;
-	if ((unsigned)wc - (0x10cc0 - 0x40*lower) < 0x33)
-		return wc - 0x40 + 0x80*lower;
-	if ((unsigned)wc - (0x118c0 - 0x20*lower) < 0x20)
-		return wc - 0x20 + 0x40*lower;
-	if ((unsigned)wc - (0x1e922 - 0x22*lower) < 0x22)
-		return wc - 0x22 + 0x44*lower;
-	return wc;
+	return c0;
 }
 
-wint_t towupper(wint_t wc)
+wint_t towlower(wint_t wc)
 {
-	return (unsigned)wc < 128 ? toupper(wc) : __towcase(wc, 0);
+	return casemap(wc, 0);
 }
 
-wint_t towlower(wint_t wc)
+wint_t towupper(wint_t wc)
 {
-	return (unsigned)wc < 128 ? tolower(wc) : __towcase(wc, 1);
+	return casemap(wc, 1);
 }
 
 wint_t __towupper_l(wint_t c, locale_t l)
diff --git a/src/ctype/wcwidth.c b/src/ctype/wcwidth.c
index 49c40eea..36256a53 100644
--- a/src/ctype/wcwidth.c
+++ b/src/ctype/wcwidth.c
@@ -23,7 +23,7 @@ int wcwidth(wchar_t wc)
 		return -1;
 	if (wc-0x20000U < 0x20000)
 		return 2;
-	if (wc == 0xe0001 || wc-0xe0020U < 0x5f || wc-0xe0100 < 0xef)
+	if (wc == 0xe0001 || wc-0xe0020U < 0x5f || wc-0xe0100U < 0xef)
 		return 0;
 	return 1;
 }
diff --git a/src/ctype/wide.h b/src/ctype/wide.h
index e4672b23..e403c9a5 100644
--- a/src/ctype/wide.h
+++ b/src/ctype/wide.h
@@ -17,7 +17,7 @@
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,38,39,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
-16,16,16,16,16,16,16,40,41,42,43,44,45,46,16,16,47,16,16,16,16,16,
+16,16,16,16,16,16,16,40,41,42,43,44,45,46,47,16,48,49,16,16,16,16,
 16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
@@ -31,10 +31,10 @@
 255,255,255,15,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,255,15,255,255,255,255,
 255,255,255,127,254,255,255,255,255,255,255,255,255,255,127,254,255,255,255,
-255,255,255,255,255,255,255,255,255,224,255,255,255,255,127,254,255,255,255,
+255,255,255,255,255,255,255,255,255,224,255,255,255,255,255,254,255,255,255,
 255,255,255,255,255,255,255,127,255,255,255,255,255,7,255,255,255,255,15,0,
 255,255,255,255,255,127,255,255,255,255,255,0,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,127,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,
 0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,31,255,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,
@@ -43,13 +43,13 @@
 255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,255,3,0,0,255,255,255,255,247,255,127,15,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,254,255,255,255,255,255,255,255,255,255,255,
 255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,
-0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,7,0,255,255,255,127,0,0,0,0,0,0,0,
-0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,7,0,255,255,255,127,0,0,0,0,0,
+0,7,0,240,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 15,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,64,254,7,0,0,0,0,0,0,0,0,0,0,0,0,7,0,255,255,255,
 255,255,15,255,1,3,0,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,
@@ -58,6 +58,8 @@
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 159,255,255,255,255,255,255,255,63,0,120,255,255,255,0,0,4,0,0,96,0,16,0,0,0,
 0,0,0,0,0,0,0,248,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,255,255,
-255,255,255,255,255,255,63,16,7,0,0,24,240,1,0,0,255,255,255,255,255,127,255,
-31,255,255,255,15,0,0,255,255,255,0,0,0,0,0,1,0,255,255,127,0,0,
-0,
+255,255,255,255,255,255,63,16,39,0,0,24,240,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,255,15,0,
+0,0,224,255,255,255,255,255,255,255,255,255,255,255,255,123,252,255,255,255,
+255,231,199,255,255,255,231,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,15,7,7,0,63,0,0,0,0,0,0,0,0,0,0,0,0,0,
diff --git a/src/dirent/alphasort.c b/src/dirent/alphasort.c
index bee672eb..ab2624e2 100644
--- a/src/dirent/alphasort.c
+++ b/src/dirent/alphasort.c
@@ -5,5 +5,3 @@ int alphasort(const struct dirent **a, const struct dirent **b)
 {
 	return strcoll((*a)->d_name, (*b)->d_name);
 }
-
-weak_alias(alphasort, alphasort64);
diff --git a/src/dirent/fdopendir.c b/src/dirent/fdopendir.c
index c377271d..d78fb87f 100644
--- a/src/dirent/fdopendir.c
+++ b/src/dirent/fdopendir.c
@@ -13,6 +13,10 @@ DIR *fdopendir(int fd)
 	if (fstat(fd, &st) < 0) {
 		return 0;
 	}
+	if (fcntl(fd, F_GETFL) & O_PATH) {
+		errno = EBADF;
+		return 0;
+	}
 	if (!S_ISDIR(st.st_mode)) {
 		errno = ENOTDIR;
 		return 0;
diff --git a/src/dirent/posix_getdents.c b/src/dirent/posix_getdents.c
new file mode 100644
index 00000000..26c16ac6
--- /dev/null
+++ b/src/dirent/posix_getdents.c
@@ -0,0 +1,11 @@
+#include <dirent.h>
+#include <limits.h>
+#include <errno.h>
+#include "syscall.h"
+
+ssize_t posix_getdents(int fd, void *buf, size_t len, int flags)
+{
+	if (flags) return __syscall_ret(-EOPNOTSUPP);
+	if (len>INT_MAX) len = INT_MAX;
+	return syscall(SYS_getdents, fd, buf, len);
+}
diff --git a/src/dirent/readdir.c b/src/dirent/readdir.c
index 569fc705..5a03b363 100644
--- a/src/dirent/readdir.c
+++ b/src/dirent/readdir.c
@@ -25,5 +25,3 @@ struct dirent *readdir(DIR *dir)
 	dir->tell = de->d_off;
 	return de;
 }
-
-weak_alias(readdir, readdir64);
diff --git a/src/dirent/readdir_r.c b/src/dirent/readdir_r.c
index e2a818f3..0d5de5f5 100644
--- a/src/dirent/readdir_r.c
+++ b/src/dirent/readdir_r.c
@@ -25,5 +25,3 @@ int readdir_r(DIR *restrict dir, struct dirent *restrict buf, struct dirent **re
 	*result = buf;
 	return 0;
 }
-
-weak_alias(readdir_r, readdir64_r);
diff --git a/src/dirent/scandir.c b/src/dirent/scandir.c
index 7ee195dd..7456b9b8 100644
--- a/src/dirent/scandir.c
+++ b/src/dirent/scandir.c
@@ -43,5 +43,3 @@ int scandir(const char *path, struct dirent ***res,
 	*res = names;
 	return cnt;
 }
-
-weak_alias(scandir, scandir64);
diff --git a/src/dirent/versionsort.c b/src/dirent/versionsort.c
index d4c48923..97696105 100644
--- a/src/dirent/versionsort.c
+++ b/src/dirent/versionsort.c
@@ -6,6 +6,3 @@ int versionsort(const struct dirent **a, const struct dirent **b)
 {
 	return strverscmp((*a)->d_name, (*b)->d_name);
 }
-
-#undef versionsort64
-weak_alias(versionsort, versionsort64);
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index 842886f6..a93141ed 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -1,3 +1,4 @@
+#define SYSCALL_NO_TLS 1
 #include <elf.h>
 #include <limits.h>
 #include <sys/mman.h>
@@ -8,6 +9,8 @@
 #include "atomic.h"
 #include "syscall.h"
 
+volatile int __thread_list_lock;
+
 int __init_tp(void *p)
 {
 	pthread_t td = p;
@@ -16,9 +19,11 @@ int __init_tp(void *p)
 	if (r < 0) return -1;
 	if (!r) libc.can_do_threads = 1;
 	td->detach_state = DT_JOINABLE;
-	td->tid = __syscall(SYS_set_tid_address, &td->detach_state);
+	td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock);
 	td->locale = &libc.global_locale;
 	td->robust_list.head = &td->robust_list.head;
+	td->sysinfo = __sysinfo;
+	td->next = td->prev = td;
 	return 0;
 }
 
@@ -62,7 +67,7 @@ void *__copy_tls(unsigned char *mem)
 	}
 #endif
 	dtv[0] = libc.tls_cnt;
-	td->dtv = td->dtv_copy = dtv;
+	td->dtv = dtv;
 	return td;
 }
 
@@ -110,7 +115,8 @@ static void static_init_tls(size_t *aux)
 		& (main_tls.align-1);
 #ifdef TLS_ABOVE_TP
 	main_tls.offset = GAP_ABOVE_TP;
-	main_tls.offset += -GAP_ABOVE_TP & (main_tls.align-1);
+	main_tls.offset += (-GAP_ABOVE_TP + (uintptr_t)main_tls.image)
+		& (main_tls.align-1);
 #else
 	main_tls.offset = main_tls.size;
 #endif
diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c
index 7c95f822..c5b277bd 100644
--- a/src/env/__libc_start_main.c
+++ b/src/env/__libc_start_main.c
@@ -28,7 +28,7 @@ void __init_libc(char **envp, char *pn)
 	libc.auxv = auxv = (void *)(envp+i+1);
 	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT) aux[auxv[i]] = auxv[i+1];
 	__hwcap = aux[AT_HWCAP];
-	__sysinfo = aux[AT_SYSINFO];
+	if (aux[AT_SYSINFO]) __sysinfo = aux[AT_SYSINFO];
 	libc.page_size = aux[AT_PAGESZ];
 
 	if (!pn) pn = (void*)aux[AT_EXECFN];
@@ -69,7 +69,8 @@ weak_alias(libc_start_init, __libc_start_init);
 typedef int lsm2_fn(int (*)(int,char **,char **), int, char **);
 static lsm2_fn libc_start_main_stage2;
 
-int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
+int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv,
+	void (*init_dummy)(), void(*fini_dummy)(), void(*ldso_dummy)())
 {
 	char **envp = argv+argc+1;
 
diff --git a/src/env/__stack_chk_fail.c b/src/env/__stack_chk_fail.c
index e32596d1..e5352602 100644
--- a/src/env/__stack_chk_fail.c
+++ b/src/env/__stack_chk_fail.c
@@ -9,7 +9,16 @@ void __init_ssp(void *entropy)
 	if (entropy) memcpy(&__stack_chk_guard, entropy, sizeof(uintptr_t));
 	else __stack_chk_guard = (uintptr_t)&__stack_chk_guard * 1103515245;
 
-	__pthread_self()->CANARY = __stack_chk_guard;
+#if UINTPTR_MAX >= 0xffffffffffffffff
+	/* Sacrifice 8 bits of entropy on 64bit to prevent leaking/
+	 * overwriting the canary via string-manipulation functions.
+	 * The NULL byte is on the second byte so that off-by-ones can
+	 * still be detected. Endianness is taken care of
+	 * automatically. */
+	((char *)&__stack_chk_guard)[1] = 0;
+#endif
+
+	__pthread_self()->canary = __stack_chk_guard;
 }
 
 void __stack_chk_fail(void)
diff --git a/src/env/secure_getenv.c b/src/env/secure_getenv.c
new file mode 100644
index 00000000..72322f81
--- /dev/null
+++ b/src/env/secure_getenv.c
@@ -0,0 +1,8 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include "libc.h"
+
+char *secure_getenv(const char *name)
+{
+	return libc.secure ? NULL : getenv(name);
+}
diff --git a/src/errno/__strerror.h b/src/errno/__strerror.h
index 2f04d400..0398b5b6 100644
--- a/src/errno/__strerror.h
+++ b/src/errno/__strerror.h
@@ -1,8 +1,9 @@
-/* This file is sorted such that 'errors' which represent exceptional
- * conditions under which a correct program may fail come first, followed
- * by messages that indicate an incorrect program or system failure. The
- * macro E() along with double-inclusion is used to ensure that ordering
- * of the strings remains synchronized. */
+/* The first entry is a catch-all for codes not enumerated here.
+ * This file is included multiple times to declare and define a structure
+ * with these messages, and then to define a lookup table translating
+ * error codes to offsets of corresponding fields in the structure. */
+
+E(0,            "No error information")
 
 E(EILSEQ,       "Illegal byte sequence")
 E(EDOM,         "Domain error")
@@ -96,10 +97,14 @@ E(ESHUTDOWN,    "Cannot send after socket shutdown")
 E(EALREADY,     "Operation already in progress")
 E(EINPROGRESS,  "Operation in progress")
 E(ESTALE,       "Stale file handle")
+E(EUCLEAN,      "Data consistency error")
+E(ENAVAIL,      "Resource not available")
 E(EREMOTEIO,    "Remote I/O error")
 E(EDQUOT,       "Quota exceeded")
 E(ENOMEDIUM,    "No medium found")
 E(EMEDIUMTYPE,  "Wrong medium type")
 E(EMULTIHOP,    "Multihop attempted")
-
-E(0,            "No error information")
+E(ENOKEY,       "Required key not available")
+E(EKEYEXPIRED,  "Key has expired")
+E(EKEYREVOKED,  "Key has been revoked")
+E(EKEYREJECTED, "Key was rejected by service")
diff --git a/src/errno/strerror.c b/src/errno/strerror.c
index e3ed771a..7f926432 100644
--- a/src/errno/strerror.c
+++ b/src/errno/strerror.c
@@ -1,30 +1,41 @@
 #include <errno.h>
+#include <stddef.h>
 #include <string.h>
 #include "locale_impl.h"
 
-#define E(a,b) ((unsigned char)a),
-static const unsigned char errid[] = {
+/* mips has one error code outside of the 8-bit range due to a
+ * historical typo, so we just remap it. */
+#if EDQUOT==1133
+#define EDQUOT_ORIG 1133
+#undef  EDQUOT
+#define EDQUOT 109
+#endif
+
+static const struct errmsgstr_t {
+#define E(n, s) char str##n[sizeof(s)];
+#include "__strerror.h"
+#undef E
+} errmsgstr = {
+#define E(n, s) s,
 #include "__strerror.h"
+#undef E
 };
 
-#undef E
-#define E(a,b) b "\0"
-static const char errmsg[] =
+static const unsigned short errmsgidx[] = {
+#define E(n, s) [n] = offsetof(struct errmsgstr_t, str##n),
 #include "__strerror.h"
-;
+#undef E
+};
 
 char *__strerror_l(int e, locale_t loc)
 {
 	const char *s;
-	int i;
-	/* mips has one error code outside of the 8-bit range due to a
-	 * historical typo, so we just remap it. */
-	if (EDQUOT==1133) {
-		if (e==109) e=-1;
-		else if (e==EDQUOT) e=109;
-	}
-	for (i=0; errid[i] && errid[i] != e; i++);
-	for (s=errmsg; i; s++, i--) for (; *s; s++);
+#ifdef EDQUOT_ORIG
+	if (e==EDQUOT) e=0;
+	else if (e==EDQUOT_ORIG) e=EDQUOT;
+#endif
+	if (e >= sizeof errmsgidx / sizeof *errmsgidx) e = 0;
+	s = (char *)&errmsgstr + errmsgidx[e];
 	return (char *)LCTRANS(s, LC_MESSAGES, loc);
 }
 
diff --git a/src/exit/abort.c b/src/exit/abort.c
index e1980f10..f21f458e 100644
--- a/src/exit/abort.c
+++ b/src/exit/abort.c
@@ -6,8 +6,6 @@
 #include "lock.h"
 #include "ksigaction.h"
 
-hidden volatile int __abort_lock[1];
-
 _Noreturn void abort(void)
 {
 	raise(SIGABRT);
diff --git a/src/exit/abort_lock.c b/src/exit/abort_lock.c
new file mode 100644
index 00000000..3af72c7b
--- /dev/null
+++ b/src/exit/abort_lock.c
@@ -0,0 +1,3 @@
+#include "pthread_impl.h"
+
+volatile int __abort_lock[1];
diff --git a/src/exit/assert.c b/src/exit/assert.c
index 49b0dc3e..94edd827 100644
--- a/src/exit/assert.c
+++ b/src/exit/assert.c
@@ -4,6 +4,5 @@
 _Noreturn void __assert_fail(const char *expr, const char *file, int line, const char *func)
 {
 	fprintf(stderr, "Assertion failed: %s (%s: %s: %d)\n", expr, file, func, line);
-	fflush(NULL);
 	abort();
 }
diff --git a/src/exit/at_quick_exit.c b/src/exit/at_quick_exit.c
index d3ce6522..e4b5d78d 100644
--- a/src/exit/at_quick_exit.c
+++ b/src/exit/at_quick_exit.c
@@ -1,12 +1,14 @@
 #include <stdlib.h>
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 #define COUNT 32
 
 static void (*funcs[COUNT])(void);
 static int count;
 static volatile int lock[1];
+volatile int *const __at_quick_exit_lockptr = lock;
 
 void __funcs_on_quick_exit()
 {
diff --git a/src/exit/atexit.c b/src/exit/atexit.c
index 160d277a..92c91c9d 100644
--- a/src/exit/atexit.c
+++ b/src/exit/atexit.c
@@ -2,6 +2,12 @@
 #include <stdint.h>
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free undef
 
 /* Ensure that at least 32 atexit handlers can be registered without malloc */
 #define COUNT 32
@@ -13,8 +19,10 @@ static struct fl
 	void *a[COUNT];
 } builtin, *head;
 
+static int finished_atexit;
 static int slot;
 static volatile int lock[1];
+volatile int *const __atexit_lockptr = lock;
 
 void __funcs_on_exit()
 {
@@ -27,6 +35,10 @@ void __funcs_on_exit()
 		func(arg);
 		LOCK(lock);
 	}
+	/* Unlock to prevent deadlock if a global dtor
+	 * attempts to call atexit. */
+	finished_atexit = 1;
+	UNLOCK(lock);
 }
 
 void __cxa_finalize(void *dso)
@@ -37,6 +49,13 @@ int __cxa_atexit(void (*func)(void *), void *arg, void *dso)
 {
 	LOCK(lock);
 
+	/* Prevent dtors from registering further atexit
+	 * handlers that would never be run. */
+	if (finished_atexit) {
+		UNLOCK(lock);
+		return -1;
+	}
+
 	/* Defer initialization of head so it can be in BSS */
 	if (!head) head = &builtin;
 
diff --git a/src/exit/exit.c b/src/exit/exit.c
index a6869b37..17b33cc6 100644
--- a/src/exit/exit.c
+++ b/src/exit/exit.c
@@ -1,6 +1,9 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include "libc.h"
+#include "pthread_impl.h"
+#include "atomic.h"
+#include "syscall.h"
 
 static void dummy()
 {
@@ -26,6 +29,17 @@ weak_alias(libc_exit_fini, __libc_exit_fini);
 
 _Noreturn void exit(int code)
 {
+	/* Handle potentially concurrent or recursive calls to exit,
+	 * whose behaviors have traditionally been undefined by the
+	 * standards. Using a custom lock here avoids pulling in lock
+	 * machinery and lets us trap recursive calls while supporting
+	 * multiple threads contending to be the one to exit(). */
+	static volatile int exit_lock[1];
+	int tid =  __pthread_self()->tid;
+	int prev = a_cas(exit_lock, 0, tid);
+	if (prev == tid) a_crash();
+	else if (prev) for (;;) __sys_pause();
+
 	__funcs_on_exit();
 	__libc_exit_fini();
 	__stdio_exit();
diff --git a/src/fcntl/creat.c b/src/fcntl/creat.c
index 8f8aab64..c9c43910 100644
--- a/src/fcntl/creat.c
+++ b/src/fcntl/creat.c
@@ -4,5 +4,3 @@ int creat(const char *filename, mode_t mode)
 {
 	return open(filename, O_CREAT|O_WRONLY|O_TRUNC, mode);
 }
-
-weak_alias(creat, creat64);
diff --git a/src/fcntl/open.c b/src/fcntl/open.c
index 1d817a2d..4c3c8275 100644
--- a/src/fcntl/open.c
+++ b/src/fcntl/open.c
@@ -19,5 +19,3 @@ int open(const char *filename, int flags, ...)
 
 	return __syscall_ret(fd);
 }
-
-weak_alias(open, open64);
diff --git a/src/fcntl/openat.c b/src/fcntl/openat.c
index ad165ec3..83a9e0d0 100644
--- a/src/fcntl/openat.c
+++ b/src/fcntl/openat.c
@@ -15,5 +15,3 @@ int openat(int fd, const char *filename, int flags, ...)
 
 	return syscall_cp(SYS_openat, fd, filename, flags|O_LARGEFILE, mode);
 }
-
-weak_alias(openat, openat64);
diff --git a/src/fcntl/posix_fadvise.c b/src/fcntl/posix_fadvise.c
index 75b8e1ae..07346d21 100644
--- a/src/fcntl/posix_fadvise.c
+++ b/src/fcntl/posix_fadvise.c
@@ -14,5 +14,3 @@ int posix_fadvise(int fd, off_t base, off_t len, int advice)
 		__SYSCALL_LL_E(len), advice);
 #endif
 }
-
-weak_alias(posix_fadvise, posix_fadvise64);
diff --git a/src/fcntl/posix_fallocate.c b/src/fcntl/posix_fallocate.c
index c57a24ae..80a65cbf 100644
--- a/src/fcntl/posix_fallocate.c
+++ b/src/fcntl/posix_fallocate.c
@@ -6,5 +6,3 @@ int posix_fallocate(int fd, off_t base, off_t len)
 	return -__syscall(SYS_fallocate, fd, 0, __SYSCALL_LL_E(base),
 		__SYSCALL_LL_E(len));
 }
-
-weak_alias(posix_fallocate, posix_fallocate64);
diff --git a/src/fenv/loongarch64/fenv.S b/src/fenv/loongarch64/fenv.S
new file mode 100644
index 00000000..9c38599e
--- /dev/null
+++ b/src/fenv/loongarch64/fenv.S
@@ -0,0 +1,78 @@
+#ifndef __loongarch_soft_float
+
+#ifdef BROKEN_LOONGARCH_FCSR_ASM
+#define FCSR $r0
+#else
+#define FCSR $fcsr0
+#endif
+
+.global feclearexcept
+.type   feclearexcept,@function
+feclearexcept:
+	li.w    $t0, 0x1f0000
+	and     $a0, $a0, $t0
+	movfcsr2gr $t1, FCSR
+	andn    $t1, $t1, $a0
+	movgr2fcsr FCSR, $t1
+	li.w    $a0, 0
+	jr      $ra
+
+.global feraiseexcept
+.type   feraiseexcept,@function
+feraiseexcept:
+	li.w    $t0, 0x1f0000
+	and     $a0, $a0, $t0
+	movfcsr2gr $t1, FCSR
+	or      $t1, $t1, $a0
+	movgr2fcsr FCSR, $t1
+	li.w    $a0, 0
+	jr      $ra
+
+.global fetestexcept
+.type   fetestexcept,@function
+fetestexcept:
+	li.w    $t0, 0x1f0000
+	and     $a0, $a0, $t0
+	movfcsr2gr $t1, FCSR
+	and     $a0, $t1, $a0
+	jr      $ra
+
+.global fegetround
+.type   fegetround,@function
+fegetround:
+	movfcsr2gr $t0, FCSR
+	andi    $a0, $t0, 0x300
+	jr      $ra
+
+.global __fesetround
+.hidden __fesetround
+.type   __fesetround,@function
+__fesetround:
+	li.w    $t0, 0x300
+	and     $a0, $a0, $t0
+	movfcsr2gr $t1, FCSR
+	andn    $t1, $t1, $t0
+	or      $t1, $t1, $a0
+	movgr2fcsr FCSR, $t1
+	li.w    $a0, 0
+	jr      $ra
+
+.global fegetenv
+.type   fegetenv,@function
+fegetenv:
+	movfcsr2gr $t0, FCSR
+	st.w    $t0, $a0, 0
+	li.w    $a0, 0
+	jr      $ra
+
+.global fesetenv
+.type   fesetenv,@function
+fesetenv:
+	addi.d  $t0, $a0, 1
+	beq     $t0, $r0, 1f
+	ld.w    $t0, $a0, 0
+1:	movgr2fcsr FCSR, $t0
+	li.w    $a0, 0
+	jr      $ra
+
+#endif
diff --git a/src/fenv/powerpc/fenv-sf.c b/src/fenv/powerpc/fenv-sf.c
index 85bef40f..d4248f26 100644
--- a/src/fenv/powerpc/fenv-sf.c
+++ b/src/fenv/powerpc/fenv-sf.c
@@ -1,3 +1,3 @@
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
 #include "../fenv.c"
 #endif
diff --git a/src/fenv/powerpc/fenv.S b/src/fenv/powerpc/fenv.S
index 22cea216..55055d0b 100644
--- a/src/fenv/powerpc/fenv.S
+++ b/src/fenv/powerpc/fenv.S
@@ -1,4 +1,4 @@
-#ifndef _SOFT_FLOAT
+#if !defined(_SOFT_FLOAT) && !defined(__NO_FPRS__)
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:
diff --git a/src/fenv/riscv32/fenv-sf.c b/src/fenv/riscv32/fenv-sf.c
new file mode 100644
index 00000000..ecd3cb5c
--- /dev/null
+++ b/src/fenv/riscv32/fenv-sf.c
@@ -0,0 +1,3 @@
+#ifndef __riscv_flen
+#include "../fenv.c"
+#endif
diff --git a/src/fenv/riscv32/fenv.S b/src/fenv/riscv32/fenv.S
new file mode 100644
index 00000000..0ea78bf9
--- /dev/null
+++ b/src/fenv/riscv32/fenv.S
@@ -0,0 +1,56 @@
+#ifdef __riscv_flen
+
+.global feclearexcept
+.type feclearexcept, %function
+feclearexcept:
+	csrc fflags, a0
+	li a0, 0
+	ret
+
+.global feraiseexcept
+.type feraiseexcept, %function
+feraiseexcept:
+	csrs fflags, a0
+	li a0, 0
+	ret
+
+.global fetestexcept
+.type fetestexcept, %function
+fetestexcept:
+	frflags t0
+	and a0, t0, a0
+	ret
+
+.global fegetround
+.type fegetround, %function
+fegetround:
+	frrm a0
+	ret
+
+.global __fesetround
+.type __fesetround, %function
+__fesetround:
+	fsrm t0, a0
+	li a0, 0
+	ret
+
+.global fegetenv
+.type fegetenv, %function
+fegetenv:
+	frcsr t0
+	sw t0, 0(a0)
+	li a0, 0
+	ret
+
+.global fesetenv
+.type fesetenv, %function
+fesetenv:
+	li t2, -1
+	li t1, 0
+	beq a0, t2, 1f
+	lw t1, 0(a0)
+1:	fscsr t1
+	li a0, 0
+	ret
+
+#endif
diff --git a/src/fenv/riscv64/fenv-sf.c b/src/fenv/riscv64/fenv-sf.c
new file mode 100644
index 00000000..ecd3cb5c
--- /dev/null
+++ b/src/fenv/riscv64/fenv-sf.c
@@ -0,0 +1,3 @@
+#ifndef __riscv_flen
+#include "../fenv.c"
+#endif
diff --git a/src/fenv/riscv64/fenv.S b/src/fenv/riscv64/fenv.S
new file mode 100644
index 00000000..0ea78bf9
--- /dev/null
+++ b/src/fenv/riscv64/fenv.S
@@ -0,0 +1,56 @@
+#ifdef __riscv_flen
+
+.global feclearexcept
+.type feclearexcept, %function
+feclearexcept:
+	csrc fflags, a0
+	li a0, 0
+	ret
+
+.global feraiseexcept
+.type feraiseexcept, %function
+feraiseexcept:
+	csrs fflags, a0
+	li a0, 0
+	ret
+
+.global fetestexcept
+.type fetestexcept, %function
+fetestexcept:
+	frflags t0
+	and a0, t0, a0
+	ret
+
+.global fegetround
+.type fegetround, %function
+fegetround:
+	frrm a0
+	ret
+
+.global __fesetround
+.type __fesetround, %function
+__fesetround:
+	fsrm t0, a0
+	li a0, 0
+	ret
+
+.global fegetenv
+.type fegetenv, %function
+fegetenv:
+	frcsr t0
+	sw t0, 0(a0)
+	li a0, 0
+	ret
+
+.global fesetenv
+.type fesetenv, %function
+fesetenv:
+	li t2, -1
+	li t1, 0
+	beq a0, t2, 1f
+	lw t1, 0(a0)
+1:	fscsr t1
+	li a0, 0
+	ret
+
+#endif
diff --git a/src/fenv/sh/fenv.S b/src/fenv/sh/fenv.S
index 907aefc0..b3b7d66a 100644
--- a/src/fenv/sh/fenv.S
+++ b/src/fenv/sh/fenv.S
@@ -12,6 +12,8 @@ fegetround:
 .type   __fesetround, @function
 __fesetround:
 	sts fpscr, r0
+	mov #-4, r1
+	and r1, r0
 	or  r4, r0
 	lds r0, fpscr
 	rts
diff --git a/src/include/errno.h b/src/include/errno.h
index 54a38ff4..8ec49377 100644
--- a/src/include/errno.h
+++ b/src/include/errno.h
@@ -3,6 +3,9 @@
 
 #include "../../include/errno.h"
 
+#ifdef __GNUC__
+__attribute__((const))
+#endif
 hidden int *___errno_location(void);
 
 #undef errno
diff --git a/src/include/pthread.h b/src/include/pthread.h
index d93ac3a5..7167d3e1 100644
--- a/src/include/pthread.h
+++ b/src/include/pthread.h
@@ -18,5 +18,12 @@ hidden int __private_cond_signal(pthread_cond_t *, int);
 hidden int __pthread_cond_timedwait(pthread_cond_t *restrict, pthread_mutex_t *restrict, const struct timespec *restrict);
 hidden int __pthread_key_create(pthread_key_t *, void (*)(void *));
 hidden int __pthread_key_delete(pthread_key_t);
+hidden int __pthread_rwlock_rdlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_tryrdlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_timedrdlock(pthread_rwlock_t *__restrict, const struct timespec *__restrict);
+hidden int __pthread_rwlock_wrlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_trywrlock(pthread_rwlock_t *);
+hidden int __pthread_rwlock_timedwrlock(pthread_rwlock_t *__restrict, const struct timespec *__restrict);
+hidden int __pthread_rwlock_unlock(pthread_rwlock_t *);
 
 #endif
diff --git a/src/include/stdio.h b/src/include/stdio.h
index 534c6907..fae3755b 100644
--- a/src/include/stdio.h
+++ b/src/include/stdio.h
@@ -1,6 +1,8 @@
 #ifndef STDIO_H
 #define STDIO_H
 
+#define __DEFINED_struct__IO_FILE
+
 #include "../../include/stdio.h"
 
 #undef stdin
diff --git a/src/include/stdlib.h b/src/include/stdlib.h
index d38a5417..812b04de 100644
--- a/src/include/stdlib.h
+++ b/src/include/stdlib.h
@@ -8,5 +8,12 @@ hidden void __env_rm_add(char *, char *);
 hidden int __mkostemps(char *, int, int);
 hidden int __ptsname_r(int, char *, size_t);
 hidden char *__randname(char *);
+hidden void __qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
+
+hidden void *__libc_malloc(size_t);
+hidden void *__libc_malloc_impl(size_t);
+hidden void *__libc_calloc(size_t, size_t);
+hidden void *__libc_realloc(void *, size_t);
+hidden void __libc_free(void *);
 
 #endif
diff --git a/src/include/sys/membarrier.h b/src/include/sys/membarrier.h
new file mode 100644
index 00000000..3654491c
--- /dev/null
+++ b/src/include/sys/membarrier.h
@@ -0,0 +1,9 @@
+#ifndef SYS_MEMBARRIER_H
+#define SYS_MEMBARRIER_H
+
+#include "../../../include/sys/membarrier.h"
+#include <features.h>
+
+hidden int __membarrier(int, int);
+
+#endif
diff --git a/src/include/sys/stat.h b/src/include/sys/stat.h
new file mode 100644
index 00000000..59339bee
--- /dev/null
+++ b/src/include/sys/stat.h
@@ -0,0 +1,9 @@
+#ifndef SYS_STAT_H
+#define SYS_STAT_H
+
+#include "../../../include/sys/stat.h"
+
+hidden int __fstat(int, struct stat *);
+hidden int __fstatat(int, const char *restrict, struct stat *restrict, int);
+
+#endif
diff --git a/src/include/time.h b/src/include/time.h
index 24c87973..cbabde47 100644
--- a/src/include/time.h
+++ b/src/include/time.h
@@ -4,6 +4,7 @@
 #include "../../include/time.h"
 
 hidden int __clock_gettime(clockid_t, struct timespec *);
+hidden int __clock_nanosleep(clockid_t, int, const struct timespec *, struct timespec *);
 
 hidden char *__asctime_r(const struct tm *, char *);
 hidden struct tm *__gmtime_r(const time_t *restrict, struct tm *restrict);
diff --git a/src/include/unistd.h b/src/include/unistd.h
index 6deb1bcc..7b52a924 100644
--- a/src/include/unistd.h
+++ b/src/include/unistd.h
@@ -8,6 +8,6 @@ extern char **__environ;
 hidden int __dup3(int, int, int);
 hidden int __mkostemps(char *, int, int);
 hidden int __execvpe(const char *, char *const *, char *const *);
-hidden int __aio_close(int);
+hidden off_t __lseek(int, off_t, int);
 
 #endif
diff --git a/src/include/wchar.h b/src/include/wchar.h
new file mode 100644
index 00000000..79f5d0e7
--- /dev/null
+++ b/src/include/wchar.h
@@ -0,0 +1,9 @@
+#ifndef WCHAR_H
+#define WCHAR_H
+
+#define __DEFINED_struct__IO_FILE
+
+#include "../../include/wchar.h"
+
+#endif
+
diff --git a/src/internal/aarch64/syscall.s b/src/internal/aarch64/syscall.s
deleted file mode 100644
index 845986bf..00000000
--- a/src/internal/aarch64/syscall.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,%function
-__syscall:
-	uxtw x8,w0
-	mov x0,x1
-	mov x1,x2
-	mov x2,x3
-	mov x3,x4
-	mov x4,x5
-	mov x5,x6
-	mov x6,x7
-	svc 0
-	ret
diff --git a/src/internal/aio_impl.h b/src/internal/aio_impl.h
new file mode 100644
index 00000000..a8657665
--- /dev/null
+++ b/src/internal/aio_impl.h
@@ -0,0 +1,9 @@
+#ifndef AIO_IMPL_H
+#define AIO_IMPL_H
+
+extern hidden volatile int __aio_fut;
+
+extern hidden int __aio_close(int);
+extern hidden void __aio_atfork(int);
+
+#endif
diff --git a/src/internal/arm/syscall.s b/src/internal/arm/syscall.s
deleted file mode 100644
index 64dba2fc..00000000
--- a/src/internal/arm/syscall.s
+++ /dev/null
@@ -1,15 +0,0 @@
-.syntax unified
-.global __syscall
-.hidden __syscall
-.type __syscall,%function
-__syscall:
-	mov ip,sp
-	stmfd sp!,{r4,r5,r6,r7}
-	mov r7,r0
-	mov r0,r1
-	mov r1,r2
-	mov r2,r3
-	ldmfd ip,{r3,r4,r5,r6}
-	svc 0
-	ldmfd sp!,{r4,r5,r6,r7}
-	bx lr
diff --git a/src/internal/atomic.h b/src/internal/atomic.h
index f938879b..8f71c8cd 100644
--- a/src/internal/atomic.h
+++ b/src/internal/atomic.h
@@ -194,7 +194,7 @@ static inline void a_store(volatile int *p, int v)
 
 #ifndef a_barrier
 #define a_barrier a_barrier
-static void a_barrier()
+static inline void a_barrier()
 {
 	volatile int tmp = 0;
 	a_cas(&tmp, 0, 0);
@@ -315,4 +315,19 @@ static inline int a_clz_64(uint64_t x)
 }
 #endif
 
+#ifndef a_clz_32
+#define a_clz_32 a_clz_32
+static inline int a_clz_32(uint32_t x)
+{
+	x >>= 1;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x++;
+	return 31-a_ctz_32(x);
+}
+#endif
+
 #endif
diff --git a/src/internal/complex_impl.h b/src/internal/complex_impl.h
new file mode 100644
index 00000000..51fb298a
--- /dev/null
+++ b/src/internal/complex_impl.h
@@ -0,0 +1,22 @@
+#ifndef _COMPLEX_IMPL_H
+#define _COMPLEX_IMPL_H
+
+#include <complex.h>
+#include "libm.h"
+
+#undef __CMPLX
+#undef CMPLX
+#undef CMPLXF
+#undef CMPLXL
+
+#define __CMPLX(x, y, t) \
+	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
+
+#define CMPLX(x, y) __CMPLX(x, y, double)
+#define CMPLXF(x, y) __CMPLX(x, y, float)
+#define CMPLXL(x, y) __CMPLX(x, y, long double)
+
+hidden double complex __ldexp_cexp(double complex,int);
+hidden float complex __ldexp_cexpf(float complex,int);
+
+#endif
diff --git a/src/internal/defsysinfo.c b/src/internal/defsysinfo.c
new file mode 100644
index 00000000..6d4117db
--- /dev/null
+++ b/src/internal/defsysinfo.c
@@ -0,0 +1,3 @@
+#include "libc.h"
+
+size_t __sysinfo;
diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
index cbe0a6fe..40c743e2 100644
--- a/src/internal/dynlink.h
+++ b/src/internal/dynlink.h
@@ -28,6 +28,7 @@ typedef Elf64_Sym Sym;
 enum {
 	REL_NONE = 0,
 	REL_SYMBOLIC = -100,
+	REL_USYMBOLIC,
 	REL_GOT,
 	REL_PLT,
 	REL_RELATIVE,
@@ -72,6 +73,10 @@ struct fdpic_dummy_loadmap {
 #define DL_NOMMU_SUPPORT 0
 #endif
 
+#ifndef TLSDESC_BACKWARDS
+#define TLSDESC_BACKWARDS 0
+#endif
+
 #if !DL_FDPIC
 #define IS_RELATIVE(x,s) ( \
 	(R_TYPE(x) == REL_RELATIVE) || \
@@ -91,11 +96,14 @@ struct fdpic_dummy_loadmap {
 #define DT_DEBUG_INDIRECT 0
 #endif
 
+#ifndef DT_DEBUG_INDIRECT_REL
+#define DT_DEBUG_INDIRECT_REL 0
+#endif
+
 #define AUX_CNT 32
-#define DYN_CNT 32
+#define DYN_CNT 37
 
 typedef void (*stage2_func)(unsigned char *, size_t *);
-typedef _Noreturn void (*stage3_func)(size_t *);
 
 hidden void *__dlsym(void *restrict, const char *restrict, void *restrict);
 
@@ -105,4 +113,9 @@ hidden void __dl_vseterr(const char *, va_list);
 
 hidden ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
 
+hidden extern int __malloc_replaced;
+hidden extern int __aligned_alloc_replaced;
+hidden void __malloc_donate(char *, char *);
+hidden int __malloc_allzerop(void *);
+
 #endif
diff --git a/src/internal/emulate_wait4.c b/src/internal/emulate_wait4.c
new file mode 100644
index 00000000..f6303412
--- /dev/null
+++ b/src/internal/emulate_wait4.c
@@ -0,0 +1,55 @@
+#include <sys/wait.h>
+#include "syscall.h"
+
+#ifndef SYS_wait4
+hidden long __emulate_wait4(int pid, int *status, int options, void *kru, int cp)
+{
+	idtype_t t;
+	int r;
+	siginfo_t info;
+
+	info.si_pid = 0;
+	if (pid < -1) {
+		t = P_PGID;
+		pid = -pid;
+	} else if (pid == -1) {
+		t = P_ALL;
+	} else if (pid == 0) {
+		t = P_PGID;
+	} else {
+		t = P_PID;
+	}
+
+	if (cp) r = __syscall_cp(SYS_waitid, t, pid, &info, options|WEXITED, kru);
+	else r = __syscall(SYS_waitid, t, pid, &info, options|WEXITED, kru);
+
+	if (r<0) return r;
+
+	if (info.si_pid && status) {
+		int sw=0;
+		switch (info.si_code) {
+		case CLD_CONTINUED:
+			sw = 0xffff;
+			break;
+		case CLD_DUMPED:
+			sw = info.si_status&0x7f | 0x80;
+			break;
+		case CLD_EXITED:
+			sw = (info.si_status&0xff) << 8;
+			break;
+		case CLD_KILLED:
+			sw = info.si_status&0x7f;
+			break;
+		case CLD_STOPPED:
+		case CLD_TRAPPED:
+			/* see ptrace(2); the high bits of si_status can contain */
+			/* PTRACE_EVENT_ values which must be preserved */
+			sw = (info.si_status << 8) + 0x7f;
+			break;
+		}
+		*status = sw;
+	}
+
+	return info.si_pid;
+}
+#endif
diff --git a/src/internal/floatscan.c b/src/internal/floatscan.c
index 278bf250..8c0828fc 100644
--- a/src/internal/floatscan.c
+++ b/src/internal/floatscan.c
@@ -33,9 +33,6 @@
 
 #define MASK (KMAX-1)
 
-#define CONCAT2(x,y) x ## y
-#define CONCAT(x,y) CONCAT2(x,y)
-
 static long long scanexp(FILE *f, int pok)
 {
 	int c;
@@ -301,7 +298,7 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
 	y -= bias;
 
 	if ((e2+LDBL_MANT_DIG & INT_MAX) > emax-5) {
-		if (fabs(y) >= CONCAT(0x1p, LDBL_MANT_DIG)) {
+		if (fabsl(y) >= 2/LDBL_EPSILON) {
 			if (denormal && bits==LDBL_MANT_DIG+e2-emin)
 				denormal = 0;
 			y *= 0.5;
diff --git a/src/internal/fork_impl.h b/src/internal/fork_impl.h
new file mode 100644
index 00000000..f995fce2
--- /dev/null
+++ b/src/internal/fork_impl.h
@@ -0,0 +1,21 @@
+#include <features.h>
+
+extern hidden volatile int *const __at_quick_exit_lockptr;
+extern hidden volatile int *const __atexit_lockptr;
+extern hidden volatile int *const __gettext_lockptr;
+extern hidden volatile int *const __locale_lockptr;
+extern hidden volatile int *const __random_lockptr;
+extern hidden volatile int *const __sem_open_lockptr;
+extern hidden volatile int *const __stdio_ofl_lockptr;
+extern hidden volatile int *const __syslog_lockptr;
+extern hidden volatile int *const __timezone_lockptr;
+
+extern hidden volatile int *const __bump_lockptr;
+
+extern hidden volatile int *const __vmlock_lockptr;
+
+hidden void __malloc_atfork(int);
+hidden void __ldso_atfork(int);
+hidden void __pthread_key_atfork(int);
+
+hidden void __post_Fork(int);
diff --git a/src/internal/i386/defsysinfo.s b/src/internal/i386/defsysinfo.s
new file mode 100644
index 00000000..f1b5b0f2
--- /dev/null
+++ b/src/internal/i386/defsysinfo.s
@@ -0,0 +1,9 @@
+1:	int $128
+	ret
+
+.data
+.align 4
+.hidden __sysinfo
+.global __sysinfo
+__sysinfo:
+	.long 1b
diff --git a/src/internal/i386/syscall.s b/src/internal/i386/syscall.s
deleted file mode 100644
index 0ebf2218..00000000
--- a/src/internal/i386/syscall.s
+++ /dev/null
@@ -1,78 +0,0 @@
-.hidden __sysinfo
-
-# The calling convention for __vsyscall has the syscall number
-# and 5 args arriving as:  eax, edx, ecx, edi, esi, 4(%esp).
-# This ensures that the inline asm in the C code never has to touch
-# ebx or ebp (which are unavailable in PIC and frame-pointer-using
-# code, respectively), and optimizes for size/simplicity in the caller.
-
-.global __vsyscall
-.hidden __vsyscall
-.type __vsyscall,@function
-__vsyscall:
-	push %edi
-	push %ebx
-	mov %edx,%ebx
-	mov %edi,%edx
-	mov 12(%esp),%edi
-	push %eax
-	call 1f
-2:	mov %ebx,%edx
-	pop %ebx
-	pop %ebx
-	pop %edi
-	ret
-
-1:	mov (%esp),%eax
-	add $[__sysinfo-2b],%eax
-	mov (%eax),%eax
-	test %eax,%eax
-	jz 1f
-	push %eax
-	mov 8(%esp),%eax
-	ret                     # tail call to kernel vsyscall entry
-1:	mov 4(%esp),%eax
-	int $128
-	ret
-
-# The __vsyscall6 entry point is used only for 6-argument syscalls.
-# Instead of passing the 5th argument on the stack, a pointer to the
-# 5th and 6th arguments is passed. This is ugly, but there are no
-# register constraints the inline asm could use that would make it
-# possible to pass two arguments on the stack.
-
-.global __vsyscall6
-.hidden __vsyscall6
-.type __vsyscall6,@function
-__vsyscall6:
-	push %ebp
-	push %eax
-	mov 12(%esp), %ebp
-	mov (%ebp), %eax
-	mov 4(%ebp), %ebp
-	push %eax
-	mov 4(%esp),%eax
-	call __vsyscall
-	pop %ebp
-	pop %ebp
-	pop %ebp
-	ret
-
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
-	lea 24(%esp),%eax
-	push %esi
-	push %edi
-	push %eax
-	mov 16(%esp),%eax
-	mov 20(%esp),%edx
-	mov 24(%esp),%ecx
-	mov 28(%esp),%edi
-	mov 32(%esp),%esi
-	call __vsyscall6
-	pop %edi
-	pop %edi
-	pop %esi
-	ret
diff --git a/src/internal/ksigaction.h b/src/internal/ksigaction.h
index 8ebd5938..ef333f33 100644
--- a/src/internal/ksigaction.h
+++ b/src/internal/ksigaction.h
@@ -6,8 +6,13 @@
 struct k_sigaction {
 	void (*handler)(int);
 	unsigned long flags;
+#ifdef SA_RESTORER
 	void (*restorer)(void);
+#endif
 	unsigned mask[2];
+#ifndef SA_RESTORER
+	void *unused;
+#endif
 };
 
 hidden void __restore(), __restore_rt();
diff --git a/src/internal/libc.c b/src/internal/libc.c
index 2e10942d..cb051810 100644
--- a/src/internal/libc.c
+++ b/src/internal/libc.c
@@ -3,7 +3,6 @@
 struct __libc __libc;
 
 size_t __hwcap;
-size_t __sysinfo;
 char *__progname=0, *__progname_full=0;
 
 weak_alias(__progname, program_invocation_short_name);
diff --git a/src/internal/libc.h b/src/internal/libc.h
index ac97dc7e..619bba86 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -18,10 +18,11 @@ struct tls_module {
 };
 
 struct __libc {
-	int can_do_threads;
-	int threaded;
-	int secure;
-	volatile int threads_minus_1;
+	char can_do_threads;
+	char threaded;
+	char secure;
+	volatile signed char need_locks;
+	int threads_minus_1;
 	size_t *auxv;
 	struct tls_module *tls_head;
 	size_t tls_size, tls_align, tls_cnt;
diff --git a/src/internal/libm.h b/src/internal/libm.h
index fd916277..72ad17d8 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -1,23 +1,11 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/math_private.h */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #ifndef _LIBM_H
 #define _LIBM_H
 
 #include <stdint.h>
 #include <float.h>
 #include <math.h>
-#include <complex.h>
 #include <endian.h>
+#include "fp_arch.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
@@ -71,124 +59,196 @@ union ldshape {
 #error Unsupported long double representation
 #endif
 
+/* Support non-nearest rounding mode.  */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs.  */
+#define WANT_SNAN 0
+
+#if WANT_SNAN
+#error SNaN is unsupported
+#else
+#define issignalingf_inline(x) 0
+#define issignaling_inline(x) 0
+#endif
+
+#ifndef TOINT_INTRINSICS
+#define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+   consistently with converttoint so the results match.  If the result
+   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
+static double_t roundtoint(double_t);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint.  If the result is not representible in an
+   int32_t then the semantics is unspecified.  */
+static int32_t converttoint(double_t);
+#endif
+
+/* Helps static branch prediction so hot path can be better optimized.  */
+#ifdef __GNUC__
+#define predict_true(x) __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(x, 0)
+#else
+#define predict_true(x) (x)
+#define predict_false(x) (x)
+#endif
+
+/* Evaluate an expression as the specified type. With standard excess
+   precision handling a type cast or assignment is enough (with
+   -ffloat-store an assignment is required, in old compilers argument
+   passing and return statement may not drop excess precision).  */
+
+static inline float eval_as_float(float x)
+{
+	float y = x;
+	return y;
+}
+
+static inline double eval_as_double(double x)
+{
+	double y = x;
+	return y;
+}
+
+/* fp_barrier returns its input, but limits code transformations
+   as if it had a side-effect (e.g. observable io) and returned
+   an arbitrary value.  */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	volatile float y = x;
+	return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	volatile double y = x;
+	return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+	volatile long double y = x;
+	return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+   otherwise unused.  To prevent the constant folding of the input
+   expression, an additional fp_barrier may be needed or a compilation
+   mode that does so (e.g. -frounding-math in gcc). Then it can be
+   used to evaluate an expression for its fenv side-effects only.   */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	volatile float y;
+	y = x;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	volatile double y;
+	y = x;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+	volatile long double y;
+	y = x;
+}
+#endif
+
 #define FORCE_EVAL(x) do {                        \
 	if (sizeof(x) == sizeof(float)) {         \
-		volatile float __x;               \
-		__x = (x);                        \
+		fp_force_evalf(x);                \
 	} else if (sizeof(x) == sizeof(double)) { \
-		volatile double __x;              \
-		__x = (x);                        \
+		fp_force_eval(x);                 \
 	} else {                                  \
-		volatile long double __x;         \
-		__x = (x);                        \
+		fp_force_evall(x);                \
 	}                                         \
 } while(0)
 
-/* Get two 32 bit ints from a double.  */
+#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
+#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
+#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
+#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
+
 #define EXTRACT_WORDS(hi,lo,d)                    \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  (hi) = __u.i >> 32;                             \
-  (lo) = (uint32_t)__u.i;                         \
+  uint64_t __u = asuint64(d);                     \
+  (hi) = __u >> 32;                               \
+  (lo) = (uint32_t)__u;                           \
 } while (0)
 
-/* Get the more significant 32 bit int from a double.  */
 #define GET_HIGH_WORD(hi,d)                       \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  (hi) = __u.i >> 32;                             \
+  (hi) = asuint64(d) >> 32;                       \
 } while (0)
 
-/* Get the less significant 32 bit int from a double.  */
 #define GET_LOW_WORD(lo,d)                        \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  (lo) = (uint32_t)__u.i;                         \
+  (lo) = (uint32_t)asuint64(d);                   \
 } while (0)
 
-/* Set a double from two 32 bit ints.  */
 #define INSERT_WORDS(d,hi,lo)                     \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.i = ((uint64_t)(hi)<<32) | (uint32_t)(lo);  \
-  (d) = __u.f;                                    \
+  (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
 } while (0)
 
-/* Set the more significant 32 bits of a double from an int.  */
 #define SET_HIGH_WORD(d,hi)                       \
-do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  __u.i &= 0xffffffff;                            \
-  __u.i |= (uint64_t)(hi) << 32;                  \
-  (d) = __u.f;                                    \
-} while (0)
+  INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
 
-/* Set the less significant 32 bits of a double from an int.  */
 #define SET_LOW_WORD(d,lo)                        \
-do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  __u.i &= 0xffffffff00000000ull;                 \
-  __u.i |= (uint32_t)(lo);                        \
-  (d) = __u.f;                                    \
-} while (0)
+  INSERT_WORDS(d, asuint64(d)>>32, lo)
 
-/* Get a 32 bit int from a float.  */
 #define GET_FLOAT_WORD(w,d)                       \
 do {                                              \
-  union {float f; uint32_t i;} __u;               \
-  __u.f = (d);                                    \
-  (w) = __u.i;                                    \
+  (w) = asuint(d);                                \
 } while (0)
 
-/* Set a float from a 32 bit int.  */
 #define SET_FLOAT_WORD(d,w)                       \
 do {                                              \
-  union {float f; uint32_t i;} __u;               \
-  __u.i = (w);                                    \
-  (d) = __u.f;                                    \
+  (d) = asfloat(w);                               \
 } while (0)
 
-#undef __CMPLX
-#undef CMPLX
-#undef CMPLXF
-#undef CMPLXL
-
-#define __CMPLX(x, y, t) \
-	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
-
-#define CMPLX(x, y) __CMPLX(x, y, double)
-#define CMPLXF(x, y) __CMPLX(x, y, float)
-#define CMPLXL(x, y) __CMPLX(x, y, long double)
-
-/* fdlibm kernel functions */
-
 hidden int    __rem_pio2_large(double*,double*,int,int,int);
 
 hidden int    __rem_pio2(double,double*);
 hidden double __sin(double,double,int);
 hidden double __cos(double,double);
 hidden double __tan(double,double,int);
-hidden double __expo2(double);
-hidden double complex __ldexp_cexp(double complex,int);
+hidden double __expo2(double,double);
 
 hidden int    __rem_pio2f(float,double*);
 hidden float  __sindf(double);
 hidden float  __cosdf(double);
 hidden float  __tandf(double,int);
-hidden float  __expo2f(float);
-hidden float complex __ldexp_cexpf(float complex,int);
+hidden float  __expo2f(float,float);
 
 hidden int __rem_pio2l(long double, long double *);
 hidden long double __sinl(long double, long double, int);
 hidden long double __cosl(long double, long double);
 hidden long double __tanl(long double, long double, int);
 
-/* polynomial evaluation */
 hidden long double __polevll(long double, const long double *, int);
 hidden long double __p1evll(long double, const long double *, int);
 
@@ -196,4 +256,19 @@ extern int __signgam;
 hidden double __lgamma_r(double, int *);
 hidden float __lgammaf_r(float, int *);
 
+/* error handling functions */
+hidden float __math_xflowf(uint32_t, float);
+hidden float __math_uflowf(uint32_t);
+hidden float __math_oflowf(uint32_t);
+hidden float __math_divzerof(uint32_t);
+hidden float __math_invalidf(float);
+hidden double __math_xflow(uint32_t, double);
+hidden double __math_uflow(uint32_t);
+hidden double __math_oflow(uint32_t);
+hidden double __math_divzero(uint32_t);
+hidden double __math_invalid(double);
+#if LDBL_MANT_DIG != DBL_MANT_DIG
+hidden long double __math_invalidl(long double);
+#endif
+
 #endif
diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h
index 741a71c4..4431a92e 100644
--- a/src/internal/locale_impl.h
+++ b/src/internal/locale_impl.h
@@ -15,6 +15,8 @@ struct __locale_map {
 	const struct __locale_map *next;
 };
 
+extern hidden volatile int __locale_lock[1];
+
 extern hidden const struct __locale_map __c_dot_utf8;
 extern hidden const struct __locale_struct __c_locale;
 extern hidden const struct __locale_struct __c_dot_utf8_locale;
diff --git a/src/internal/m68k/syscall.s b/src/internal/m68k/syscall.s
deleted file mode 100644
index 9972a34d..00000000
--- a/src/internal/m68k/syscall.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,%function
-__syscall:
-	movem.l %d2-%d5,-(%sp)
-	movem.l 20(%sp),%d0-%d5/%a0
-	trap #0
-	movem.l (%sp)+,%d2-%d5
-	rts
diff --git a/src/internal/microblaze/syscall.s b/src/internal/microblaze/syscall.s
deleted file mode 100644
index e0312e78..00000000
--- a/src/internal/microblaze/syscall.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type   __syscall,@function
-__syscall:
-	addi    r12, r5, 0              # Save the system call number
-	add     r5, r6, r0              # Shift the arguments, arg1
-	add     r6, r7, r0              # arg2
-	add     r7, r8, r0              # arg3
-	add     r8, r9, r0              # arg4
-	add     r9, r10, r0             # arg5
-	lwi     r10, r1, 28             # Get arg6.
-	brki    r14, 0x8                # syscall
-	rtsd    r15, 8
-	nop
diff --git a/src/internal/mips/syscall.s b/src/internal/mips/syscall.s
deleted file mode 100644
index 5d0def52..00000000
--- a/src/internal/mips/syscall.s
+++ /dev/null
@@ -1,26 +0,0 @@
-.set    noreorder
-
-.global __syscall
-.hidden __syscall
-.type   __syscall,@function
-__syscall:
-	move    $2, $4
-	move    $4, $5
-	move    $5, $6
-	move    $6, $7
-	lw      $7, 16($sp)
-	lw      $8, 20($sp)
-	lw      $9, 24($sp)
-	lw      $10,28($sp)
-	subu    $sp, $sp, 32
-	sw      $8, 16($sp)
-	sw      $9, 20($sp)
-	sw      $10,24($sp)
-	sw      $2 ,28($sp)
-	lw      $2, 28($sp)
-	syscall
-	beq     $7, $0, 1f
-	addu    $sp, $sp, 32
-	subu    $2, $0, $2
-1:	jr      $ra
-	nop
diff --git a/src/internal/mips64/syscall.s b/src/internal/mips64/syscall.s
deleted file mode 100644
index 98448667..00000000
--- a/src/internal/mips64/syscall.s
+++ /dev/null
@@ -1,19 +0,0 @@
-.set	noreorder
-.global	__syscall
-.hidden	__syscall
-.type	__syscall,@function
-__syscall:
-	move	$2, $4
-	move	$4, $5
-	move	$5, $6
-	move	$6, $7
-	move	$7, $8
-	move	$8, $9
-	move	$9, $10
-	move	$10, $11
-	syscall
-	beq	$7, $0, 1f
-	nop
-	dsubu	$2, $0, $2
-1:	jr	$ra
-	nop
diff --git a/src/internal/mipsn32/syscall.s b/src/internal/mipsn32/syscall.s
deleted file mode 100644
index 510a6fa4..00000000
--- a/src/internal/mipsn32/syscall.s
+++ /dev/null
@@ -1,19 +0,0 @@
-.set	noreorder
-.global	__syscall
-.hidden	__syscall
-.type	__syscall,@function
-__syscall:
-	move	$2, $4
-	move	$4, $5
-	move	$5, $6
-	move	$6, $7
-	move	$7, $8
-	move	$8, $9
-	move	$9, $10
-	move	$10, $11
-	syscall
-	beq	$7, $0, 1f
-	nop
-	subu	$2, $0, $2
-1:	jr	$ra
-	nop
diff --git a/src/internal/or1k/syscall.s b/src/internal/or1k/syscall.s
deleted file mode 100644
index 177964e9..00000000
--- a/src/internal/or1k/syscall.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type   __syscall,@function
-__syscall:
-	l.ori	r11, r3, 0
-	l.lwz	r3, 0(r1)
-	l.lwz	r4, 4(r1)
-	l.lwz	r5, 8(r1)
-	l.lwz	r6, 12(r1)
-	l.lwz	r7, 16(r1)
-	l.lwz	r8, 20(r1)
-	l.sys	1
-	l.jr	r9
-	 l.nop
diff --git a/src/internal/powerpc/syscall.s b/src/internal/powerpc/syscall.s
deleted file mode 100644
index 5b16b8f2..00000000
--- a/src/internal/powerpc/syscall.s
+++ /dev/null
@@ -1,19 +0,0 @@
-	.global __syscall
-	.hidden __syscall
-	.type   __syscall,@function
-__syscall:
-	mr      0, 3                  # Save the system call number
-	mr      3, 4                  # Shift the arguments: arg1
-	mr      4, 5                  # arg2
-	mr      5, 6                  # arg3
-	mr      6, 7                  # arg4
-	mr      7, 8                  # arg5
-	mr      8, 9                  # arg6
-	sc
-	bnslr+ # return if not summary overflow
-	#else error:
-	# return negated value.
-	neg 3, 3
-	blr
-	.end    __syscall
-	.size   __syscall, .-__syscall
diff --git a/src/internal/powerpc64/syscall.s b/src/internal/powerpc64/syscall.s
deleted file mode 100644
index fe21f9e1..00000000
--- a/src/internal/powerpc64/syscall.s
+++ /dev/null
@@ -1,17 +0,0 @@
-	.global __syscall
-	.hidden __syscall
-	.type   __syscall,@function
-__syscall:
-	mr      0, 3                  # Save the system call number
-	mr      3, 4                  # Shift the arguments: arg1
-	mr      4, 5                  # arg2
-	mr      5, 6                  # arg3
-	mr      6, 7                  # arg4
-	mr      7, 8                  # arg5
-	mr      8, 9                  # arg6
-	sc
-	bnslr+       # return if not summary overflow
-	neg     3, 3 # otherwise error: return negated value.
-	blr
-	.end    __syscall
-	.size   __syscall, .-__syscall
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index 7a25b88e..de2b9d8b 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -11,16 +11,25 @@
 #include "atomic.h"
 #include "futex.h"
 
+#include "pthread_arch.h"
+
 #define pthread __pthread
 
 struct pthread {
 	/* Part 1 -- these fields may be external or
 	 * internal (accessed via asm) ABI. Do not change. */
 	struct pthread *self;
+#ifndef TLS_ABOVE_TP
 	uintptr_t *dtv;
-	void *unused1, *unused2;
+#endif
+	struct pthread *prev, *next; /* non-ABI */
 	uintptr_t sysinfo;
-	uintptr_t canary, canary2;
+#ifndef TLS_ABOVE_TP
+#ifdef CANARY_PAD
+	uintptr_t canary_pad;
+#endif
+	uintptr_t canary;
+#endif
 
 	/* Part 2 -- implementation details, non-ABI. */
 	int tid;
@@ -29,15 +38,12 @@ struct pthread {
 	volatile int cancel;
 	volatile unsigned char canceldisable, cancelasync;
 	unsigned char tsd_used:1;
-	unsigned char unblock_cancel:1;
 	unsigned char dlerror_flag:1;
 	unsigned char *map_base;
 	size_t map_size;
 	void *stack;
 	size_t stack_size;
 	size_t guard_size;
-	void *start_arg;
-	void *(*start)(void *);
 	void *result;
 	struct __ptcb *cancelbuf;
 	void **tsd;
@@ -46,6 +52,7 @@ struct pthread {
 		long off;
 		volatile void *volatile pending;
 	} robust_list;
+	int h_errno_val;
 	volatile int timer_id;
 	locale_t locale;
 	volatile int killlock[1];
@@ -54,16 +61,10 @@ struct pthread {
 
 	/* Part 3 -- the positions of these fields relative to
 	 * the end of the structure is external and internal ABI. */
-	uintptr_t canary_at_end;
-	uintptr_t *dtv_copy;
-};
-
-struct start_sched_args {
-	void *start_arg;
-	void *(*start_fn)(void *);
-	sigset_t mask;
-	pthread_attr_t *attr;
-	volatile int futex;
+#ifdef TLS_ABOVE_TP
+	uintptr_t canary;
+	uintptr_t *dtv;
+#endif
 };
 
 enum {
@@ -71,12 +72,6 @@ enum {
 	DT_EXITING,
 	DT_JOINABLE,
 	DT_DETACHED,
-	DT_DYNAMIC,
-};
-
-struct __timer {
-	int timerid;
-	pthread_t thread;
 };
 
 #define __SU (sizeof(size_t)/sizeof(int))
@@ -111,16 +106,22 @@ struct __timer {
 #define _b_waiters2 __u.__vi[4]
 #define _b_inst __u.__p[3]
 
-#include "pthread_arch.h"
-
-#ifndef CANARY
-#define CANARY canary
+#ifndef TP_OFFSET
+#define TP_OFFSET 0
 #endif
 
 #ifndef DTP_OFFSET
 #define DTP_OFFSET 0
 #endif
 
+#ifdef TLS_ABOVE_TP
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + TP_OFFSET)
+#define __pthread_self() ((pthread_t)(__get_tp() - sizeof(struct __pthread) - TP_OFFSET))
+#else
+#define TP_ADJ(p) (p)
+#define __pthread_self() ((pthread_t)__get_tp())
+#endif
+
 #ifndef tls_mod_off_t
 #define tls_mod_off_t size_t
 #endif
@@ -138,11 +139,11 @@ struct __timer {
 	 0x80000000 })
 
 void *__tls_get_addr(tls_mod_off_t *);
-hidden void *__tls_get_new(tls_mod_off_t *);
 hidden int __init_tp(void *);
 hidden void *__copy_tls(unsigned char *);
 hidden void __reset_tls();
 
+hidden void __membarrier_init(void);
 hidden void __dl_thread_cleanup(void);
 hidden void __testcancel();
 hidden void __do_cleanup_push(struct __ptcb *);
@@ -152,10 +153,9 @@ hidden void __pthread_tsd_run_dtors();
 hidden void __pthread_key_delete_synccall(void (*)(void *), void *);
 hidden int __pthread_key_delete_impl(pthread_key_t);
 
-extern hidden volatile int __block_new_threads;
 extern hidden volatile size_t __pthread_tsd_size;
 extern hidden void *__pthread_tsd_main[];
-extern hidden volatile int __aio_fut;
+extern hidden volatile int __eintr_valid_flag;
 
 hidden int __clone(int (*)(void *), void *, int, void *, ...);
 hidden int __set_thread_area(void *);
@@ -183,6 +183,14 @@ hidden void __acquire_ptc(void);
 hidden void __release_ptc(void);
 hidden void __inhibit_ptc(void);
 
+hidden void __tl_lock(void);
+hidden void __tl_unlock(void);
+hidden void __tl_sync(pthread_t);
+
+extern hidden volatile int __thread_list_lock;
+
+extern hidden volatile int __abort_lock[1];
+
 extern hidden unsigned __default_stacksize;
 extern hidden unsigned __default_guardsize;
 
diff --git a/src/internal/s390x/syscall.s b/src/internal/s390x/syscall.s
deleted file mode 100644
index 2322bc36..00000000
--- a/src/internal/s390x/syscall.s
+++ /dev/null
@@ -1,15 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type   __syscall, %function
-__syscall:
-	stg %r7, 56(%r15)
-	lgr %r1, %r2
-	lgr %r2, %r3
-	lgr %r3, %r4
-	lgr %r4, %r5
-	lgr %r5, %r6
-	lg  %r6, 160(%r15)
-	lg  %r7, 168(%r15)
-	svc 0
-	lg  %r7, 56(%r15)
-	br  %r14
diff --git a/src/internal/sh/syscall.s b/src/internal/sh/syscall.s
deleted file mode 100644
index 331918a5..00000000
--- a/src/internal/sh/syscall.s
+++ /dev/null
@@ -1,23 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type   __syscall, @function
-__syscall:
-	! The kernel syscall entry point documents that the trap number indicates
-	! the number of arguments being passed, but it then ignores that information.
-	! Since we do not actually know how many arguments are being passed, we will
-	! say there are six, since that is the maximum we support here.
-	mov r4, r3
-	mov r5, r4
-	mov r6, r5
-	mov r7, r6
-	mov.l @r15, r7
-	mov.l @(4,r15), r0
-	mov.l @(8,r15), r1
-	trapa #31
-	or r0, r0
-	or r0, r0
-	or r0, r0
-	or r0, r0
-	or r0, r0
-	rts
-	 nop
diff --git a/src/internal/shgetc.c b/src/internal/shgetc.c
index ebd5fae7..7455d2f0 100644
--- a/src/internal/shgetc.c
+++ b/src/internal/shgetc.c
@@ -22,7 +22,8 @@ int __shgetc(FILE *f)
 	off_t cnt = shcnt(f);
 	if (f->shlim && cnt >= f->shlim || (c=__uflow(f)) < 0) {
 		f->shcnt = f->buf - f->rpos + cnt;
-		f->shend = 0;
+		f->shend = f->rpos;
+		f->shlim = -1;
 		return EOF;
 	}
 	cnt++;
@@ -31,6 +32,6 @@ int __shgetc(FILE *f)
 	else
 		f->shend = f->rend;
 	f->shcnt = f->buf - f->rpos + cnt;
-	if (f->rpos[-1] != c) f->rpos[-1] = c;
+	if (f->rpos <= f->buf) f->rpos[-1] = c;
 	return c;
 }
diff --git a/src/internal/shgetc.h b/src/internal/shgetc.h
index 1c30f75f..9435381a 100644
--- a/src/internal/shgetc.h
+++ b/src/internal/shgetc.h
@@ -26,7 +26,7 @@ hidden int __shgetc(FILE *);
 #define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->buf))
 #define shlim(f, lim) __shlim((f), (lim))
 #define shgetc(f) (((f)->rpos != (f)->shend) ? *(f)->rpos++ : __shgetc(f))
-#define shunget(f) ((f)->shend ? (void)(f)->rpos-- : (void)0)
+#define shunget(f) ((f)->shlim>=0 ? (void)(f)->rpos-- : (void)0)
 
 #define sh_fromstring(f, s) \
 	((f)->buf = (f)->rpos = (void *)(s), (f)->rend = (void*)-1)
diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
index d7398f59..0b2438d6 100644
--- a/src/internal/stdio_impl.h
+++ b/src/internal/stdio_impl.h
@@ -60,8 +60,6 @@ hidden size_t __stdout_write(FILE *, const unsigned char *, size_t);
 hidden off_t __stdio_seek(FILE *, off_t, int);
 hidden int __stdio_close(FILE *);
 
-hidden size_t __string_read(FILE *, unsigned char *, size_t);
-
 hidden int __toread(FILE *);
 hidden int __towrite(FILE *);
 
diff --git a/src/internal/syscall.h b/src/internal/syscall.h
index 06c5527f..2d8a5c13 100644
--- a/src/internal/syscall.h
+++ b/src/internal/syscall.h
@@ -2,6 +2,7 @@
 #define _INTERNAL_SYSCALL_H
 
 #include <features.h>
+#include <errno.h>
 #include <sys/syscall.h>
 #include "syscall_arch.h"
 
@@ -22,27 +23,17 @@
 typedef long syscall_arg_t;
 #endif
 
-hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
+hidden long __syscall_ret(unsigned long),
 	__syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
 	             syscall_arg_t, syscall_arg_t, syscall_arg_t);
 
-#ifdef SYSCALL_NO_INLINE
-#define __syscall0(n) (__syscall)(n)
-#define __syscall1(n,a) (__syscall)(n,__scc(a))
-#define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b))
-#define __syscall3(n,a,b,c) (__syscall)(n,__scc(a),__scc(b),__scc(c))
-#define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d))
-#define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
-#define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
-#else
 #define __syscall1(n,a) __syscall1(n,__scc(a))
 #define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b))
 #define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c))
 #define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d))
 #define __syscall5(n,a,b,c,d,e) __syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
 #define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
-#endif
-#define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
+#define __syscall7(n,a,b,c,d,e,f,g) __syscall7(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
 
 #define __SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
 #define __SYSCALL_NARGS(...) __SYSCALL_NARGS_X(__VA_ARGS__,7,6,5,4,3,2,1,0,)
@@ -53,8 +44,8 @@ hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define __syscall(...) __SYSCALL_DISP(__syscall,__VA_ARGS__)
 #define syscall(...) __syscall_ret(__syscall(__VA_ARGS__))
 
-#define socketcall __socketcall
-#define socketcall_cp __socketcall_cp
+#define socketcall(nm,a,b,c,d,e,f) __syscall_ret(__socketcall(nm,a,b,c,d,e,f))
+#define socketcall_cp(nm,a,b,c,d,e,f) __syscall_ret(__socketcall_cp(nm,a,b,c,d,e,f))
 
 #define __syscall_cp0(n) (__syscall_cp)(n,0,0,0,0,0,0)
 #define __syscall_cp1(n,a) (__syscall_cp)(n,__scc(a),0,0,0,0,0)
@@ -67,15 +58,22 @@ hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__)
 #define syscall_cp(...) __syscall_ret(__syscall_cp(__VA_ARGS__))
 
-#ifndef SYSCALL_USE_SOCKETCALL
-#define __socketcall(nm,a,b,c,d,e,f) syscall(SYS_##nm, a, b, c, d, e, f)
-#define __socketcall_cp(nm,a,b,c,d,e,f) syscall_cp(SYS_##nm, a, b, c, d, e, f)
-#else
-#define __socketcall(nm,a,b,c,d,e,f) syscall(SYS_socketcall, __SC_##nm, \
-    ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f }))
-#define __socketcall_cp(nm,a,b,c,d,e,f) syscall_cp(SYS_socketcall, __SC_##nm, \
-    ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f }))
+static inline long __alt_socketcall(int sys, int sock, int cp, syscall_arg_t a, syscall_arg_t b, syscall_arg_t c, syscall_arg_t d, syscall_arg_t e, syscall_arg_t f)
+{
+	long r;
+	if (cp) r = __syscall_cp(sys, a, b, c, d, e, f);
+	else r = __syscall(sys, a, b, c, d, e, f);
+	if (r != -ENOSYS) return r;
+#ifdef SYS_socketcall
+	if (cp) r = __syscall_cp(SYS_socketcall, sock, ((long[6]){a, b, c, d, e, f}));
+	else r = __syscall(SYS_socketcall, sock, ((long[6]){a, b, c, d, e, f}));
 #endif
+	return r;
+}
+#define __socketcall(nm, a, b, c, d, e, f) __alt_socketcall(SYS_##nm, __SC_##nm, 0, \
+	__scc(a), __scc(b), __scc(c), __scc(d), __scc(e), __scc(f))
+#define __socketcall_cp(nm, a, b, c, d, e, f) __alt_socketcall(SYS_##nm, __SC_##nm, 1, \
+	__scc(a), __scc(b), __scc(c), __scc(d), __scc(e), __scc(f))
 
 /* fixup legacy 16-bit junk */
 
@@ -203,6 +201,128 @@ hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define SYS_sendfile SYS_sendfile64
 #endif
 
+#ifdef SYS_timer_settime32
+#define SYS_timer_settime SYS_timer_settime32
+#endif
+
+#ifdef SYS_timer_gettime32
+#define SYS_timer_gettime SYS_timer_gettime32
+#endif
+
+#ifdef SYS_timerfd_settime32
+#define SYS_timerfd_settime SYS_timerfd_settime32
+#endif
+
+#ifdef SYS_timerfd_gettime32
+#define SYS_timerfd_gettime SYS_timerfd_gettime32
+#endif
+
+#ifdef SYS_clock_settime32
+#define SYS_clock_settime SYS_clock_settime32
+#endif
+
+#ifdef SYS_clock_gettime32
+#define SYS_clock_gettime SYS_clock_gettime32
+#endif
+
+#ifdef SYS_clock_getres_time32
+#define SYS_clock_getres SYS_clock_getres_time32
+#endif
+
+#ifdef SYS_clock_nanosleep_time32
+#define SYS_clock_nanosleep SYS_clock_nanosleep_time32
+#endif
+
+#ifdef SYS_gettimeofday_time32
+#define SYS_gettimeofday SYS_gettimeofday_time32
+#endif
+
+#ifdef SYS_settimeofday_time32
+#define SYS_settimeofday SYS_settimeofday_time32
+#endif
+
+/* Ensure that the plain syscall names are defined even for "time64-only"
+ * archs. These facilitate callers passing null time arguments, and make
+ * tests for establishing which to use/fallback-to more consistent when
+ * they do need to be called with time arguments. */
+
+#ifndef SYS_clock_gettime
+#define SYS_clock_gettime SYS_clock_gettime64
+#endif
+
+#ifndef SYS_clock_settime
+#define SYS_clock_settime SYS_clock_settime64
+#endif
+
+#ifndef SYS_clock_adjtime
+#define SYS_clock_adjtime SYS_clock_adjtime64
+#endif
+
+#ifndef SYS_clock_getres
+#define SYS_clock_getres SYS_clock_getres_time64
+#endif
+
+#ifndef SYS_clock_nanosleep
+#define SYS_clock_nanosleep SYS_clock_nanosleep_time64
+#endif
+
+#ifndef SYS_timer_gettime
+#define SYS_timer_gettime SYS_timer_gettime64
+#endif
+
+#ifndef SYS_timer_settime
+#define SYS_timer_settime SYS_timer_settime64
+#endif
+
+#ifndef SYS_timerfd_gettime
+#define SYS_timerfd_gettime SYS_timerfd_gettime64
+#endif
+
+#ifndef SYS_timerfd_settime
+#define SYS_timerfd_settime SYS_timerfd_settime64
+#endif
+
+#ifndef SYS_utimensat
+#define SYS_utimensat SYS_utimensat_time64
+#endif
+
+#ifndef SYS_pselect6
+#define SYS_pselect6 SYS_pselect6_time64
+#endif
+
+#ifndef SYS_ppoll
+#define SYS_ppoll SYS_ppoll_time64
+#endif
+
+#ifndef SYS_recvmmsg
+#define SYS_recvmmsg SYS_recvmmsg_time64
+#endif
+
+#ifndef SYS_mq_timedsend
+#define SYS_mq_timedsend SYS_mq_timedsend_time64
+#endif
+
+#ifndef SYS_mq_timedreceive
+#define SYS_mq_timedreceive SYS_mq_timedreceive_time64
+#endif
+
+/* SYS_semtimedop omitted because SYS_ipc may provide it */
+
+#ifndef SYS_rt_sigtimedwait
+#define SYS_rt_sigtimedwait SYS_rt_sigtimedwait_time64
+#endif
+
+#ifndef SYS_futex
+#define SYS_futex SYS_futex_time64
+#endif
+
+#ifndef SYS_sched_rr_get_interval
+#define SYS_sched_rr_get_interval SYS_sched_rr_get_interval_time64
+#endif
+
+
+
+
 /* socketcall calls */
 
 #define __SC_socket      1
@@ -226,6 +346,33 @@ hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define __SC_recvmmsg    19
 #define __SC_sendmmsg    20
 
+/* This is valid only because all socket syscalls are made via
+ * socketcall, which always fills unused argument slots with zeros. */
+#ifndef SYS_accept
+#define SYS_accept SYS_accept4
+#endif
+
+#ifndef SO_RCVTIMEO_OLD
+#define SO_RCVTIMEO_OLD  20
+#endif
+#ifndef SO_SNDTIMEO_OLD
+#define SO_SNDTIMEO_OLD  21
+#endif
+
+#define SO_TIMESTAMP_OLD    29
+#define SO_TIMESTAMPNS_OLD  35
+#define SO_TIMESTAMPING_OLD 37
+#define SCM_TIMESTAMP_OLD    SO_TIMESTAMP_OLD
+#define SCM_TIMESTAMPNS_OLD  SO_TIMESTAMPNS_OLD
+#define SCM_TIMESTAMPING_OLD SO_TIMESTAMPING_OLD
+
+#ifndef SIOCGSTAMP_OLD
+#define SIOCGSTAMP_OLD 0x8906
+#endif
+#ifndef SIOCGSTAMPNS_OLD
+#define SIOCGSTAMPNS_OLD 0x8907
+#endif
+
 #ifdef SYS_open
 #define __sys_open2(x,pn,fl) __syscall2(SYS_open, pn, (fl)|O_LARGEFILE)
 #define __sys_open3(x,pn,fl,mo) __syscall3(SYS_open, pn, (fl)|O_LARGEFILE, mo)
@@ -244,6 +391,29 @@ hidden long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define __sys_open_cp(...) __SYSCALL_DISP(__sys_open_cp,,__VA_ARGS__)
 #define sys_open_cp(...) __syscall_ret(__sys_open_cp(__VA_ARGS__))
 
+#ifdef SYS_pause
+#define __sys_pause() __syscall(SYS_pause)
+#define __sys_pause_cp() __syscall_cp(SYS_pause)
+#else
+#define __sys_pause() __syscall(SYS_ppoll, 0, 0, 0, 0)
+#define __sys_pause_cp() __syscall_cp(SYS_ppoll, 0, 0, 0, 0)
+#endif
+
+#define sys_pause() __syscall_ret(__sys_pause())
+#define sys_pause_cp() __syscall_ret(__sys_pause_cp())
+
+#ifdef SYS_wait4
+#define __sys_wait4(a,b,c,d) __syscall(SYS_wait4,a,b,c,d)
+#define __sys_wait4_cp(a,b,c,d) __syscall_cp(SYS_wait4,a,b,c,d)
+#else
+hidden long __emulate_wait4(int, int *, int, void *, int);
+#define __sys_wait4(a,b,c,d) __emulate_wait4(a,b,c,d,0)
+#define __sys_wait4_cp(a,b,c,d) __emulate_wait4(a,b,c,d,1)
+#endif
+
+#define sys_wait4(a,b,c,d) __syscall_ret(__sys_wait4(a,b,c,d))
+#define sys_wait4_cp(a,b,c,d) __syscall_ret(__sys_wait4_cp(a,b,c,d))
+
 hidden void __procfdname(char __buf[static 15+3*sizeof(int)], unsigned);
 
 hidden void *__vdsosym(const char *, const char *);
diff --git a/src/internal/x32/syscall.s b/src/internal/x32/syscall.s
deleted file mode 100644
index c4bee804..00000000
--- a/src/internal/x32/syscall.s
+++ /dev/null
@@ -1,13 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
-	movq %rdi,%rax
-	movq %rsi,%rdi
-	movq %rdx,%rsi
-	movq %rcx,%rdx
-	movq %r8,%r10
-	movq %r9,%r8
-	movq 8(%rsp),%r9
-	syscall
-	ret
diff --git a/src/internal/x86_64/syscall.s b/src/internal/x86_64/syscall.s
deleted file mode 100644
index c4bee804..00000000
--- a/src/internal/x86_64/syscall.s
+++ /dev/null
@@ -1,13 +0,0 @@
-.global __syscall
-.hidden __syscall
-.type __syscall,@function
-__syscall:
-	movq %rdi,%rax
-	movq %rsi,%rdi
-	movq %rdx,%rsi
-	movq %rcx,%rdx
-	movq %r8,%r10
-	movq %r9,%r8
-	movq 8(%rsp),%r9
-	syscall
-	ret
diff --git a/src/ipc/ipc.h b/src/ipc/ipc.h
index 30ab939a..746a905c 100644
--- a/src/ipc/ipc.h
+++ b/src/ipc/ipc.h
@@ -1,3 +1,5 @@
+#include "syscall.h"
+
 #define IPCOP_semop      1
 #define IPCOP_semget     2
 #define IPCOP_semctl     3
@@ -10,3 +12,13 @@
 #define IPCOP_shmdt     22
 #define IPCOP_shmget    23
 #define IPCOP_shmctl    24
+
+#ifndef IPC_64
+#define IPC_64 0x100
+#endif
+
+#define IPC_TIME64 (IPC_STAT & 0x100)
+
+#define IPC_CMD(cmd) (((cmd) & ~IPC_TIME64) | IPC_64)
+
+#define IPC_HILO(b,t) ((b)->t = (b)->__##t##_lo | 0LL+(b)->__##t##_hi<<32)
diff --git a/src/ipc/msgctl.c b/src/ipc/msgctl.c
index ea9b2337..9c114406 100644
--- a/src/ipc/msgctl.c
+++ b/src/ipc/msgctl.c
@@ -9,6 +9,14 @@
 
 int msgctl(int q, int cmd, struct msqid_ds *buf)
 {
+#if IPC_TIME64
+	struct msqid_ds out, *orig;
+	if (cmd&IPC_TIME64) {
+		out = (struct msqid_ds){0};
+		orig = buf;
+		buf = &out;
+	}
+#endif
 #ifdef SYSCALL_IPC_BROKEN_MODE
 	struct msqid_ds tmp;
 	if (cmd == IPC_SET) {
@@ -17,18 +25,27 @@ int msgctl(int q, int cmd, struct msqid_ds *buf)
 		buf = &tmp;
 	}
 #endif
-#ifdef SYS_msgctl
-	int r = __syscall(SYS_msgctl, q, cmd | IPC_64, buf);
+#ifndef SYS_ipc
+	int r = __syscall(SYS_msgctl, q, IPC_CMD(cmd), buf);
 #else
-	int r = __syscall(SYS_ipc, IPCOP_msgctl, q, cmd | IPC_64, 0, buf, 0);
+	int r = __syscall(SYS_ipc, IPCOP_msgctl, q, IPC_CMD(cmd), 0, buf, 0);
 #endif
 #ifdef SYSCALL_IPC_BROKEN_MODE
-	if (r >= 0) switch (cmd) {
+	if (r >= 0) switch (cmd | IPC_TIME64) {
 	case IPC_STAT:
 	case MSG_STAT:
 	case MSG_STAT_ANY:
 		buf->msg_perm.mode >>= 16;
 	}
 #endif
+#if IPC_TIME64
+	if (r >= 0 && (cmd&IPC_TIME64)) {
+		buf = orig;
+		*buf = out;
+		IPC_HILO(buf, msg_stime);
+		IPC_HILO(buf, msg_rtime);
+		IPC_HILO(buf, msg_ctime);
+	}
+#endif
 	return __syscall_ret(r);
 }
diff --git a/src/ipc/msgget.c b/src/ipc/msgget.c
index 9dfbc4ea..30a4b42b 100644
--- a/src/ipc/msgget.c
+++ b/src/ipc/msgget.c
@@ -4,7 +4,7 @@
 
 int msgget(key_t k, int flag)
 {
-#ifdef SYS_msgget
+#ifndef SYS_ipc
 	return syscall(SYS_msgget, k, flag);
 #else
 	return syscall(SYS_ipc, IPCOP_msgget, k, flag);
diff --git a/src/ipc/msgrcv.c b/src/ipc/msgrcv.c
index 0a344e56..9d1034b1 100644
--- a/src/ipc/msgrcv.c
+++ b/src/ipc/msgrcv.c
@@ -4,7 +4,7 @@
 
 ssize_t msgrcv(int q, void *m, size_t len, long type, int flag)
 {
-#ifdef SYS_msgrcv
+#ifndef SYS_ipc
 	return syscall_cp(SYS_msgrcv, q, m, len, type, flag);
 #else
 	return syscall_cp(SYS_ipc, IPCOP_msgrcv, q, len, flag, ((long[]){ (long)m, type }));
diff --git a/src/ipc/msgsnd.c b/src/ipc/msgsnd.c
index e1abde3a..99bb17e9 100644
--- a/src/ipc/msgsnd.c
+++ b/src/ipc/msgsnd.c
@@ -4,7 +4,7 @@
 
 int msgsnd(int q, const void *m, size_t len, int flag)
 {
-#ifdef SYS_msgsnd
+#ifndef SYS_ipc
 	return syscall_cp(SYS_msgsnd, q, m, len, flag);
 #else
 	return syscall_cp(SYS_ipc, IPCOP_msgsnd, q, len, flag, m);
diff --git a/src/ipc/semctl.c b/src/ipc/semctl.c
index 941e2813..bbb97d7a 100644
--- a/src/ipc/semctl.c
+++ b/src/ipc/semctl.c
@@ -18,13 +18,24 @@ int semctl(int id, int num, int cmd, ...)
 {
 	union semun arg = {0};
 	va_list ap;
-	switch (cmd) {
-	case SETVAL: case GETALL: case SETALL: case IPC_STAT: case IPC_SET:
-	case IPC_INFO: case SEM_INFO: case SEM_STAT:
+	switch (cmd & ~IPC_TIME64) {
+	case SETVAL: case GETALL: case SETALL: case IPC_SET:
+	case IPC_INFO: case SEM_INFO:
+	case IPC_STAT & ~IPC_TIME64:
+	case SEM_STAT & ~IPC_TIME64:
+	case SEM_STAT_ANY & ~IPC_TIME64:
 		va_start(ap, cmd);
 		arg = va_arg(ap, union semun);
 		va_end(ap);
 	}
+#if IPC_TIME64
+	struct semid_ds out, *orig;
+	if (cmd&IPC_TIME64) {
+		out = (struct semid_ds){0};
+		orig = arg.buf;
+		arg.buf = &out;
+	}
+#endif
 #ifdef SYSCALL_IPC_BROKEN_MODE
 	struct semid_ds tmp;
 	if (cmd == IPC_SET) {
@@ -33,18 +44,26 @@ int semctl(int id, int num, int cmd, ...)
 		arg.buf = &tmp;
 	}
 #endif
-#ifdef SYS_semctl
-	int r = __syscall(SYS_semctl, id, num, cmd | IPC_64, arg.buf);
+#ifndef SYS_ipc
+	int r = __syscall(SYS_semctl, id, num, IPC_CMD(cmd), arg.buf);
 #else
-	int r = __syscall(SYS_ipc, IPCOP_semctl, id, num, cmd | IPC_64, &arg.buf);
+	int r = __syscall(SYS_ipc, IPCOP_semctl, id, num, IPC_CMD(cmd), &arg.buf);
 #endif
 #ifdef SYSCALL_IPC_BROKEN_MODE
-	if (r >= 0) switch (cmd) {
+	if (r >= 0) switch (cmd | IPC_TIME64) {
 	case IPC_STAT:
 	case SEM_STAT:
 	case SEM_STAT_ANY:
 		arg.buf->sem_perm.mode >>= 16;
 	}
 #endif
+#if IPC_TIME64
+	if (r >= 0 && (cmd&IPC_TIME64)) {
+		arg.buf = orig;
+		*arg.buf = out;
+		IPC_HILO(arg.buf, sem_otime);
+		IPC_HILO(arg.buf, sem_ctime);
+	}
+#endif
 	return __syscall_ret(r);
 }
diff --git a/src/ipc/semget.c b/src/ipc/semget.c
index c4a559db..2cdf626b 100644
--- a/src/ipc/semget.c
+++ b/src/ipc/semget.c
@@ -11,7 +11,7 @@ int semget(key_t key, int n, int fl)
 	 * n fits in the correct (per POSIX) userspace type, so
 	 * we have to check here. */
 	if (n > USHRT_MAX) return __syscall_ret(-EINVAL);
-#ifdef SYS_semget
+#ifndef SYS_ipc
 	return syscall(SYS_semget, key, n, fl);
 #else
 	return syscall(SYS_ipc, IPCOP_semget, key, n, fl);
diff --git a/src/ipc/semop.c b/src/ipc/semop.c
index 8046e437..5f0c7dea 100644
--- a/src/ipc/semop.c
+++ b/src/ipc/semop.c
@@ -4,7 +4,7 @@
 
 int semop(int id, struct sembuf *buf, size_t n)
 {
-#ifdef SYS_semop
+#ifndef SYS_ipc
 	return syscall(SYS_semop, id, buf, n);
 #else
 	return syscall(SYS_ipc, IPCOP_semop, id, n, 0, buf);
diff --git a/src/ipc/semtimedop.c b/src/ipc/semtimedop.c
index b0c4cf9f..a104af21 100644
--- a/src/ipc/semtimedop.c
+++ b/src/ipc/semtimedop.c
@@ -1,13 +1,36 @@
 #define _GNU_SOURCE
 #include <sys/sem.h>
+#include <errno.h>
 #include "syscall.h"
 #include "ipc.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+#if !defined(SYS_semtimedop) && !defined(SYS_ipc) || \
+	SYS_semtimedop == SYS_semtimedop_time64
+#define NO_TIME32 1
+#else
+#define NO_TIME32 0
+#endif
+
 int semtimedop(int id, struct sembuf *buf, size_t n, const struct timespec *ts)
 {
-#ifdef SYS_semtimedop
+#ifdef SYS_semtimedop_time64
+	time_t s = ts ? ts->tv_sec : 0;
+	long ns = ts ? ts->tv_nsec : 0;
+	int r = -ENOSYS;
+	if (NO_TIME32 || !IS32BIT(s))
+		r = __syscall(SYS_semtimedop_time64, id, buf, n,
+			ts ? ((long long[]){s, ns}) : 0);
+	if (NO_TIME32 || r!=-ENOSYS) return __syscall_ret(r);
+	ts = ts ? (void *)(long[]){CLAMP(s), ns} : 0;
+#endif
+#if defined(SYS_ipc)
+	return syscall(SYS_ipc, IPCOP_semtimedop, id, n, 0, buf, ts);
+#elif defined(SYS_semtimedop)
 	return syscall(SYS_semtimedop, id, buf, n, ts);
 #else
-	return syscall(SYS_ipc, IPCOP_semtimedop, id, n, 0, buf, ts);
+	return __syscall_ret(-ENOSYS);
 #endif
 }
diff --git a/src/ipc/shmat.c b/src/ipc/shmat.c
index 38db92f9..8c7407d1 100644
--- a/src/ipc/shmat.c
+++ b/src/ipc/shmat.c
@@ -2,7 +2,7 @@
 #include "syscall.h"
 #include "ipc.h"
 
-#ifdef SYS_shmat
+#ifndef SYS_ipc
 void *shmat(int id, const void *addr, int flag)
 {
 	return (void *)syscall(SYS_shmat, id, addr, flag);
diff --git a/src/ipc/shmctl.c b/src/ipc/shmctl.c
index c951a581..1c9f78c2 100644
--- a/src/ipc/shmctl.c
+++ b/src/ipc/shmctl.c
@@ -9,6 +9,14 @@
 
 int shmctl(int id, int cmd, struct shmid_ds *buf)
 {
+#if IPC_TIME64
+	struct shmid_ds out, *orig;
+	if (cmd&IPC_TIME64) {
+		out = (struct shmid_ds){0};
+		orig = buf;
+		buf = &out;
+	}
+#endif
 #ifdef SYSCALL_IPC_BROKEN_MODE
 	struct shmid_ds tmp;
 	if (cmd == IPC_SET) {
@@ -17,18 +25,27 @@ int shmctl(int id, int cmd, struct shmid_ds *buf)
 		buf = &tmp;
 	}
 #endif
-#ifdef SYS_shmctl
-	int r = __syscall(SYS_shmctl, id, cmd | IPC_64, buf);
+#ifndef SYS_ipc
+	int r = __syscall(SYS_shmctl, id, IPC_CMD(cmd), buf);
 #else
-	int r = __syscall(SYS_ipc, IPCOP_shmctl, id, cmd | IPC_64, 0, buf, 0);
+	int r = __syscall(SYS_ipc, IPCOP_shmctl, id, IPC_CMD(cmd), 0, buf, 0);
 #endif
 #ifdef SYSCALL_IPC_BROKEN_MODE
-	if (r >= 0) switch (cmd) {
+	if (r >= 0) switch (cmd | IPC_TIME64) {
 	case IPC_STAT:
 	case SHM_STAT:
 	case SHM_STAT_ANY:
 		buf->shm_perm.mode >>= 16;
 	}
 #endif
+#if IPC_TIME64
+	if (r >= 0 && (cmd&IPC_TIME64)) {
+		buf = orig;
+		*buf = out;
+		IPC_HILO(buf, shm_atime);
+		IPC_HILO(buf, shm_dtime);
+		IPC_HILO(buf, shm_ctime);
+	}
+#endif
 	return __syscall_ret(r);
 }
diff --git a/src/ipc/shmdt.c b/src/ipc/shmdt.c
index d4fac8f1..57238137 100644
--- a/src/ipc/shmdt.c
+++ b/src/ipc/shmdt.c
@@ -4,7 +4,7 @@
 
 int shmdt(const void *addr)
 {
-#ifdef SYS_shmdt
+#ifndef SYS_ipc
 	return syscall(SYS_shmdt, addr);
 #else
 	return syscall(SYS_ipc, IPCOP_shmdt, 0, 0, 0, addr);
diff --git a/src/ipc/shmget.c b/src/ipc/shmget.c
index b44f9d68..7521b5fa 100644
--- a/src/ipc/shmget.c
+++ b/src/ipc/shmget.c
@@ -6,7 +6,7 @@
 int shmget(key_t key, size_t size, int flag)
 {
 	if (size > PTRDIFF_MAX) size = SIZE_MAX;
-#ifdef SYS_shmget
+#ifndef SYS_ipc
 	return syscall(SYS_shmget, key, size, flag);
 #else
 	return syscall(SYS_ipc, IPCOP_shmget, key, size, flag);
diff --git a/src/ldso/__dlsym.c b/src/ldso/__dlsym.c
index 8ac0a334..0384f97e 100644
--- a/src/ldso/__dlsym.c
+++ b/src/ldso/__dlsym.c
@@ -8,3 +8,7 @@ static void *stub_dlsym(void *restrict p, const char *restrict s, void *restrict
 }
 
 weak_alias(stub_dlsym, __dlsym);
+
+#if _REDIR_TIME64
+weak_alias(stub_dlsym, __dlsym_redir_time64);
+#endif
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8e4004d7..c6c685b3 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -9,87 +9,23 @@ __tlsdesc_static:
 	ldr x0,[x0,#8]
 	ret
 
-.hidden __tls_get_new
-
 // size_t __tlsdesc_dynamic(size_t *a)
 // {
 // 	struct {size_t modidx,off;} *p = (void*)a[1];
 // 	size_t *dtv = *(size_t**)(tp - 8);
-// 	if (p->modidx <= dtv[0])
-// 		return dtv[p->modidx] + p->off - tp;
-// 	return __tls_get_new(p) - tp;
+// 	return dtv[p->modidx] + p->off - tp;
 // }
 .global __tlsdesc_dynamic
 .hidden __tlsdesc_dynamic
 .type __tlsdesc_dynamic,@function
 __tlsdesc_dynamic:
-	stp x1,x2,[sp,#-32]!
-	stp x3,x4,[sp,#16]
+	stp x1,x2,[sp,#-16]!
 	mrs x1,tpidr_el0      // tp
 	ldr x0,[x0,#8]        // p
-	ldr x2,[x0]           // p->modidx
-	ldr x3,[x1,#-8]       // dtv
-	ldr x4,[x3]           // dtv[0]
-	cmp x2,x4
-	b.hi 1f
-	ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]
-	ldr x0,[x0,#8]        // p->off
-	add x0,x0,x2
-2:	sub x0,x0,x1
-	ldp x3,x4,[sp,#16]
-	ldp x1,x2,[sp],#32
+	ldp x0,x2,[x0]        // p->modidx, p->off
+	sub x2,x2,x1          // p->off - tp
+	ldr x1,[x1,#-8]       // dtv
+	ldr x1,[x1,x0,lsl #3] // dtv[p->modidx]
+	add x0,x1,x2          // dtv[p->modidx] + p->off - tp
+	ldp x1,x2,[sp],#16
 	ret
-
-	// save all registers __tls_get_new may clobber
-	// update sp in two steps because offset must be in [-512,509]
-1:	stp x29,x30,[sp,#-160]!
-	stp x5,x6,[sp,#16]
-	stp x7,x8,[sp,#32]
-	stp x9,x10,[sp,#48]
-	stp x11,x12,[sp,#64]
-	stp x13,x14,[sp,#80]
-	stp x15,x16,[sp,#96]
-	stp x17,x18,[sp,#112]
-	stp q0,q1,[sp,#128]
-	stp q2,q3,[sp,#-480]!
-	stp q4,q5,[sp,#32]
-	stp q6,q7,[sp,#64]
-	stp q8,q9,[sp,#96]
-	stp q10,q11,[sp,#128]
-	stp q12,q13,[sp,#160]
-	stp q14,q15,[sp,#192]
-	stp q16,q17,[sp,#224]
-	stp q18,q19,[sp,#256]
-	stp q20,q21,[sp,#288]
-	stp q22,q23,[sp,#320]
-	stp q24,q25,[sp,#352]
-	stp q26,q27,[sp,#384]
-	stp q28,q29,[sp,#416]
-	stp q30,q31,[sp,#448]
-	bl __tls_get_new
-	mrs x1,tpidr_el0
-	ldp q4,q5,[sp,#32]
-	ldp q6,q7,[sp,#64]
-	ldp q8,q9,[sp,#96]
-	ldp q10,q11,[sp,#128]
-	ldp q12,q13,[sp,#160]
-	ldp q14,q15,[sp,#192]
-	ldp q16,q17,[sp,#224]
-	ldp q18,q19,[sp,#256]
-	ldp q20,q21,[sp,#288]
-	ldp q22,q23,[sp,#320]
-	ldp q24,q25,[sp,#352]
-	ldp q26,q27,[sp,#384]
-	ldp q28,q29,[sp,#416]
-	ldp q30,q31,[sp,#448]
-	ldp q2,q3,[sp],#480
-	ldp x5,x6,[sp,#16]
-	ldp x7,x8,[sp,#32]
-	ldp x9,x10,[sp,#48]
-	ldp x11,x12,[sp,#64]
-	ldp x13,x14,[sp,#80]
-	ldp x15,x16,[sp,#96]
-	ldp x17,x18,[sp,#112]
-	ldp q0,q1,[sp,#128]
-	ldp x29,x30,[sp],#160
-	b 2b
diff --git a/src/ldso/arm/dlsym_time64.S b/src/ldso/arm/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/arm/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
index 4e67c3e2..3ae133c9 100644
--- a/src/ldso/arm/tlsdesc.S
+++ b/src/ldso/arm/tlsdesc.S
@@ -8,8 +8,6 @@ __tlsdesc_static:
 	ldr r0,[r0]
 	bx lr
 
-.hidden __tls_get_new
-
 .global __tlsdesc_dynamic
 .hidden __tlsdesc_dynamic
 .type __tlsdesc_dynamic,%function
@@ -30,18 +28,18 @@ __tlsdesc_dynamic:
 #if __ARM_ARCH >= 5
 	blx r0          // r0 = tp
 #else
+#if __thumb__
+	add lr,pc,#1
+#else
 	mov lr,pc
+#endif
 	bx r0
 #endif
 #endif
 	ldr r3,[r0,#-4] // r3 = dtv
-	ldr ip,[r3]     // ip = dtv slot count
-	cmp r1,ip
-	bhi 3f
 	ldr ip,[r3,r1,LSL #2]
 	sub r0,ip,r0
 	add r0,r0,r2    // r0 = r3[r1]-r0+r2
-4:
 #if __ARM_ARCH >= 5
 	pop {r2,r3,ip,pc}
 #else
@@ -49,21 +47,6 @@ __tlsdesc_dynamic:
 	bx lr
 #endif
 
-3:
-#if __ARM_PCS_VFP || !__SOFTFP__
-	.fpu vfp
-	vpush {d0-d7}
-#endif
-	push {r0-r3}
-	add r0,sp,#4
-	bl __tls_get_new
-	pop {r1-r3,ip}
-#if __ARM_PCS_VFP || !__SOFTFP__
-	vpop {d0-d7}
-#endif
-	sub r0,r0,r1    // r0 = retval-tp
-	b 4b
-
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 #else
diff --git a/src/ldso/dl_iterate_phdr.c b/src/ldso/dl_iterate_phdr.c
index 86c87ef8..9546dd36 100644
--- a/src/ldso/dl_iterate_phdr.c
+++ b/src/ldso/dl_iterate_phdr.c
@@ -1,5 +1,6 @@
 #include <elf.h>
 #include <link.h>
+#include "pthread_impl.h"
 #include "libc.h"
 
 #define AUX_CNT 38
@@ -35,7 +36,7 @@ static int static_dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size
 	info.dlpi_subs  = 0;
 	if (tls_phdr) {
 		info.dlpi_tls_modid = 1;
-		info.dlpi_tls_data = (void *)(base + tls_phdr->p_vaddr);
+		info.dlpi_tls_data = __tls_get_addr((tls_mod_off_t[]){1,0});
 	} else {
 		info.dlpi_tls_modid = 0;
 		info.dlpi_tls_data = 0;
diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c
index 06ed8542..dae0f3a9 100644
--- a/src/ldso/dlerror.c
+++ b/src/ldso/dlerror.c
@@ -3,6 +3,12 @@
 #include <stdarg.h>
 #include "pthread_impl.h"
 #include "dynlink.h"
+#include "atomic.h"
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc __libc_realloc
+#define free __libc_free
 
 char *dlerror()
 {
@@ -16,21 +22,45 @@ char *dlerror()
 		return s;
 }
 
+/* Atomic singly-linked list, used to store list of thread-local dlerror
+ * buffers for deferred free. They cannot be freed at thread exit time
+ * because, by the time it's known they can be freed, the exiting thread
+ * is in a highly restrictive context where it cannot call (even the
+ * libc-internal) free. It also can't take locks; thus the atomic list. */
+
+static void *volatile freebuf_queue;
+
 void __dl_thread_cleanup(void)
 {
 	pthread_t self = __pthread_self();
-	if (self->dlerror_buf != (void *)-1)
-		free(self->dlerror_buf);
+	if (!self->dlerror_buf || self->dlerror_buf == (void *)-1)
+		return;
+	void *h;
+	do {
+		h = freebuf_queue;
+		*(void **)self->dlerror_buf = h;
+	} while (a_cas_p(&freebuf_queue, h, self->dlerror_buf) != h);
 }
 
 hidden void __dl_vseterr(const char *fmt, va_list ap)
 {
+	void **q;
+	do q = freebuf_queue;
+	while (q && a_cas_p(&freebuf_queue, q, 0) != q);
+
+	while (q) {
+		void **p = *q;
+		free(q);
+		q = p;
+	}
+
 	va_list ap2;
 	va_copy(ap2, ap);
 	pthread_t self = __pthread_self();
 	if (self->dlerror_buf != (void *)-1)
 		free(self->dlerror_buf);
 	size_t len = vsnprintf(0, 0, fmt, ap2);
+	if (len < sizeof(void *)) len = sizeof(void *);
 	va_end(ap2);
 	char *buf = malloc(len+1);
 	if (buf) {
diff --git a/src/ldso/i386/dlsym_time64.S b/src/ldso/i386/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/i386/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/i386/tlsdesc.s b/src/ldso/i386/tlsdesc.s
index 4a553bce..32c81766 100644
--- a/src/ldso/i386/tlsdesc.s
+++ b/src/ldso/i386/tlsdesc.s
@@ -6,8 +6,6 @@ __tlsdesc_static:
 	mov 4(%eax),%eax
 	ret
 
-.hidden __tls_get_new
-
 .global __tlsdesc_dynamic
 .hidden __tlsdesc_dynamic
 .type __tlsdesc_dynamic,@function
@@ -17,15 +15,9 @@ __tlsdesc_dynamic:
 	mov %gs:4,%edx
 	push %ecx
 	mov (%eax),%ecx
-	cmp %ecx,(%edx)
-	jc 1f
 	mov 4(%eax),%eax
 	add (%edx,%ecx,4),%eax
-2:	pop %ecx
+	pop %ecx
 	sub %gs:0,%eax
 	pop %edx
 	ret
-1:	push %eax
-	call __tls_get_new
-	pop %ecx
-	jmp 2b
diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s
new file mode 100644
index 00000000..26fabcdb
--- /dev/null
+++ b/src/ldso/loongarch64/dlsym.s
@@ -0,0 +1,7 @@
+.global dlsym
+.hidden __dlsym
+.type   dlsym,@function
+dlsym:
+	move      $a2, $ra
+	la.global $t0, __dlsym
+	jr        $t0
diff --git a/src/ldso/loongarch64/tlsdesc.s b/src/ldso/loongarch64/tlsdesc.s
new file mode 100644
index 00000000..4b6ea0e5
--- /dev/null
+++ b/src/ldso/loongarch64/tlsdesc.s
@@ -0,0 +1,37 @@
+.text
+.global __tlsdesc_static
+.hidden __tlsdesc_static
+.type __tlsdesc_static,%function
+__tlsdesc_static:
+	ld.d $a0, $a0, 8
+	jr $ra
+# size_t __tlsdesc_dynamic(size_t *a)
+# {
+#      struct {size_t modidx,off;} *p = (void*)a[1];
+#      size_t *dtv = *(size_t**)(tp - 8);
+#      return dtv[p->modidx] + p->off - tp;
+# }
+.global __tlsdesc_dynamic
+.hidden __tlsdesc_dynamic
+.type __tlsdesc_dynamic,%function
+__tlsdesc_dynamic:
+	addi.d $sp, $sp, -16
+	st.d $t1, $sp, 0
+	st.d $t2, $sp, 8
+
+	ld.d $t2, $tp, -8 # t2=dtv
+
+	ld.d $a0, $a0, 8  # a0=&{modidx,off}
+	ld.d $t1, $a0, 8  # t1=off
+	ld.d $a0, $a0, 0  # a0=modidx
+	slli.d $a0, $a0, 3  # a0=8*modidx
+
+	add.d $a0, $a0, $t2  # a0=dtv+8*modidx
+	ld.d $a0, $a0, 0  # a0=dtv[modidx]
+	add.d $a0, $a0, $t1 # a0=dtv[modidx]+off
+	sub.d $a0, $a0, $tp # a0=dtv[modidx]+off-tp
+
+	ld.d $t1, $sp, 0
+	ld.d $t2, $sp, 8
+	addi.d $sp, $sp, 16
+	jr $ra
diff --git a/src/ldso/m68k/dlsym_time64.S b/src/ldso/m68k/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/m68k/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/microblaze/dlsym_time64.S b/src/ldso/microblaze/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/microblaze/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/mips/dlsym_time64.S b/src/ldso/mips/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/mips/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/mipsn32/dlsym_time64.S b/src/ldso/mipsn32/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/mipsn32/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/or1k/dlsym_time64.S b/src/ldso/or1k/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/or1k/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/powerpc/dlsym.s b/src/ldso/powerpc/dlsym.s
index 357d5771..cfe308ef 100644
--- a/src/ldso/powerpc/dlsym.s
+++ b/src/ldso/powerpc/dlsym.s
@@ -5,5 +5,4 @@
 dlsym:
 	mflr    5                      # The return address is arg3.
 	b       __dlsym
-	.end    dlsym
 	.size   dlsym, .-dlsym
diff --git a/src/ldso/powerpc/dlsym_time64.S b/src/ldso/powerpc/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/powerpc/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/powerpc64/dlsym.s b/src/ldso/powerpc64/dlsym.s
index 7eb691d9..a14715fd 100644
--- a/src/ldso/powerpc64/dlsym.s
+++ b/src/ldso/powerpc64/dlsym.s
@@ -8,5 +8,4 @@ dlsym:
 	.localentry dlsym,.-dlsym
 	mflr    5                      # The return address is arg3.
 	b       __dlsym
-	.end    dlsym
 	.size   dlsym, .-dlsym
diff --git a/src/ldso/riscv32/dlsym.s b/src/ldso/riscv32/dlsym.s
new file mode 100644
index 00000000..2bafd72d
--- /dev/null
+++ b/src/ldso/riscv32/dlsym.s
@@ -0,0 +1,6 @@
+.global dlsym
+.hidden __dlsym
+.type dlsym, %function
+dlsym:
+	mv a2, ra
+	tail __dlsym
diff --git a/src/ldso/riscv64/dlsym.s b/src/ldso/riscv64/dlsym.s
new file mode 100644
index 00000000..2bafd72d
--- /dev/null
+++ b/src/ldso/riscv64/dlsym.s
@@ -0,0 +1,6 @@
+.global dlsym
+.hidden __dlsym
+.type dlsym, %function
+dlsym:
+	mv a2, ra
+	tail __dlsym
diff --git a/src/ldso/riscv64/tlsdesc.s b/src/ldso/riscv64/tlsdesc.s
new file mode 100644
index 00000000..bef8b322
--- /dev/null
+++ b/src/ldso/riscv64/tlsdesc.s
@@ -0,0 +1,32 @@
+.text
+.global __tlsdesc_static
+.hidden __tlsdesc_static
+.type __tlsdesc_static,%function
+__tlsdesc_static:
+	ld a0,8(a0)
+	jr t0
+
+.global __tlsdesc_dynamic
+.hidden __tlsdesc_dynamic
+.type __tlsdesc_dynamic,%function
+__tlsdesc_dynamic:
+	add sp,sp,-16
+	sd t1,(sp)
+	sd t2,8(sp)
+
+	ld t2,-8(tp) # t2=dtv
+
+	ld a0,8(a0)  # a0=&{modidx,off}
+	ld t1,8(a0)  # t1=off
+	ld a0,(a0)   # a0=modidx
+	sll a0,a0,3  # a0=8*modidx
+
+	add a0,a0,t2 # a0=dtv+8*modidx
+	ld a0,(a0)   # a0=dtv[modidx]
+	add a0,a0,t1 # a0=dtv[modidx]+off
+	sub a0,a0,tp # a0=dtv[modidx]+off-tp
+
+	ld t1,(sp)
+	ld t2,8(sp)
+	add sp,sp,16
+	jr t0
diff --git a/src/ldso/sh/dlsym.s b/src/ldso/sh/dlsym.s
index 11a6fff5..34f3c35c 100644
--- a/src/ldso/sh/dlsym.s
+++ b/src/ldso/sh/dlsym.s
@@ -5,7 +5,7 @@
 dlsym:
 	mov.l L1, r0
 1:	braf  r0
-	 mov.l @r15, r6
+	 sts pr, r6
 
 .align 2
 L1:	.long __dlsym@PLT-(1b+4-.)
diff --git a/src/ldso/sh/dlsym_time64.S b/src/ldso/sh/dlsym_time64.S
new file mode 100644
index 00000000..bb2e7040
--- /dev/null
+++ b/src/ldso/sh/dlsym_time64.S
@@ -0,0 +1,3 @@
+#define __dlsym __dlsym_redir_time64
+#define dlsym __dlsym_time64
+#include "dlsym.s"
diff --git a/src/ldso/x86_64/tlsdesc.s b/src/ldso/x86_64/tlsdesc.s
index 8238c3eb..e08f1d7d 100644
--- a/src/ldso/x86_64/tlsdesc.s
+++ b/src/ldso/x86_64/tlsdesc.s
@@ -6,8 +6,6 @@ __tlsdesc_static:
 	mov 8(%rax),%rax
 	ret
 
-.hidden __tls_get_new
-
 .global __tlsdesc_dynamic
 .hidden __tlsdesc_dynamic
 .type __tlsdesc_dynamic,@function
@@ -17,28 +15,9 @@ __tlsdesc_dynamic:
 	mov %fs:8,%rdx
 	push %rcx
 	mov (%rax),%rcx
-	cmp %rcx,(%rdx)
-	jc 1f
 	mov 8(%rax),%rax
 	add (%rdx,%rcx,8),%rax
-2:	pop %rcx
+	pop %rcx
 	sub %fs:0,%rax
 	pop %rdx
 	ret
-1:	push %rdi
-	push %rdi
-	push %rsi
-	push %r8
-	push %r9
-	push %r10
-	push %r11
-	mov %rax,%rdi
-	call __tls_get_new
-	pop %r11
-	pop %r10
-	pop %r9
-	pop %r8
-	pop %rsi
-	pop %rdi
-	pop %rdi
-	jmp 2b
diff --git a/src/legacy/cuserid.c b/src/legacy/cuserid.c
index 4e78798d..dcaf73d4 100644
--- a/src/legacy/cuserid.c
+++ b/src/legacy/cuserid.c
@@ -2,13 +2,21 @@
 #include <pwd.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <string.h>
 
 char *cuserid(char *buf)
 {
+	static char usridbuf[L_cuserid];
 	struct passwd pw, *ppw;
 	long pwb[256];
-	if (getpwuid_r(geteuid(), &pw, (void *)pwb, sizeof pwb, &ppw))
-		return 0;
-	snprintf(buf, L_cuserid, "%s", pw.pw_name);
+	if (buf) *buf = 0;
+	getpwuid_r(geteuid(), &pw, (void *)pwb, sizeof pwb, &ppw);
+	if (!ppw)
+		return buf;
+	size_t len = strnlen(pw.pw_name, L_cuserid);
+	if (len == L_cuserid)
+		return buf;
+	if (!buf) buf = usridbuf;
+	memcpy(buf, pw.pw_name, len+1);
 	return buf;
 }
diff --git a/src/legacy/ftw.c b/src/legacy/ftw.c
index 506bd29c..e757fc6f 100644
--- a/src/legacy/ftw.c
+++ b/src/legacy/ftw.c
@@ -7,5 +7,3 @@ int ftw(const char *path, int (*fn)(const char *, const struct stat *, int), int
 	 * actually undefined, but works on all real-world machines. */
 	return nftw(path, (int (*)())fn, fd_limit, FTW_PHYS);
 }
-
-weak_alias(ftw, ftw64);
diff --git a/src/legacy/getusershell.c b/src/legacy/getusershell.c
index 5fecdec2..1c5d98ec 100644
--- a/src/legacy/getusershell.c
+++ b/src/legacy/getusershell.c
@@ -25,8 +25,10 @@ char *getusershell(void)
 	ssize_t l;
 	if (!f) setusershell();
 	if (!f) return 0;
-	l = getline(&line, &linesize, f);
-	if (l <= 0) return 0;
+	do {
+		l = getline(&line, &linesize, f);
+		if (l <= 0) return 0;
+	} while (line[0] == '#' || line[0] == '\n');
 	if (line[l-1]=='\n') line[l-1]=0;
 	return line;
 }
diff --git a/src/legacy/lutimes.c b/src/legacy/lutimes.c
index 2e5502d1..dd465923 100644
--- a/src/legacy/lutimes.c
+++ b/src/legacy/lutimes.c
@@ -6,9 +6,11 @@
 int lutimes(const char *filename, const struct timeval tv[2])
 {
 	struct timespec times[2];
-	times[0].tv_sec  = tv[0].tv_sec;
-	times[0].tv_nsec = tv[0].tv_usec * 1000;
-	times[1].tv_sec  = tv[1].tv_sec;
-	times[1].tv_nsec = tv[1].tv_usec * 1000;
-	return utimensat(AT_FDCWD, filename, times, AT_SYMLINK_NOFOLLOW);
+	if (tv) {
+		times[0].tv_sec  = tv[0].tv_sec;
+		times[0].tv_nsec = tv[0].tv_usec * 1000;
+		times[1].tv_sec  = tv[1].tv_sec;
+		times[1].tv_nsec = tv[1].tv_usec * 1000;
+	}
+	return utimensat(AT_FDCWD, filename, tv ? times : 0, AT_SYMLINK_NOFOLLOW);
 }
diff --git a/src/linux/adjtime.c b/src/linux/adjtime.c
index fa8af9f0..5a707f2f 100644
--- a/src/linux/adjtime.c
+++ b/src/linux/adjtime.c
@@ -15,7 +15,7 @@ int adjtime(const struct timeval *in, struct timeval *out)
 		tx.offset = in->tv_sec*1000000 + in->tv_usec;
 		tx.modes = ADJ_OFFSET_SINGLESHOT;
 	}
-	if (syscall(SYS_adjtimex, &tx) < 0) return -1;
+	if (adjtimex(&tx) < 0) return -1;
 	if (out) {
 		out->tv_sec = tx.offset / 1000000;
 		if ((out->tv_usec = tx.offset % 1000000) < 0) {
diff --git a/src/linux/adjtimex.c b/src/linux/adjtimex.c
index 91de6824..e9d727cf 100644
--- a/src/linux/adjtimex.c
+++ b/src/linux/adjtimex.c
@@ -1,7 +1,7 @@
 #include <sys/timex.h>
-#include "syscall.h"
+#include <time.h>
 
 int adjtimex(struct timex *tx)
 {
-	return syscall(SYS_adjtimex, tx);
+	return clock_adjtime(CLOCK_REALTIME, tx);
 }
diff --git a/src/linux/cache.c b/src/linux/cache.c
index 84a138a4..e76f7812 100644
--- a/src/linux/cache.c
+++ b/src/linux/cache.c
@@ -1,4 +1,6 @@
+#include <errno.h>
 #include "syscall.h"
+#include "atomic.h"
 
 #ifdef SYS_cacheflush
 int _flush_cache(void *addr, int len, int op)
@@ -15,3 +17,35 @@ int __cachectl(void *addr, int len, int op)
 }
 weak_alias(__cachectl, cachectl);
 #endif
+
+#ifdef SYS_riscv_flush_icache
+
+#define VDSO_FLUSH_ICACHE_SYM "__vdso_flush_icache"
+#define VDSO_FLUSH_ICACHE_VER "LINUX_4.15"
+
+static void *volatile vdso_func;
+
+static int flush_icache_init(void *start, void *end, unsigned long int flags)
+{
+	void *p = __vdsosym(VDSO_FLUSH_ICACHE_VER, VDSO_FLUSH_ICACHE_SYM);
+	int (*f)(void *, void *, unsigned long int) =
+		(int (*)(void *, void *, unsigned long int))p;
+	a_cas_p(&vdso_func, (void *)flush_icache_init, p);
+	return f ? f(start, end, flags) : -ENOSYS;
+}
+
+static void *volatile vdso_func = (void *)flush_icache_init;
+
+int __riscv_flush_icache(void *start, void *end, unsigned long int flags) 
+{
+	int (*f)(void *, void *, unsigned long int) =
+		(int (*)(void *, void *, unsigned long int))vdso_func;
+	if (f) {
+		int r = f(start, end, flags);
+		if (!r) return r;
+		if (r != -ENOSYS) return __syscall_ret(r);
+	}
+	return syscall(SYS_riscv_flush_icache, start, end, flags);
+}
+weak_alias(__riscv_flush_icache, riscv_flush_icache);
+#endif
diff --git a/src/linux/clock_adjtime.c b/src/linux/clock_adjtime.c
index 056ad6d3..d4d03d24 100644
--- a/src/linux/clock_adjtime.c
+++ b/src/linux/clock_adjtime.c
@@ -1,7 +1,151 @@
 #include <sys/timex.h>
+#include <time.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+
+struct ktimex64 {
+	unsigned modes;
+	int :32;
+	long long offset, freq, maxerror, esterror;
+	int status;
+	int :32;
+	long long constant, precision, tolerance;
+	long long time_sec, time_usec;
+	long long tick, ppsfreq, jitter;
+	int shift;
+	int :32;
+	long long stabil, jitcnt, calcnt, errcnt, stbcnt;
+	int tai;
+	int __padding[11];
+};
+
+struct ktimex {
+	unsigned modes;
+	long offset, freq, maxerror, esterror;
+	int status;
+	long constant, precision, tolerance;
+	long time_sec, time_usec;
+	long tick, ppsfreq, jitter;
+	int shift;
+	long stabil, jitcnt, calcnt, errcnt, stbcnt;
+	int tai;
+	int __padding[11];
+};
+
 int clock_adjtime (clockid_t clock_id, struct timex *utx)
 {
+	int r = -ENOSYS;
+#ifdef SYS_clock_adjtime64
+	struct ktimex64 ktx = {
+		.modes = utx->modes,
+		.offset = utx->offset,
+		.freq = utx->freq,
+		.maxerror = utx->maxerror,
+		.esterror = utx->esterror,
+		.status = utx->status,
+		.constant = utx->constant,
+		.precision = utx->precision,
+		.tolerance = utx->tolerance,
+		.time_sec = utx->time.tv_sec,
+		.time_usec = utx->time.tv_usec,
+		.tick = utx->tick,
+		.ppsfreq = utx->ppsfreq,
+		.jitter = utx->jitter,
+		.shift = utx->shift,
+		.stabil = utx->stabil,
+		.jitcnt = utx->jitcnt,
+		.calcnt = utx->calcnt,
+		.errcnt = utx->errcnt,
+		.stbcnt = utx->stbcnt,
+		.tai = utx->tai,
+	};
+	r = __syscall(SYS_clock_adjtime64, clock_id, &ktx);
+	if (r>=0) {
+		utx->modes = ktx.modes;
+		utx->offset = ktx.offset;
+		utx->freq = ktx.freq;
+		utx->maxerror = ktx.maxerror;
+		utx->esterror = ktx.esterror;
+		utx->status = ktx.status;
+		utx->constant = ktx.constant;
+		utx->precision = ktx.precision;
+		utx->tolerance = ktx.tolerance;
+		utx->time.tv_sec = ktx.time_sec;
+		utx->time.tv_usec = ktx.time_usec;
+		utx->tick = ktx.tick;
+		utx->ppsfreq = ktx.ppsfreq;
+		utx->jitter = ktx.jitter;
+		utx->shift = ktx.shift;
+		utx->stabil = ktx.stabil;
+		utx->jitcnt = ktx.jitcnt;
+		utx->calcnt = ktx.calcnt;
+		utx->errcnt = ktx.errcnt;
+		utx->stbcnt = ktx.stbcnt;
+		utx->tai = ktx.tai;
+	}
+	if (SYS_clock_adjtime == SYS_clock_adjtime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	if ((utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec))
+		return __syscall_ret(-ENOTSUP);
+#endif
+	if (sizeof(time_t) > sizeof(long)) {
+		struct ktimex ktx = {
+			.modes = utx->modes,
+			.offset = utx->offset,
+			.freq = utx->freq,
+			.maxerror = utx->maxerror,
+			.esterror = utx->esterror,
+			.status = utx->status,
+			.constant = utx->constant,
+			.precision = utx->precision,
+			.tolerance = utx->tolerance,
+			.time_sec = utx->time.tv_sec,
+			.time_usec = utx->time.tv_usec,
+			.tick = utx->tick,
+			.ppsfreq = utx->ppsfreq,
+			.jitter = utx->jitter,
+			.shift = utx->shift,
+			.stabil = utx->stabil,
+			.jitcnt = utx->jitcnt,
+			.calcnt = utx->calcnt,
+			.errcnt = utx->errcnt,
+			.stbcnt = utx->stbcnt,
+			.tai = utx->tai,
+		};
+#ifdef SYS_adjtimex
+		if (clock_id==CLOCK_REALTIME) r = __syscall(SYS_adjtimex, &ktx);
+		else
+#endif
+		r = __syscall(SYS_clock_adjtime, clock_id, &ktx);
+		if (r>=0) {
+			utx->modes = ktx.modes;
+			utx->offset = ktx.offset;
+			utx->freq = ktx.freq;
+			utx->maxerror = ktx.maxerror;
+			utx->esterror = ktx.esterror;
+			utx->status = ktx.status;
+			utx->constant = ktx.constant;
+			utx->precision = ktx.precision;
+			utx->tolerance = ktx.tolerance;
+			utx->time.tv_sec = ktx.time_sec;
+			utx->time.tv_usec = ktx.time_usec;
+			utx->tick = ktx.tick;
+			utx->ppsfreq = ktx.ppsfreq;
+			utx->jitter = ktx.jitter;
+			utx->shift = ktx.shift;
+			utx->stabil = ktx.stabil;
+			utx->jitcnt = ktx.jitcnt;
+			utx->calcnt = ktx.calcnt;
+			utx->errcnt = ktx.errcnt;
+			utx->stbcnt = ktx.stbcnt;
+			utx->tai = ktx.tai;
+		}
+		return __syscall_ret(r);
+	}
+#ifdef SYS_adjtimex
+	if (clock_id==CLOCK_REALTIME) return syscall(SYS_adjtimex, utx);
+#endif
 	return syscall(SYS_clock_adjtime, clock_id, utx);
 }
diff --git a/src/linux/clone.c b/src/linux/clone.c
index 8c1af7d3..257c1cec 100644
--- a/src/linux/clone.c
+++ b/src/linux/clone.c
@@ -4,18 +4,62 @@
 #include <sched.h>
 #include "pthread_impl.h"
 #include "syscall.h"
+#include "lock.h"
+#include "fork_impl.h"
+
+struct clone_start_args {
+	int (*func)(void *);
+	void *arg;
+	sigset_t sigmask;
+};
+
+static int clone_start(void *arg)
+{
+	struct clone_start_args *csa = arg;
+	__post_Fork(0);
+	__restore_sigs(&csa->sigmask);
+	return csa->func(csa->arg);
+}
 
 int clone(int (*func)(void *), void *stack, int flags, void *arg, ...)
 {
+	struct clone_start_args csa;
 	va_list ap;
-	pid_t *ptid, *ctid;
-	void  *tls;
+	pid_t *ptid = 0, *ctid = 0;
+	void  *tls = 0;
+
+	/* Flags that produce an invalid thread/TLS state are disallowed. */
+	int badflags = CLONE_THREAD | CLONE_SETTLS | CLONE_CHILD_CLEARTID;
+
+	if ((flags & badflags) || !stack)
+		return __syscall_ret(-EINVAL);
 
 	va_start(ap, arg);
-	ptid = va_arg(ap, pid_t *);
-	tls  = va_arg(ap, void *);
-	ctid = va_arg(ap, pid_t *);
+	if (flags & (CLONE_PIDFD | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID))
+	 	ptid = va_arg(ap, pid_t *);
+	if (flags & CLONE_CHILD_SETTID) {
+		tls = va_arg(ap, void *);
+		ctid = va_arg(ap, pid_t *);
+	}
 	va_end(ap);
 
-	return __syscall_ret(__clone(func, stack, flags, arg, ptid, tls, ctid));
+	/* If CLONE_VM is used, it's impossible to give the child a consistent
+	 * thread structure. In this case, the best we can do is assume the
+	 * caller is content with an extremely restrictive execution context
+	 * like the one vfork() would provide. */
+	if (flags & CLONE_VM) return __syscall_ret(
+		__clone(func, stack, flags, arg, ptid, tls, ctid));
+
+	__block_all_sigs(&csa.sigmask);
+	LOCK(__abort_lock);
+
+	/* Setup the a wrapper start function for the child process to do
+	 * mimic _Fork in producing a consistent execution state. */
+	csa.func = func;
+	csa.arg = arg;
+	int ret = __clone(clone_start, stack, flags, &csa, ptid, tls, ctid);
+
+	__post_Fork(ret);
+	__restore_sigs(&csa.sigmask);
+	return __syscall_ret(ret);
 }
diff --git a/src/linux/copy_file_range.c b/src/linux/copy_file_range.c
new file mode 100644
index 00000000..dd4b1333
--- /dev/null
+++ b/src/linux/copy_file_range.c
@@ -0,0 +1,8 @@
+#define _GNU_SOURCE
+#include <unistd.h>
+#include "syscall.h"
+
+ssize_t copy_file_range(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags)
+{
+	return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags);
+}
diff --git a/src/linux/epoll.c b/src/linux/epoll.c
index deff5b10..e56e8f4c 100644
--- a/src/linux/epoll.c
+++ b/src/linux/epoll.c
@@ -5,6 +5,7 @@
 
 int epoll_create(int size)
 {
+	if (size<=0) return __syscall_ret(-EINVAL);
 	return epoll_create1(0);
 }
 
@@ -24,9 +25,9 @@ int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
 
 int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
 {
-	int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
+	int r = __syscall_cp(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
 #ifdef SYS_epoll_wait
-	if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
+	if (r==-ENOSYS && !sigs) r = __syscall_cp(SYS_epoll_wait, fd, ev, cnt, to);
 #endif
 	return __syscall_ret(r);
 }
diff --git a/src/linux/fallocate.c b/src/linux/fallocate.c
index 7d68bc8f..9146350e 100644
--- a/src/linux/fallocate.c
+++ b/src/linux/fallocate.c
@@ -7,6 +7,3 @@ int fallocate(int fd, int mode, off_t base, off_t len)
 	return syscall(SYS_fallocate, fd, mode, __SYSCALL_LL_E(base),
 		__SYSCALL_LL_E(len));
 }
-
-#undef fallocate64
-weak_alias(fallocate, fallocate64);
diff --git a/src/linux/getdents.c b/src/linux/getdents.c
index de6de3b4..97f76e14 100644
--- a/src/linux/getdents.c
+++ b/src/linux/getdents.c
@@ -1,10 +1,10 @@
 #define _BSD_SOURCE
 #include <dirent.h>
+#include <limits.h>
 #include "syscall.h"
 
 int getdents(int fd, struct dirent *buf, size_t len)
 {
+	if (len>INT_MAX) len = INT_MAX;
 	return syscall(SYS_getdents, fd, buf, len);
 }
-
-weak_alias(getdents, getdents64);
diff --git a/src/linux/gettid.c b/src/linux/gettid.c
new file mode 100644
index 00000000..70767137
--- /dev/null
+++ b/src/linux/gettid.c
@@ -0,0 +1,8 @@
+#define _GNU_SOURCE
+#include <unistd.h>
+#include "pthread_impl.h"
+
+pid_t gettid(void)
+{
+	return __pthread_self()->tid;
+}
diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c
new file mode 100644
index 00000000..f64fe7e1
--- /dev/null
+++ b/src/linux/membarrier.c
@@ -0,0 +1,72 @@
+#include <sys/membarrier.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <string.h>
+#include "pthread_impl.h"
+#include "syscall.h"
+
+static void dummy_0(void)
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+
+static sem_t barrier_sem;
+
+static void bcast_barrier(int s)
+{
+	sem_post(&barrier_sem);
+}
+
+int __membarrier(int cmd, int flags)
+{
+	int r = __syscall(SYS_membarrier, cmd, flags);
+	/* Emulate the private expedited command, which is needed by the
+	 * dynamic linker for installation of dynamic TLS, for older
+	 * kernels that lack the syscall. Unlike the syscall, this only
+	 * synchronizes with threads of the process, not other processes
+	 * sharing the VM, but such sharing is not a supported usage
+	 * anyway. */
+	if (r && cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED && !flags) {
+		pthread_t self=__pthread_self(), td;
+		sigset_t set;
+		__block_app_sigs(&set);
+		__tl_lock();
+		sem_init(&barrier_sem, 0, 0);
+		struct sigaction sa = {
+			.sa_flags = SA_RESTART | SA_ONSTACK,
+			.sa_handler = bcast_barrier
+		};
+		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+		if (!__libc_sigaction(SIGSYNCCALL, &sa, 0)) {
+			for (td=self->next; td!=self; td=td->next)
+				__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+			for (td=self->next; td!=self; td=td->next)
+				sem_wait(&barrier_sem);
+			r = 0;
+			sa.sa_handler = SIG_IGN;
+			__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		}
+		sem_destroy(&barrier_sem);
+		__tl_unlock();
+		__restore_sigs(&set);
+	}
+	return __syscall_ret(r);
+}
+
+void __membarrier_init(void)
+{
+	/* If membarrier is linked, attempt to pre-register to be able to use
+	 * the private expedited command before the process becomes multi-
+	 * threaded, since registering later has bad, potentially unbounded
+	 * latency. This syscall should be essentially free, and it's arguably
+	 * a mistake in the API design that registration was even required.
+	 * For other commands, registration may impose some cost, so it's left
+	 * to the application to do so if desired. Unfortunately this means
+	 * library code initialized after the process becomes multi-threaded
+	 * cannot use these features without accepting registration latency. */
+	__syscall(SYS_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);
+}
+
+weak_alias(__membarrier, membarrier);
diff --git a/src/linux/ppoll.c b/src/linux/ppoll.c
deleted file mode 100644
index 9e262477..00000000
--- a/src/linux/ppoll.c
+++ /dev/null
@@ -1,10 +0,0 @@
-#define _GNU_SOURCE
-#include <poll.h>
-#include <signal.h>
-#include "syscall.h"
-
-int ppoll(struct pollfd *fds, nfds_t n, const struct timespec *to, const sigset_t *mask)
-{
-	return syscall_cp(SYS_ppoll, fds, n,
-		to ? (struct timespec []){*to} : 0, mask, _NSIG/8);
-}
diff --git a/src/linux/preadv2.c b/src/linux/preadv2.c
new file mode 100644
index 00000000..5e7ab70f
--- /dev/null
+++ b/src/linux/preadv2.c
@@ -0,0 +1,17 @@
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <unistd.h>
+#include "syscall.h"
+
+ssize_t preadv2(int fd, const struct iovec *iov, int count, off_t ofs, int flags)
+{
+#ifdef SYS_preadv
+	if (!flags) {
+		if (ofs==-1) return readv(fd, iov, count);
+		return syscall_cp(SYS_preadv, fd, iov, count,
+			(long)(ofs), (long)(ofs>>32));
+	}
+#endif
+	return syscall_cp(SYS_preadv2, fd, iov, count,
+		(long)(ofs), (long)(ofs>>32), flags);
+}
diff --git a/src/linux/prlimit.c b/src/linux/prlimit.c
index 3df9ffba..fcf45aab 100644
--- a/src/linux/prlimit.c
+++ b/src/linux/prlimit.c
@@ -21,6 +21,3 @@ int prlimit(pid_t pid, int resource, const struct rlimit *new_limit, struct rlim
 	}
 	return r;
 }
-
-#undef prlimit64
-weak_alias(prlimit, prlimit64);
diff --git a/src/linux/pwritev2.c b/src/linux/pwritev2.c
new file mode 100644
index 00000000..ece90d7c
--- /dev/null
+++ b/src/linux/pwritev2.c
@@ -0,0 +1,17 @@
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <unistd.h>
+#include "syscall.h"
+
+ssize_t pwritev2(int fd, const struct iovec *iov, int count, off_t ofs, int flags)
+{
+#ifdef SYS_pwritev
+	if (!flags) {
+		if (ofs==-1) return writev(fd, iov, count);
+		return syscall_cp(SYS_pwritev, fd, iov, count,
+			(long)(ofs), (long)(ofs>>32));
+	}
+#endif
+	return syscall_cp(SYS_pwritev2, fd, iov, count,
+		(long)(ofs), (long)(ofs>>32), flags);
+}
diff --git a/src/linux/renameat2.c b/src/linux/renameat2.c
new file mode 100644
index 00000000..b8060388
--- /dev/null
+++ b/src/linux/renameat2.c
@@ -0,0 +1,11 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include "syscall.h"
+
+int renameat2(int oldfd, const char *old, int newfd, const char *new, unsigned flags)
+{
+#ifdef SYS_renameat
+	if (!flags) return syscall(SYS_renameat, oldfd, old, newfd, new);
+#endif
+	return syscall(SYS_renameat2, oldfd, old, newfd, new, flags);
+}
diff --git a/src/linux/sendfile.c b/src/linux/sendfile.c
index 9afe6dd6..fc1577d3 100644
--- a/src/linux/sendfile.c
+++ b/src/linux/sendfile.c
@@ -5,5 +5,3 @@ ssize_t sendfile(int out_fd, int in_fd, off_t *ofs, size_t count)
 {
 	return syscall(SYS_sendfile, out_fd, in_fd, ofs, count);
 }
-
-weak_alias(sendfile, sendfile64);
diff --git a/src/linux/setgroups.c b/src/linux/setgroups.c
index 1248fdbf..47142f14 100644
--- a/src/linux/setgroups.c
+++ b/src/linux/setgroups.c
@@ -1,8 +1,36 @@
 #define _GNU_SOURCE
 #include <unistd.h>
+#include <signal.h>
 #include "syscall.h"
+#include "libc.h"
+
+struct ctx {
+	size_t count;
+	const gid_t *list;
+	int ret;
+};
+
+static void do_setgroups(void *p)
+{
+	struct ctx *c = p;
+	if (c->ret<0) return;
+	int ret = __syscall(SYS_setgroups, c->count, c->list);
+	if (ret && !c->ret) {
+		/* If one thread fails to set groups after another has already
+		 * succeeded, forcibly killing the process is the only safe
+		 * thing to do. State is inconsistent and dangerous. Use
+		 * SIGKILL because it is uncatchable. */
+		__block_all_sigs(0);
+		__syscall(SYS_kill, __syscall(SYS_getpid), SIGKILL);
+	}
+	c->ret = ret;
+}
 
 int setgroups(size_t count, const gid_t list[])
 {
-	return syscall(SYS_setgroups, count, list);
+	/* ret is initially nonzero so that failure of the first thread does not
+	 * trigger the safety kill above. */
+	struct ctx c = { .count = count, .list = list, .ret = 1 };
+	__synccall(do_setgroups, &c);
+	return __syscall_ret(c.ret);
 }
diff --git a/src/linux/settimeofday.c b/src/linux/settimeofday.c
index 15c18c63..860fb5de 100644
--- a/src/linux/settimeofday.c
+++ b/src/linux/settimeofday.c
@@ -1,8 +1,13 @@
 #define _BSD_SOURCE
 #include <sys/time.h>
+#include <time.h>
+#include <errno.h>
 #include "syscall.h"
 
 int settimeofday(const struct timeval *tv, const struct timezone *tz)
 {
-	return syscall(SYS_settimeofday, tv, 0);
+	if (!tv) return 0;
+	if (tv->tv_usec >= 1000000ULL) return __syscall_ret(-EINVAL);
+	return clock_settime(CLOCK_REALTIME, &((struct timespec){
+		.tv_sec = tv->tv_sec, .tv_nsec = tv->tv_usec * 1000}));
 }
diff --git a/src/linux/statx.c b/src/linux/statx.c
new file mode 100644
index 00000000..4fb96e4b
--- /dev/null
+++ b/src/linux/statx.c
@@ -0,0 +1,44 @@
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+
+int statx(int dirfd, const char *restrict path, int flags, unsigned mask, struct statx *restrict stx)
+{
+	int ret = __syscall(SYS_statx, dirfd, path, flags, mask, stx);
+
+#ifndef SYS_fstatat
+	return __syscall_ret(ret);
+#endif
+
+	if (ret != -ENOSYS) return __syscall_ret(ret);
+
+	struct stat st;
+	ret = fstatat(dirfd, path, &st, flags);
+	if (ret) return ret;
+
+	*stx = (struct statx){0};
+	stx->stx_dev_major = major(st.st_dev);
+	stx->stx_dev_minor = minor(st.st_dev);
+	stx->stx_rdev_major = major(st.st_rdev);
+	stx->stx_rdev_minor = minor(st.st_rdev);
+	stx->stx_ino = st.st_ino;
+	stx->stx_mode = st.st_mode;
+	stx->stx_nlink = st.st_nlink;
+	stx->stx_uid = st.st_uid;
+	stx->stx_gid = st.st_gid;
+	stx->stx_size = st.st_size;
+	stx->stx_blksize = st.st_blksize;
+	stx->stx_blocks = st.st_blocks;
+	stx->stx_atime.tv_sec = st.st_atim.tv_sec;
+	stx->stx_atime.tv_nsec = st.st_atim.tv_nsec;
+	stx->stx_mtime.tv_sec = st.st_mtim.tv_sec;
+	stx->stx_mtime.tv_nsec = st.st_mtim.tv_nsec;
+	stx->stx_ctime.tv_sec = st.st_ctim.tv_sec;
+	stx->stx_ctime.tv_nsec = st.st_ctim.tv_nsec;
+	stx->stx_mask = STATX_BASIC_STATS;
+
+	return 0;
+}
diff --git a/src/linux/timerfd.c b/src/linux/timerfd.c
index 62cc2773..5bdfaf16 100644
--- a/src/linux/timerfd.c
+++ b/src/linux/timerfd.c
@@ -1,6 +1,9 @@
 #include <sys/timerfd.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+
 int timerfd_create(int clockid, int flags)
 {
 	return syscall(SYS_timerfd_create, clockid, flags);
@@ -8,10 +11,49 @@ int timerfd_create(int clockid, int flags)
 
 int timerfd_settime(int fd, int flags, const struct itimerspec *new, struct itimerspec *old)
 {
+#ifdef SYS_timerfd_settime64
+	time_t is = new->it_interval.tv_sec, vs = new->it_value.tv_sec;
+	long ins = new->it_interval.tv_nsec, vns = new->it_value.tv_nsec;
+	int r = -ENOSYS;
+	if (SYS_timerfd_settime == SYS_timerfd_settime64
+	    || !IS32BIT(is) || !IS32BIT(vs) || (sizeof(time_t)>4 && old))
+		r = __syscall(SYS_timerfd_settime64, fd, flags,
+			((long long[]){is, ins, vs, vns}), old);
+	if (SYS_timerfd_settime == SYS_timerfd_settime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	if (!IS32BIT(is) || !IS32BIT(vs))
+		return __syscall_ret(-ENOTSUP);
+	long old32[4];
+	r = __syscall(SYS_timerfd_settime, fd, flags,
+		((long[]){is, ins, vs, vns}), old32);
+	if (!r && old) {
+		old->it_interval.tv_sec = old32[0];
+		old->it_interval.tv_nsec = old32[1];
+		old->it_value.tv_sec = old32[2];
+		old->it_value.tv_nsec = old32[3];
+	}
+	return __syscall_ret(r);
+#endif
 	return syscall(SYS_timerfd_settime, fd, flags, new, old);
 }
 
 int timerfd_gettime(int fd, struct itimerspec *cur)
 {
+#ifdef SYS_timerfd_gettime64
+	int r = -ENOSYS;
+	if (sizeof(time_t) > 4)
+		r = __syscall(SYS_timerfd_gettime64, fd, cur);
+	if (SYS_timerfd_gettime == SYS_timerfd_gettime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	long cur32[4];
+	r = __syscall(SYS_timerfd_gettime, fd, cur32);
+	if (!r) {
+		cur->it_interval.tv_sec = cur32[0];
+		cur->it_interval.tv_nsec = cur32[1];
+		cur->it_value.tv_sec = cur32[2];
+		cur->it_value.tv_nsec = cur32[3];
+	}
+	return __syscall_ret(r);
+#endif
 	return syscall(SYS_timerfd_gettime, fd, cur);
 }
diff --git a/src/linux/wait4.c b/src/linux/wait4.c
index 97f12cc5..fb08c0d0 100644
--- a/src/linux/wait4.c
+++ b/src/linux/wait4.c
@@ -1,9 +1,39 @@
 #define _GNU_SOURCE
 #include <sys/wait.h>
 #include <sys/resource.h>
+#include <string.h>
+#include <errno.h>
 #include "syscall.h"
 
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *usage)
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *ru)
 {
-	return syscall(SYS_wait4, pid, status, options, usage);
+	int r;
+#ifdef SYS_wait4_time64
+	if (ru) {
+		long long kru64[18];
+		r = __syscall(SYS_wait4_time64, pid, status, options, kru64);
+		if (r > 0) {
+			ru->ru_utime = (struct timeval)
+				{ .tv_sec = kru64[0], .tv_usec = kru64[1] };
+			ru->ru_stime = (struct timeval)
+				{ .tv_sec = kru64[2], .tv_usec = kru64[3] };
+			char *slots = (char *)&ru->ru_maxrss;
+			for (int i=0; i<14; i++)
+				*(long *)(slots + i*sizeof(long)) = kru64[4+i];
+		}
+		if (SYS_wait4_time64 == SYS_wait4 || r != -ENOSYS)
+			return __syscall_ret(r);
+	}
+#endif
+	char *dest = ru ? (char *)&ru->ru_maxrss - 4*sizeof(long) : 0;
+	r = __sys_wait4(pid, status, options, dest);
+	if (r>0 && ru && sizeof(time_t) > sizeof(long)) {
+		long kru[4];
+		memcpy(kru, dest, 4*sizeof(long));
+		ru->ru_utime = (struct timeval)
+			{ .tv_sec = kru[0], .tv_usec = kru[1] };
+		ru->ru_stime = (struct timeval)
+			{ .tv_sec = kru[2], .tv_usec = kru[3] };
+	}
+	return __syscall_ret(r);
 }
diff --git a/src/locale/bind_textdomain_codeset.c b/src/locale/bind_textdomain_codeset.c
index 5ebfd5e8..240e83ed 100644
--- a/src/locale/bind_textdomain_codeset.c
+++ b/src/locale/bind_textdomain_codeset.c
@@ -5,7 +5,9 @@
 
 char *bind_textdomain_codeset(const char *domainname, const char *codeset)
 {
-	if (codeset && strcasecmp(codeset, "UTF-8"))
+	if (codeset && strcasecmp(codeset, "UTF-8")) {
 		errno = EINVAL;
-	return NULL;
+		return 0;
+	}
+	return "UTF-8";
 }
diff --git a/src/locale/catclose.c b/src/locale/catclose.c
index 02cd3e5c..54e24dd2 100644
--- a/src/locale/catclose.c
+++ b/src/locale/catclose.c
@@ -1,6 +1,14 @@
+#define _BSD_SOURCE
 #include <nl_types.h>
+#include <stdint.h>
+#include <endian.h>
+#include <sys/mman.h>
+
+#define V(p) be32toh(*(uint32_t *)(p))
 
 int catclose (nl_catd catd)
 {
+	char *map = (char *)catd;
+	munmap(map, V(map+8)+20);
 	return 0;
 }
diff --git a/src/locale/catgets.c b/src/locale/catgets.c
index bbee8986..71c31c1d 100644
--- a/src/locale/catgets.c
+++ b/src/locale/catgets.c
@@ -1,6 +1,38 @@
+#define _BSD_SOURCE
 #include <nl_types.h>
+#include <endian.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+
+#define V(p) be32toh(*(uint32_t *)(p))
+
+static int cmp(const void *a, const void *b)
+{
+	uint32_t x = V(a), y = V(b);
+	return x<y ? -1 : x>y ? 1 : 0;
+}
 
 char *catgets (nl_catd catd, int set_id, int msg_id, const char *s)
 {
-	return (char *)s;
+	const char *map = (const char *)catd;
+	uint32_t nsets = V(map+4);
+	const char *sets = map+20;
+	const char *msgs = map+20+V(map+12);
+	const char *strings = map+20+V(map+16);
+	uint32_t set_id_be = htobe32(set_id);
+	uint32_t msg_id_be = htobe32(msg_id);
+	const char *set = bsearch(&set_id_be, sets, nsets, 12, cmp);
+	if (!set) {
+		errno = ENOMSG;
+		return (char *)s;
+	}
+	uint32_t nmsgs = V(set+4);
+	msgs += 12*V(set+8);
+	const char *msg = bsearch(&msg_id_be, msgs, nmsgs, 12, cmp);
+	if (!msg) {
+		errno = ENOMSG;
+		return (char *)s;
+	}
+	return (char *)(strings + V(msg+8));
 }
diff --git a/src/locale/catopen.c b/src/locale/catopen.c
index 3fbc7717..97f2446d 100644
--- a/src/locale/catopen.c
+++ b/src/locale/catopen.c
@@ -1,8 +1,79 @@
+#define _BSD_SOURCE
 #include <nl_types.h>
+#include <string.h>
+#include <stdint.h>
+#include <endian.h>
 #include <errno.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <sys/mman.h>
+#include "libc.h"
 
-nl_catd catopen (const char *name, int oflag)
+#define V(p) be32toh(*(uint32_t *)(p))
+
+static nl_catd do_catopen(const char *name)
+{
+	size_t size;
+	const unsigned char *map = __map_file(name, &size);
+	/* Size recorded in the file must match file size; otherwise
+	 * the information needed to unmap the file will be lost. */
+	if (!map || V(map) != 0xff88ff89 || 20+V(map+8) != size) {
+		if(map) munmap((void *)map, size);
+		errno = ENOENT;
+		return (nl_catd)-1;
+	}
+	return (nl_catd)map;
+}
+
+nl_catd catopen(const char *name, int oflag)
 {
-	errno = EOPNOTSUPP;
+	nl_catd catd;
+
+	if (strchr(name, '/')) return do_catopen(name);
+
+	char buf[PATH_MAX];
+	size_t i;
+	const char *path, *lang, *p, *z;
+	if (libc.secure || !(path = getenv("NLSPATH"))) {
+		errno = ENOENT;
+		return (nl_catd)-1;
+	}
+	lang = oflag ? nl_langinfo(_NL_LOCALE_NAME(LC_MESSAGES)) : getenv("LANG");
+	if (!lang) lang = "";
+	for (p=path; *p; p=z) {
+		i = 0;
+		z = __strchrnul(p, ':');
+		for (; p<z; p++) {
+			const char *v;
+			size_t l;
+			if (*p!='%') v=p, l=1;
+			else switch (*++p) {
+			case 'N': v=name; l=strlen(v); break;
+			case 'L': v=lang; l=strlen(v); break;
+			case 'l': v=lang; l=strcspn(v,"_.@"); break;
+			case 't':
+				v=__strchrnul(lang,'_');
+				if (*v) v++;
+				l=strcspn(v,".@");
+				break;
+			case 'c': v="UTF-8"; l=5; break;
+			case '%': v="%"; l=1; break;
+			default: v=0;
+			}
+			if (!v || l >= sizeof buf - i) {
+				break;
+			}
+			memcpy(buf+i, v, l);
+			i += l;
+		}
+		if (!*z && (p<z || !i)) break;
+		if (p<z) continue;
+		if (*z) z++;
+		buf[i] = 0;
+		/* Leading : or :: in NLSPATH is same as %N */
+		catd = do_catopen(i ? buf : name);
+		if (catd != (nl_catd)-1) return catd;
+	}
+	errno = ENOENT;
 	return (nl_catd)-1;
 }
diff --git a/src/locale/codepages.h b/src/locale/codepages.h
index 4e236ef3..a254f0f5 100644
--- a/src/locale/codepages.h
+++ b/src/locale/codepages.h
@@ -286,6 +286,17 @@
 "\323\174\103\215\64\365\124\123\213\77\336\150\263\115\66\375\164\363\12\55"
 "\255\304\42\261\57\266\234\162\17\56\260\240\162\113\56\263\310\62\66\50"
 
+"cp858\0"
+"\0\40"
+"\307\360\223\216\70\344\200\123\316\71\352\254\203\316\73\356\260\103\114\61"
+"\311\230\143\14\75\366\310\263\117\76\377\130\303\15\76\243\140\163\15\135"
+"\341\264\63\217\76\361\104\243\212\56\277\270\302\112\57\274\204\262\312\56"
+"\140\207\55\66\315\72\7\43\14\60\251\104\375\163\321\113\213\122\212\315"
+"\67\363\274\163\316\63\367\74\316\60\110\13\175\65\325\116\373\254\65\51"
+"\360\100\243\314\62\310\220\334\214\63\317\340\134\163\327\134\233\302\314\326"
+"\323\174\103\215\64\365\124\123\213\77\336\150\263\115\66\375\164\363\12\55"
+"\255\304\42\261\57\266\234\162\17\56\260\240\162\113\56\263\310\62\66\50"
+
 "cp866\0"
 "\0\40"
 "\337\201\27\236\170\343\221\127\236\171\347\241\227\236\172"
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index 8b891d00..0b53286d 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -10,6 +10,12 @@
 #include "atomic.h"
 #include "pleval.h"
 #include "lock.h"
+#include "fork_impl.h"
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free undef
 
 struct binding {
 	struct binding *next;
@@ -34,9 +40,11 @@ static char *gettextdir(const char *domainname, size_t *dirlen)
 	return 0;
 }
 
+static volatile int lock[1];
+volatile int *const __gettext_lockptr = lock;
+
 char *bindtextdomain(const char *domainname, const char *dirname)
 {
-	static volatile int lock[1];
 	struct binding *p, *q;
 
 	if (!domainname) return 0;
@@ -122,6 +130,10 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
 	const struct __locale_map *lm;
 	size_t domlen;
 	struct binding *q;
+	int old_errno = errno;
+
+	/* match gnu gettext behaviour */
+	if (!msgid1) goto notrans;
 
 	if ((unsigned)category >= LC_ALL) goto notrans;
 
@@ -138,6 +150,7 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
 	lm = loc->cat[category];
 	if (!lm) {
 notrans:
+		errno = old_errno;
 		return (char *) ((n == 1) ? msgid1 : msgid2);
 	}
 
@@ -250,6 +263,7 @@ notrans:
 			trans += l+1;
 		}
 	}
+	errno = old_errno;
 	return (char *)trans;
 }
 
diff --git a/src/locale/duplocale.c b/src/locale/duplocale.c
index 030b64cb..5ce33ae6 100644
--- a/src/locale/duplocale.c
+++ b/src/locale/duplocale.c
@@ -3,6 +3,11 @@
 #include "locale_impl.h"
 #include "libc.h"
 
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
+
 locale_t __duplocale(locale_t old)
 {
 	locale_t new = malloc(sizeof *new);
diff --git a/src/locale/freelocale.c b/src/locale/freelocale.c
index 802b8bfe..385d1206 100644
--- a/src/locale/freelocale.c
+++ b/src/locale/freelocale.c
@@ -1,6 +1,11 @@
 #include <stdlib.h>
 #include "locale_impl.h"
 
+#define malloc undef
+#define calloc undef
+#define realloc undef
+#define free __libc_free
+
 void freelocale(locale_t l)
 {
 	if (__loc_is_allocated(l)) free(l);
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 3047c27b..52178950 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -49,10 +49,10 @@ static const unsigned char charmaps[] =
 "ucs4\0utf32\0\0\313"
 "ucs2\0\0\314"
 "eucjp\0\0\320"
-"shiftjis\0sjis\0\0\321"
+"shiftjis\0sjis\0cp932\0\0\321"
 "iso2022jp\0\0\322"
 "gb18030\0\0\330"
-"gbk\0\0\331"
+"gbk\0cp936\0windows936\0\0\331"
 "gb2312\0\0\332"
 "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
 "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
@@ -339,7 +339,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 			} else if (d-159 <= 252-159) {
 				c++;
 				d -= 159;
+			} else {
+				goto ilseq;
 			}
+			if (c>=84) goto ilseq;
 			c = jis0208[c][d];
 			if (!c) goto ilseq;
 			break;
@@ -403,6 +406,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 			if (c < 128) break;
 			if (c < 0xa1) goto ilseq;
 		case GBK:
+			if (c == 128) {
+				c = 0x20ac;
+				break;
+			}
 		case GB18030:
 			if (c < 128) break;
 			c -= 0x81;
@@ -495,7 +502,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 			if (c >= 93 || d >= 94) {
 				c += (0xa1-0x81);
 				d += 0xa1;
-				if (c >= 93 || c>=0xc6-0x81 && d>0x52)
+				if (c > 0xc6-0x81 || c==0xc6-0x81 && d>0x52)
 					goto ilseq;
 				if (d-'A'<26) d = d-'A';
 				else if (d-'a'<26) d = d-'a'+26;
@@ -538,6 +545,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 				if (*outb < k) goto toobig;
 				memcpy(*out, tmp, k);
 			} else k = wctomb_utf8(*out, c);
+			/* This failure condition should be unreachable, but
+			 * is included to prevent decoder bugs from translating
+			 * into advancement outside the output buffer range. */
+			if (k>4) goto ilseq;
 			*out += k;
 			*outb -= k;
 			break;
diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c
index 2321bac0..da61f7fc 100644
--- a/src/locale/locale_map.c
+++ b/src/locale/locale_map.c
@@ -1,9 +1,16 @@
 #include <locale.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <stdlib.h>
 #include "locale_impl.h"
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
+
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
 
 const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
 {
@@ -21,9 +28,11 @@ static const char envvars[][12] = {
 	"LC_MESSAGES",
 };
 
+volatile int __locale_lock[1];
+volatile int *const __locale_lockptr = __locale_lock;
+
 const struct __locale_map *__get_locale(int cat, const char *val)
 {
-	static volatile int lock[1];
 	static void *volatile loc_head;
 	const struct __locale_map *p;
 	struct __locale_map *new = 0;
@@ -54,20 +63,12 @@ const struct __locale_map *__get_locale(int cat, const char *val)
 	for (p=loc_head; p; p=p->next)
 		if (!strcmp(val, p->name)) return p;
 
-	LOCK(lock);
-
-	for (p=loc_head; p; p=p->next)
-		if (!strcmp(val, p->name)) {
-			UNLOCK(lock);
-			return p;
-		}
-
 	if (!libc.secure) path = getenv("MUSL_LOCPATH");
 	/* FIXME: add a default path? */
 
 	if (path) for (; *path; path=z+!!*z) {
 		z = __strchrnul(path, ':');
-		l = z - path - !!*z;
+		l = z - path;
 		if (l >= sizeof buf - n - 2) continue;
 		memcpy(buf, path, l);
 		buf[l] = '/';
@@ -108,6 +109,5 @@ const struct __locale_map *__get_locale(int cat, const char *val)
 	 * requested name was "C" or "POSIX". */
 	if (!new && cat == LC_CTYPE) new = (void *)&__c_dot_utf8;
 
-	UNLOCK(lock);
 	return new;
 }
diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c
index d20a8489..9ac3cd38 100644
--- a/src/locale/newlocale.c
+++ b/src/locale/newlocale.c
@@ -2,16 +2,15 @@
 #include <string.h>
 #include <pthread.h>
 #include "locale_impl.h"
+#include "lock.h"
 
-static pthread_once_t default_locale_once;
-static struct __locale_struct default_locale, default_ctype_locale;
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
 
-static void default_locale_init(void)
-{
-	for (int i=0; i<LC_ALL; i++)
-		default_locale.cat[i] = __get_locale(i, "");
-	default_ctype_locale.cat[LC_CTYPE] = default_locale.cat[LC_CTYPE];
-}
+static int default_locale_init_done;
+static struct __locale_struct default_locale, default_ctype_locale;
 
 int __loc_is_allocated(locale_t loc)
 {
@@ -19,7 +18,7 @@ int __loc_is_allocated(locale_t loc)
 		&& loc != &default_locale && loc != &default_ctype_locale;
 }
 
-locale_t __newlocale(int mask, const char *name, locale_t loc)
+static locale_t do_newlocale(int mask, const char *name, locale_t loc)
 {
 	struct __locale_struct tmp;
 
@@ -44,7 +43,12 @@ locale_t __newlocale(int mask, const char *name, locale_t loc)
 
 	/* And provide builtins for the initial default locale, and a
 	 * variant of the C locale honoring the default locale's encoding. */
-	pthread_once(&default_locale_once, default_locale_init);
+	if (!default_locale_init_done) {
+		for (int i=0; i<LC_ALL; i++)
+			default_locale.cat[i] = __get_locale(i, "");
+		default_ctype_locale.cat[LC_CTYPE] = default_locale.cat[LC_CTYPE];
+		default_locale_init_done = 1;
+	}
 	if (!memcmp(&tmp, &default_locale, sizeof tmp)) return &default_locale;
 	if (!memcmp(&tmp, &default_ctype_locale, sizeof tmp))
 		return &default_ctype_locale;
@@ -55,4 +59,12 @@ locale_t __newlocale(int mask, const char *name, locale_t loc)
 	return loc;
 }
 
+locale_t __newlocale(int mask, const char *name, locale_t loc)
+{
+	LOCK(__locale_lock);
+	loc = do_newlocale(mask, name, loc);
+	UNLOCK(__locale_lock);
+	return loc;
+}
+
 weak_alias(__newlocale, newlocale);
diff --git a/src/locale/setlocale.c b/src/locale/setlocale.c
index 2bc7b500..360c4437 100644
--- a/src/locale/setlocale.c
+++ b/src/locale/setlocale.c
@@ -9,12 +9,11 @@ static char buf[LC_ALL*(LOCALE_NAME_MAX+1)];
 
 char *setlocale(int cat, const char *name)
 {
-	static volatile int lock[1];
 	const struct __locale_map *lm;
 
 	if ((unsigned)cat > LC_ALL) return 0;
 
-	LOCK(lock);
+	LOCK(__locale_lock);
 
 	/* For LC_ALL, setlocale is required to return a string which
 	 * encodes the current setting for all categories. The format of
@@ -36,7 +35,7 @@ char *setlocale(int cat, const char *name)
 				}
 				lm = __get_locale(i, part);
 				if (lm == LOC_MAP_FAILED) {
-					UNLOCK(lock);
+					UNLOCK(__locale_lock);
 					return 0;
 				}
 				tmp_locale.cat[i] = lm;
@@ -57,14 +56,14 @@ char *setlocale(int cat, const char *name)
 			s += l+1;
 		}
 		*--s = 0;
-		UNLOCK(lock);
+		UNLOCK(__locale_lock);
 		return same==LC_ALL ? (char *)part : buf;
 	}
 
 	if (name) {
 		lm = __get_locale(cat, name);
 		if (lm == LOC_MAP_FAILED) {
-			UNLOCK(lock);
+			UNLOCK(__locale_lock);
 			return 0;
 		}
 		libc.global_locale.cat[cat] = lm;
@@ -73,7 +72,7 @@ char *setlocale(int cat, const char *name)
 	}
 	char *ret = lm ? (char *)lm->name : "C";
 
-	UNLOCK(lock);
+	UNLOCK(__locale_lock);
 
 	return ret;
 }
diff --git a/src/locale/strtod_l.c b/src/locale/strtod_l.c
new file mode 100644
index 00000000..574ba148
--- /dev/null
+++ b/src/locale/strtod_l.c
@@ -0,0 +1,22 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <locale.h>
+
+float strtof_l(const char *restrict s, char **restrict p, locale_t l)
+{
+	return strtof(s, p);
+}
+
+double strtod_l(const char *restrict s, char **restrict p, locale_t l)
+{
+	return strtod(s, p);
+}
+
+long double strtold_l(const char *restrict s, char **restrict p, locale_t l)
+{
+	return strtold(s, p);
+}
+
+weak_alias(strtof_l, __strtof_l);
+weak_alias(strtod_l, __strtod_l);
+weak_alias(strtold_l, __strtold_l);
diff --git a/src/malloc/DESIGN b/src/malloc/DESIGN
deleted file mode 100644
index 58b0523f..00000000
--- a/src/malloc/DESIGN
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-In principle, this memory allocator is roughly equivalent to Doug
-Lea's dlmalloc with fine-grained locking.
-
-
-
-malloc:
-
-Uses a freelist binned by chunk size, with a bitmap to optimize
-searching for the smallest non-empty bin which can satisfy an
-allocation. If no free chunks are available, it creates a new chunk of
-the requested size and attempts to merge it with any existing free
-chunk immediately below the newly created chunk.
-
-Whether the chunk was obtained from a bin or newly created, it's
-likely to be larger than the requested allocation. malloc always
-finishes its work by passing the new chunk to realloc, which will
-split it into two chunks and free the tail portion.
-
-
-
diff --git a/src/malloc/aligned_alloc.c b/src/malloc/aligned_alloc.c
deleted file mode 100644
index b6143f30..00000000
--- a/src/malloc/aligned_alloc.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <stdlib.h>
-#include "malloc_impl.h"
-
-void *aligned_alloc(size_t align, size_t len)
-{
-	return __memalign(align, len);
-}
diff --git a/src/malloc/calloc.c b/src/malloc/calloc.c
new file mode 100644
index 00000000..bf6bddca
--- /dev/null
+++ b/src/malloc/calloc.c
@@ -0,0 +1,45 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include "dynlink.h"
+
+static size_t mal0_clear(char *p, size_t n)
+{
+	const size_t pagesz = 4096; /* arbitrary */
+	if (n < pagesz) return n;
+#ifdef __GNUC__
+	typedef uint64_t __attribute__((__may_alias__)) T;
+#else
+	typedef unsigned char T;
+#endif
+	char *pp = p + n;
+	size_t i = (uintptr_t)pp & (pagesz - 1);
+	for (;;) {
+		pp = memset(pp - i, 0, i);
+		if (pp - p < pagesz) return pp - p;
+		for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
+		        if (((T *)pp)[-1] | ((T *)pp)[-2])
+				break;
+	}
+}
+
+static int allzerop(void *p)
+{
+	return 0;
+}
+weak_alias(allzerop, __malloc_allzerop);
+
+void *calloc(size_t m, size_t n)
+{
+	if (n && m > (size_t)-1/n) {
+		errno = ENOMEM;
+		return 0;
+	}
+	n *= m;
+	void *p = malloc(n);
+	if (!p || (!__malloc_replaced && __malloc_allzerop(p)))
+		return p;
+	n = mal0_clear(p, n);
+	return memset(p, 0, n);
+}
diff --git a/src/malloc/expand_heap.c b/src/malloc/expand_heap.c
deleted file mode 100644
index e6a3d7a0..00000000
--- a/src/malloc/expand_heap.c
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <limits.h>
-#include <stdint.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include "libc.h"
-#include "syscall.h"
-#include "malloc_impl.h"
-
-/* This function returns true if the interval [old,new]
- * intersects the 'len'-sized interval below &libc.auxv
- * (interpreted as the main-thread stack) or below &b
- * (the current stack). It is used to defend against
- * buggy brk implementations that can cross the stack. */
-
-static int traverses_stack_p(uintptr_t old, uintptr_t new)
-{
-	const uintptr_t len = 8<<20;
-	uintptr_t a, b;
-
-	b = (uintptr_t)libc.auxv;
-	a = b > len ? b-len : 0;
-	if (new>a && old<b) return 1;
-
-	b = (uintptr_t)&b;
-	a = b > len ? b-len : 0;
-	if (new>a && old<b) return 1;
-
-	return 0;
-}
-
-/* Expand the heap in-place if brk can be used, or otherwise via mmap,
- * using an exponential lower bound on growth by mmap to make
- * fragmentation asymptotically irrelevant. The size argument is both
- * an input and an output, since the caller needs to know the size
- * allocated, which will be larger than requested due to page alignment
- * and mmap minimum size rules. The caller is responsible for locking
- * to prevent concurrent calls. */
-
-void *__expand_heap(size_t *pn)
-{
-	static uintptr_t brk;
-	static unsigned mmap_step;
-	size_t n = *pn;
-
-	if (n > SIZE_MAX/2 - PAGE_SIZE) {
-		errno = ENOMEM;
-		return 0;
-	}
-	n += -n & PAGE_SIZE-1;
-
-	if (!brk) {
-		brk = __syscall(SYS_brk, 0);
-		brk += -brk & PAGE_SIZE-1;
-	}
-
-	if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
-	    && __syscall(SYS_brk, brk+n)==brk+n) {
-		*pn = n;
-		brk += n;
-		return (void *)(brk-n);
-	}
-
-	size_t min = (size_t)PAGE_SIZE << mmap_step/2;
-	if (n < min) n = min;
-	void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
-		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-	if (area == MAP_FAILED) return 0;
-	*pn = n;
-	mmap_step++;
-	return area;
-}
diff --git a/src/malloc/free.c b/src/malloc/free.c
new file mode 100644
index 00000000..3944f7b2
--- /dev/null
+++ b/src/malloc/free.c
@@ -0,0 +1,6 @@
+#include <stdlib.h>
+
+void free(void *p)
+{
+	__libc_free(p);
+}
diff --git a/src/malloc/libc_calloc.c b/src/malloc/libc_calloc.c
new file mode 100644
index 00000000..d25eabea
--- /dev/null
+++ b/src/malloc/libc_calloc.c
@@ -0,0 +1,4 @@
+#define calloc __libc_calloc
+#define malloc __libc_malloc
+
+#include "calloc.c"
diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c
index 050d84f6..43a988fb 100644
--- a/src/malloc/lite_malloc.c
+++ b/src/malloc/lite_malloc.c
@@ -2,58 +2,117 @@
 #include <stdint.h>
 #include <limits.h>
 #include <errno.h>
+#include <sys/mman.h>
+#include "libc.h"
 #include "lock.h"
-#include "malloc_impl.h"
+#include "syscall.h"
+#include "fork_impl.h"
 
 #define ALIGN 16
 
+/* This function returns true if the interval [old,new]
+ * intersects the 'len'-sized interval below &libc.auxv
+ * (interpreted as the main-thread stack) or below &b
+ * (the current stack). It is used to defend against
+ * buggy brk implementations that can cross the stack. */
+
+static int traverses_stack_p(uintptr_t old, uintptr_t new)
+{
+	const uintptr_t len = 8<<20;
+	uintptr_t a, b;
+
+	b = (uintptr_t)libc.auxv;
+	a = b > len ? b-len : 0;
+	if (new>a && old<b) return 1;
+
+	b = (uintptr_t)&b;
+	a = b > len ? b-len : 0;
+	if (new>a && old<b) return 1;
+
+	return 0;
+}
+
+static volatile int lock[1];
+volatile int *const __bump_lockptr = lock;
+
 static void *__simple_malloc(size_t n)
 {
-	static char *cur, *end;
-	static volatile int lock[1];
-	size_t align=1, pad;
+	static uintptr_t brk, cur, end;
+	static unsigned mmap_step;
+	size_t align=1;
 	void *p;
 
+	if (n > SIZE_MAX/2) {
+		errno = ENOMEM;
+		return 0;
+	}
+
 	if (!n) n++;
 	while (align<n && align<ALIGN)
 		align += align;
 
 	LOCK(lock);
 
-	pad = -(uintptr_t)cur & align-1;
-
-	if (n <= SIZE_MAX/2 + ALIGN) n += pad;
+	cur += -cur & align-1;
 
 	if (n > end-cur) {
-		size_t m = n;
-		char *new = __expand_heap(&m);
-		if (!new) {
-			UNLOCK(lock);
-			return 0;
+		size_t req = n - (end-cur) + PAGE_SIZE-1 & -PAGE_SIZE;
+
+		if (!cur) {
+			brk = __syscall(SYS_brk, 0);
+			brk += -brk & PAGE_SIZE-1;
+			cur = end = brk;
 		}
-		if (new != end) {
-			cur = new;
-			n -= pad;
-			pad = 0;
+
+		if (brk == end && req < SIZE_MAX-brk
+		    && !traverses_stack_p(brk, brk+req)
+		    && __syscall(SYS_brk, brk+req)==brk+req) {
+			brk = end += req;
+		} else {
+			int new_area = 0;
+			req = n + PAGE_SIZE-1 & -PAGE_SIZE;
+			/* Only make a new area rather than individual mmap
+			 * if wasted space would be over 1/8 of the map. */
+			if (req-n > req/8) {
+				/* Geometric area size growth up to 64 pages,
+				 * bounding waste by 1/8 of the area. */
+				size_t min = PAGE_SIZE<<(mmap_step/2);
+				if (min-n > end-cur) {
+					if (req < min) {
+						req = min;
+						if (mmap_step < 12)
+							mmap_step++;
+					}
+					new_area = 1;
+				}
+			}
+			void *mem = __mmap(0, req, PROT_READ|PROT_WRITE,
+				MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+			if (mem == MAP_FAILED || !new_area) {
+				UNLOCK(lock);
+				return mem==MAP_FAILED ? 0 : mem;
+			}
+			cur = (uintptr_t)mem;
+			end = cur + req;
 		}
-		end = new + m;
 	}
 
-	p = cur + pad;
+	p = (void *)cur;
 	cur += n;
 	UNLOCK(lock);
 	return p;
 }
 
-weak_alias(__simple_malloc, malloc);
+weak_alias(__simple_malloc, __libc_malloc_impl);
 
-static void *__simple_calloc(size_t m, size_t n)
+void *__libc_malloc(size_t n)
 {
-	if (n && m > (size_t)-1/n) {
-		errno = ENOMEM;
-		return 0;
-	}
-	return __simple_malloc(n * m);
+	return __libc_malloc_impl(n);
+}
+
+static void *default_malloc(size_t n)
+{
+	return __libc_malloc_impl(n);
 }
 
-weak_alias(__simple_calloc, calloc);
+weak_alias(default_malloc, malloc);
diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c
new file mode 100644
index 00000000..e0862a83
--- /dev/null
+++ b/src/malloc/mallocng/aligned_alloc.c
@@ -0,0 +1,60 @@
+#include <stdlib.h>
+#include <errno.h>
+#include "meta.h"
+
+void *aligned_alloc(size_t align, size_t len)
+{
+	if ((align & -align) != align) {
+		errno = EINVAL;
+		return 0;
+	}
+
+	if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) {
+		errno = ENOMEM;
+		return 0;
+	}
+
+	if (DISABLE_ALIGNED_ALLOC) {
+		errno = ENOMEM;
+		return 0;
+	}
+
+	if (align <= UNIT) align = UNIT;
+
+	unsigned char *p = malloc(len + align - UNIT);
+	if (!p)
+		return 0;
+
+	struct meta *g = get_meta(p);
+	int idx = get_slot_index(p);
+	size_t stride = get_stride(g);
+	unsigned char *start = g->mem->storage + stride*idx;
+	unsigned char *end = g->mem->storage + stride*(idx+1) - IB;
+	size_t adj = -(uintptr_t)p & (align-1);
+
+	if (!adj) {
+		set_size(p, end, len);
+		return p;
+	}
+	p += adj;
+	uint32_t offset = (size_t)(p-g->mem->storage)/UNIT;
+	if (offset <= 0xffff) {
+		*(uint16_t *)(p-2) = offset;
+		p[-4] = 0;
+	} else {
+		// use a 32-bit offset if 16-bit doesn't fit. for this,
+		// 16-bit field must be zero, [-4] byte nonzero.
+		*(uint16_t *)(p-2) = 0;
+		*(uint32_t *)(p-8) = offset;
+		p[-4] = 1;
+	}
+	p[-3] = idx;
+	set_size(p, end, len);
+	// store offset to aligned enframing. this facilitates cycling
+	// offset and also iteration of heap for debugging/measurement.
+	// for extreme overalignment it won't fit but these are classless
+	// allocations anyway.
+	*(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT;
+	start[-3] = 7<<5;
+	return p;
+}
diff --git a/src/malloc/mallocng/donate.c b/src/malloc/mallocng/donate.c
new file mode 100644
index 00000000..41d850f3
--- /dev/null
+++ b/src/malloc/mallocng/donate.c
@@ -0,0 +1,39 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include "meta.h"
+
+static void donate(unsigned char *base, size_t len)
+{
+	uintptr_t a = (uintptr_t)base;
+	uintptr_t b = a + len;
+	a += -a & (UNIT-1);
+	b -= b & (UNIT-1);
+	memset(base, 0, len);
+	for (int sc=47; sc>0 && b>a; sc-=4) {
+		if (b-a < (size_classes[sc]+1)*UNIT) continue;
+		struct meta *m = alloc_meta();
+		m->avail_mask = 0;
+		m->freed_mask = 1;
+		m->mem = (void *)a;
+		m->mem->meta = m;
+		m->last_idx = 0;
+		m->freeable = 0;
+		m->sizeclass = sc;
+		m->maplen = 0;
+		*((unsigned char *)m->mem+UNIT-4) = 0;
+		*((unsigned char *)m->mem+UNIT-3) = 255;
+		m->mem->storage[size_classes[sc]*UNIT-4] = 0;
+		queue(&ctx.active[sc], m);
+		a += (size_classes[sc]+1)*UNIT;
+	}
+}
+
+void __malloc_donate(char *start, char *end)
+{
+	donate((void *)start, end-start);
+}
diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c
new file mode 100644
index 00000000..43f32aad
--- /dev/null
+++ b/src/malloc/mallocng/free.c
@@ -0,0 +1,151 @@
+#define _BSD_SOURCE
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "meta.h"
+
+struct mapinfo {
+	void *base;
+	size_t len;
+};
+
+static struct mapinfo nontrivial_free(struct meta *, int);
+
+static struct mapinfo free_group(struct meta *g)
+{
+	struct mapinfo mi = { 0 };
+	int sc = g->sizeclass;
+	if (sc < 48) {
+		ctx.usage_by_class[sc] -= g->last_idx+1;
+	}
+	if (g->maplen) {
+		step_seq();
+		record_seq(sc);
+		mi.base = g->mem;
+		mi.len = g->maplen*4096UL;
+	} else {
+		void *p = g->mem;
+		struct meta *m = get_meta(p);
+		int idx = get_slot_index(p);
+		g->mem->meta = 0;
+		// not checking size/reserved here; it's intentionally invalid
+		mi = nontrivial_free(m, idx);
+	}
+	free_meta(g);
+	return mi;
+}
+
+static int okay_to_free(struct meta *g)
+{
+	int sc = g->sizeclass;
+
+	if (!g->freeable) return 0;
+
+	// always free individual mmaps not suitable for reuse
+	if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc])
+		return 1;
+
+	// always free groups allocated inside another group's slot
+	// since recreating them should not be expensive and they
+	// might be blocking freeing of a much larger group.
+	if (!g->maplen) return 1;
+
+	// if there is another non-full group, free this one to
+	// consolidate future allocations, reduce fragmentation.
+	if (g->next != g) return 1;
+
+	// free any group in a size class that's not bouncing
+	if (!is_bouncing(sc)) return 1;
+
+	size_t cnt = g->last_idx+1;
+	size_t usage = ctx.usage_by_class[sc];
+
+	// if usage is high enough that a larger count should be
+	// used, free the low-count group so a new one will be made.
+	if (9*cnt <= usage && cnt < 20)
+		return 1;
+
+	// otherwise, keep the last group in a bouncing class.
+	return 0;
+}
+
+static struct mapinfo nontrivial_free(struct meta *g, int i)
+{
+	uint32_t self = 1u<<i;
+	int sc = g->sizeclass;
+	uint32_t mask = g->freed_mask | g->avail_mask;
+
+	if (mask+self == (2u<<g->last_idx)-1 && okay_to_free(g)) {
+		// any multi-slot group is necessarily on an active list
+		// here, but single-slot groups might or might not be.
+		if (g->next) {
+			assert(sc < 48);
+			int activate_new = (ctx.active[sc]==g);
+			dequeue(&ctx.active[sc], g);
+			if (activate_new && ctx.active[sc])
+				activate_group(ctx.active[sc]);
+		}
+		return free_group(g);
+	} else if (!mask) {
+		assert(sc < 48);
+		// might still be active if there were no allocations
+		// after last available slot was taken.
+		if (ctx.active[sc] != g) {
+			queue(&ctx.active[sc], g);
+		}
+	}
+	a_or(&g->freed_mask, self);
+	return (struct mapinfo){ 0 };
+}
+
+void free(void *p)
+{
+	if (!p) return;
+
+	struct meta *g = get_meta(p);
+	int idx = get_slot_index(p);
+	size_t stride = get_stride(g);
+	unsigned char *start = g->mem->storage + stride*idx;
+	unsigned char *end = start + stride - IB;
+	get_nominal_size(p, end);
+	uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1;
+	((unsigned char *)p)[-3] = 255;
+	// invalidate offset to group header, and cycle offset of
+	// used region within slot if current offset is zero.
+	*(uint16_t *)((char *)p-2) = 0;
+
+	// release any whole pages contained in the slot to be freed
+	// unless it's a single-slot group that will be unmapped.
+	if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) {
+		unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1));
+		size_t len = (end-base) & -PGSZ;
+		if (len && USE_MADV_FREE) {
+			int e = errno;
+			madvise(base, len, MADV_FREE);
+			errno = e;
+		}
+	}
+
+	// atomic free without locking if this is neither first or last slot
+	for (;;) {
+		uint32_t freed = g->freed_mask;
+		uint32_t avail = g->avail_mask;
+		uint32_t mask = freed | avail;
+		assert(!(mask&self));
+		if (!freed || mask+self==all) break;
+		if (!MT)
+			g->freed_mask = freed+self;
+		else if (a_cas(&g->freed_mask, freed, freed+self)!=freed)
+			continue;
+		return;
+	}
+
+	wrlock();
+	struct mapinfo mi = nontrivial_free(g, idx);
+	unlock();
+	if (mi.len) {
+		int e = errno;
+		munmap(mi.base, mi.len);
+		errno = e;
+	}
+}
diff --git a/src/malloc/mallocng/glue.h b/src/malloc/mallocng/glue.h
new file mode 100644
index 00000000..77f4c812
--- /dev/null
+++ b/src/malloc/mallocng/glue.h
@@ -0,0 +1,95 @@
+#ifndef MALLOC_GLUE_H
+#define MALLOC_GLUE_H
+
+#include <stdint.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <elf.h>
+#include <string.h>
+#include "atomic.h"
+#include "syscall.h"
+#include "libc.h"
+#include "lock.h"
+#include "dynlink.h"
+
+// use macros to appropriately namespace these.
+#define size_classes __malloc_size_classes
+#define ctx __malloc_context
+#define alloc_meta __malloc_alloc_meta
+#define is_allzero __malloc_allzerop
+#define dump_heap __dump_heap
+
+#define malloc __libc_malloc_impl
+#define realloc __libc_realloc
+#define free __libc_free
+
+#define USE_MADV_FREE 0
+
+#if USE_REAL_ASSERT
+#include <assert.h>
+#else
+#undef assert
+#define assert(x) do { if (!(x)) a_crash(); } while(0)
+#endif
+
+#define brk(p) ((uintptr_t)__syscall(SYS_brk, p))
+
+#define mmap __mmap
+#define madvise __madvise
+#define mremap __mremap
+
+#define DISABLE_ALIGNED_ALLOC (__malloc_replaced && !__aligned_alloc_replaced)
+
+static inline uint64_t get_random_secret()
+{
+	uint64_t secret = (uintptr_t)&secret * 1103515245;
+	for (size_t i=0; libc.auxv[i]; i+=2)
+		if (libc.auxv[i]==AT_RANDOM)
+			memcpy(&secret, (char *)libc.auxv[i+1]+8, sizeof secret);
+	return secret;
+}
+
+#ifndef PAGESIZE
+#define PAGESIZE PAGE_SIZE
+#endif
+
+#define MT (libc.need_locks)
+
+#define RDLOCK_IS_EXCLUSIVE 1
+
+__attribute__((__visibility__("hidden")))
+extern int __malloc_lock[1];
+
+#define LOCK_OBJ_DEF \
+int __malloc_lock[1]; \
+void __malloc_atfork(int who) { malloc_atfork(who); }
+
+static inline void rdlock()
+{
+	if (MT) LOCK(__malloc_lock);
+}
+static inline void wrlock()
+{
+	if (MT) LOCK(__malloc_lock);
+}
+static inline void unlock()
+{
+	UNLOCK(__malloc_lock);
+}
+static inline void upgradelock()
+{
+}
+static inline void resetlock()
+{
+	__malloc_lock[0] = 0;
+}
+
+static inline void malloc_atfork(int who)
+{
+	if (who<0) rdlock();
+	else if (who>0) resetlock();
+	else unlock();
+}
+
+#endif
diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
new file mode 100644
index 00000000..d695ab8e
--- /dev/null
+++ b/src/malloc/mallocng/malloc.c
@@ -0,0 +1,387 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include "meta.h"
+
+LOCK_OBJ_DEF;
+
+const uint16_t size_classes[] = {
+	1, 2, 3, 4, 5, 6, 7, 8,
+	9, 10, 12, 15,
+	18, 20, 25, 31,
+	36, 42, 50, 63,
+	72, 84, 102, 127,
+	146, 170, 204, 255,
+	292, 340, 409, 511,
+	584, 682, 818, 1023,
+	1169, 1364, 1637, 2047,
+	2340, 2730, 3276, 4095,
+	4680, 5460, 6552, 8191,
+};
+
+static const uint8_t small_cnt_tab[][3] = {
+	{ 30, 30, 30 },
+	{ 31, 15, 15 },
+	{ 20, 10, 10 },
+	{ 31, 15, 7 },
+	{ 25, 12, 6 },
+	{ 21, 10, 5 },
+	{ 18, 8, 4 },
+	{ 31, 15, 7 },
+	{ 28, 14, 6 },
+};
+
+static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 };
+
+struct malloc_context ctx = { 0 };
+
+struct meta *alloc_meta(void)
+{
+	struct meta *m;
+	unsigned char *p;
+	if (!ctx.init_done) {
+#ifndef PAGESIZE
+		ctx.pagesize = get_page_size();
+#endif
+		ctx.secret = get_random_secret();
+		ctx.init_done = 1;
+	}
+	size_t pagesize = PGSZ;
+	if (pagesize < 4096) pagesize = 4096;
+	if ((m = dequeue_head(&ctx.free_meta_head))) return m;
+	if (!ctx.avail_meta_count) {
+		int need_unprotect = 1;
+		if (!ctx.avail_meta_area_count && ctx.brk!=-1) {
+			uintptr_t new = ctx.brk + pagesize;
+			int need_guard = 0;
+			if (!ctx.brk) {
+				need_guard = 1;
+				ctx.brk = brk(0);
+				// some ancient kernels returned _ebss
+				// instead of next page as initial brk.
+				ctx.brk += -ctx.brk & (pagesize-1);
+				new = ctx.brk + 2*pagesize;
+			}
+			if (brk(new) != new) {
+				ctx.brk = -1;
+			} else {
+				if (need_guard) mmap((void *)ctx.brk, pagesize,
+					PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+				ctx.brk = new;
+				ctx.avail_meta_areas = (void *)(new - pagesize);
+				ctx.avail_meta_area_count = pagesize>>12;
+				need_unprotect = 0;
+			}
+		}
+		if (!ctx.avail_meta_area_count) {
+			size_t n = 2UL << ctx.meta_alloc_shift;
+			p = mmap(0, n*pagesize, PROT_NONE,
+				MAP_PRIVATE|MAP_ANON, -1, 0);
+			if (p==MAP_FAILED) return 0;
+			ctx.avail_meta_areas = p + pagesize;
+			ctx.avail_meta_area_count = (n-1)*(pagesize>>12);
+			ctx.meta_alloc_shift++;
+		}
+		p = ctx.avail_meta_areas;
+		if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0;
+		if (need_unprotect)
+			if (mprotect(p, pagesize, PROT_READ|PROT_WRITE)
+			    && errno != ENOSYS)
+				return 0;
+		ctx.avail_meta_area_count--;
+		ctx.avail_meta_areas = p + 4096;
+		if (ctx.meta_area_tail) {
+			ctx.meta_area_tail->next = (void *)p;
+		} else {
+			ctx.meta_area_head = (void *)p;
+		}
+		ctx.meta_area_tail = (void *)p;
+		ctx.meta_area_tail->check = ctx.secret;
+		ctx.avail_meta_count = ctx.meta_area_tail->nslots
+			= (4096-sizeof(struct meta_area))/sizeof *m;
+		ctx.avail_meta = ctx.meta_area_tail->slots;
+	}
+	ctx.avail_meta_count--;
+	m = ctx.avail_meta++;
+	m->prev = m->next = 0;
+	return m;
+}
+
+static uint32_t try_avail(struct meta **pm)
+{
+	struct meta *m = *pm;
+	uint32_t first;
+	if (!m) return 0;
+	uint32_t mask = m->avail_mask;
+	if (!mask) {
+		if (!m) return 0;
+		if (!m->freed_mask) {
+			dequeue(pm, m);
+			m = *pm;
+			if (!m) return 0;
+		} else {
+			m = m->next;
+			*pm = m;
+		}
+
+		mask = m->freed_mask;
+
+		// skip fully-free group unless it's the only one
+		// or it's a permanently non-freeable group
+		if (mask == (2u<<m->last_idx)-1 && m->freeable) {
+			m = m->next;
+			*pm = m;
+			mask = m->freed_mask;
+		}
+
+		// activate more slots in a not-fully-active group
+		// if needed, but only as a last resort. prefer using
+		// any other group with free slots. this avoids
+		// touching & dirtying as-yet-unused pages.
+		if (!(mask & ((2u<<m->mem->active_idx)-1))) {
+			if (m->next != m) {
+				m = m->next;
+				*pm = m;
+			} else {
+				int cnt = m->mem->active_idx + 2;
+				int size = size_classes[m->sizeclass]*UNIT;
+				int span = UNIT + size*cnt;
+				// activate up to next 4k boundary
+				while ((span^(span+size-1)) < 4096) {
+					cnt++;
+					span += size;
+				}
+				if (cnt > m->last_idx+1)
+					cnt = m->last_idx+1;
+				m->mem->active_idx = cnt-1;
+			}
+		}
+		mask = activate_group(m);
+		assert(mask);
+		decay_bounces(m->sizeclass);
+	}
+	first = mask&-mask;
+	m->avail_mask = mask-first;
+	return first;
+}
+
+static int alloc_slot(int, size_t);
+
+static struct meta *alloc_group(int sc, size_t req)
+{
+	size_t size = UNIT*size_classes[sc];
+	int i = 0, cnt;
+	unsigned char *p;
+	struct meta *m = alloc_meta();
+	if (!m) return 0;
+	size_t usage = ctx.usage_by_class[sc];
+	size_t pagesize = PGSZ;
+	int active_idx;
+	if (sc < 9) {
+		while (i<2 && 4*small_cnt_tab[sc][i] > usage)
+			i++;
+		cnt = small_cnt_tab[sc][i];
+	} else {
+		// lookup max number of slots fitting in power-of-two size
+		// from a table, along with number of factors of two we
+		// can divide out without a remainder or reaching 1.
+		cnt = med_cnt_tab[sc&3];
+
+		// reduce cnt to avoid excessive eagar allocation.
+		while (!(cnt&1) && 4*cnt > usage)
+			cnt >>= 1;
+
+		// data structures don't support groups whose slot offsets
+		// in units don't fit in 16 bits.
+		while (size*cnt >= 65536*UNIT)
+			cnt >>= 1;
+	}
+
+	// If we selected a count of 1 above but it's not sufficient to use
+	// mmap, increase to 2. Then it might be; if not it will nest.
+	if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2;
+
+	// All choices of size*cnt are "just below" a power of two, so anything
+	// larger than half the page size should be allocated as whole pages.
+	if (size*cnt+UNIT > pagesize/2) {
+		// check/update bounce counter to start/increase retention
+		// of freed maps, and inhibit use of low-count, odd-size
+		// small mappings and single-slot groups if activated.
+		int nosmall = is_bouncing(sc);
+		account_bounce(sc);
+		step_seq();
+
+		// since the following count reduction opportunities have
+		// an absolute memory usage cost, don't overdo them. count
+		// coarse usage as part of usage.
+		if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1];
+
+		// try to drop to a lower count if the one found above
+		// increases usage by more than 25%. these reduced counts
+		// roughly fill an integral number of pages, just not a
+		// power of two, limiting amount of unusable space.
+		if (4*cnt > usage && !nosmall) {
+			if (0);
+			else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2;
+			else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3;
+			else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3;
+			else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5;
+		}
+		size_t needed = size*cnt + UNIT;
+		needed += -needed & (pagesize-1);
+
+		// produce an individually-mmapped allocation if usage is low,
+		// bounce counter hasn't triggered, and either it saves memory
+		// or it avoids eagar slot allocation without wasting too much.
+		if (!nosmall && cnt<=7) {
+			req += IB + UNIT;
+			req += -req & (pagesize-1);
+			if (req<size+UNIT || (req>=4*pagesize && 2*cnt>usage)) {
+				cnt = 1;
+				needed = req;
+			}
+		}
+
+		p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+		if (p==MAP_FAILED) {
+			free_meta(m);
+			return 0;
+		}
+		m->maplen = needed>>12;
+		ctx.mmap_counter++;
+		active_idx = (4096-UNIT)/size-1;
+		if (active_idx > cnt-1) active_idx = cnt-1;
+		if (active_idx < 0) active_idx = 0;
+	} else {
+		int j = size_to_class(UNIT+cnt*size-IB);
+		int idx = alloc_slot(j, UNIT+cnt*size-IB);
+		if (idx < 0) {
+			free_meta(m);
+			return 0;
+		}
+		struct meta *g = ctx.active[j];
+		p = enframe(g, idx, UNIT*size_classes[j]-IB, ctx.mmap_counter);
+		m->maplen = 0;
+		p[-3] = (p[-3]&31) | (6<<5);
+		for (int i=0; i<=cnt; i++)
+			p[UNIT+i*size-4] = 0;
+		active_idx = cnt-1;
+	}
+	ctx.usage_by_class[sc] += cnt;
+	m->avail_mask = (2u<<active_idx)-1;
+	m->freed_mask = (2u<<(cnt-1))-1 - m->avail_mask;
+	m->mem = (void *)p;
+	m->mem->meta = m;
+	m->mem->active_idx = active_idx;
+	m->last_idx = cnt-1;
+	m->freeable = 1;
+	m->sizeclass = sc;
+	return m;
+}
+
+static int alloc_slot(int sc, size_t req)
+{
+	uint32_t first = try_avail(&ctx.active[sc]);
+	if (first) return a_ctz_32(first);
+
+	struct meta *g = alloc_group(sc, req);
+	if (!g) return -1;
+
+	g->avail_mask--;
+	queue(&ctx.active[sc], g);
+	return 0;
+}
+
+void *malloc(size_t n)
+{
+	if (size_overflows(n)) return 0;
+	struct meta *g;
+	uint32_t mask, first;
+	int sc;
+	int idx;
+	int ctr;
+
+	if (n >= MMAP_THRESHOLD) {
+		size_t needed = n + IB + UNIT;
+		void *p = mmap(0, needed, PROT_READ|PROT_WRITE,
+			MAP_PRIVATE|MAP_ANON, -1, 0);
+		if (p==MAP_FAILED) return 0;
+		wrlock();
+		step_seq();
+		g = alloc_meta();
+		if (!g) {
+			unlock();
+			munmap(p, needed);
+			return 0;
+		}
+		g->mem = p;
+		g->mem->meta = g;
+		g->last_idx = 0;
+		g->freeable = 1;
+		g->sizeclass = 63;
+		g->maplen = (needed+4095)/4096;
+		g->avail_mask = g->freed_mask = 0;
+		// use a global counter to cycle offset in
+		// individually-mmapped allocations.
+		ctx.mmap_counter++;
+		idx = 0;
+		goto success;
+	}
+
+	sc = size_to_class(n);
+
+	rdlock();
+	g = ctx.active[sc];
+
+	// use coarse size classes initially when there are not yet
+	// any groups of desired size. this allows counts of 2 or 3
+	// to be allocated at first rather than having to start with
+	// 7 or 5, the min counts for even size classes.
+	if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) {
+		size_t usage = ctx.usage_by_class[sc|1];
+		// if a new group may be allocated, count it toward
+		// usage in deciding if we can use coarse class.
+		if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask
+		    && !ctx.active[sc|1]->freed_mask))
+			usage += 3;
+		if (usage <= 12)
+			sc |= 1;
+		g = ctx.active[sc];
+	}
+
+	for (;;) {
+		mask = g ? g->avail_mask : 0;
+		first = mask&-mask;
+		if (!first) break;
+		if (RDLOCK_IS_EXCLUSIVE || !MT)
+			g->avail_mask = mask-first;
+		else if (a_cas(&g->avail_mask, mask, mask-first)!=mask)
+			continue;
+		idx = a_ctz_32(first);
+		goto success;
+	}
+	upgradelock();
+
+	idx = alloc_slot(sc, n);
+	if (idx < 0) {
+		unlock();
+		return 0;
+	}
+	g = ctx.active[sc];
+
+success:
+	ctr = ctx.mmap_counter;
+	unlock();
+	return enframe(g, idx, n, ctr);
+}
+
+int is_allzero(void *p)
+{
+	struct meta *g = get_meta(p);
+	return g->sizeclass >= 48 ||
+		get_stride(g) < UNIT*size_classes[g->sizeclass];
+}
diff --git a/src/malloc/mallocng/malloc_usable_size.c b/src/malloc/mallocng/malloc_usable_size.c
new file mode 100644
index 00000000..ce6a960c
--- /dev/null
+++ b/src/malloc/mallocng/malloc_usable_size.c
@@ -0,0 +1,13 @@
+#include <stdlib.h>
+#include "meta.h"
+
+size_t malloc_usable_size(void *p)
+{
+	if (!p) return 0;
+	struct meta *g = get_meta(p);
+	int idx = get_slot_index(p);
+	size_t stride = get_stride(g);
+	unsigned char *start = g->mem->storage + stride*idx;
+	unsigned char *end = start + stride - IB;
+	return get_nominal_size(p, end);
+}
diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h
new file mode 100644
index 00000000..61ec53f9
--- /dev/null
+++ b/src/malloc/mallocng/meta.h
@@ -0,0 +1,288 @@
+#ifndef MALLOC_META_H
+#define MALLOC_META_H
+
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include "glue.h"
+
+__attribute__((__visibility__("hidden")))
+extern const uint16_t size_classes[];
+
+#define MMAP_THRESHOLD 131052
+
+#define UNIT 16
+#define IB 4
+
+struct group {
+	struct meta *meta;
+	unsigned char active_idx:5;
+	char pad[UNIT - sizeof(struct meta *) - 1];
+	unsigned char storage[];
+};
+
+struct meta {
+	struct meta *prev, *next;
+	struct group *mem;
+	volatile int avail_mask, freed_mask;
+	uintptr_t last_idx:5;
+	uintptr_t freeable:1;
+	uintptr_t sizeclass:6;
+	uintptr_t maplen:8*sizeof(uintptr_t)-12;
+};
+
+struct meta_area {
+	uint64_t check;
+	struct meta_area *next;
+	int nslots;
+	struct meta slots[];
+};
+
+struct malloc_context {
+	uint64_t secret;
+#ifndef PAGESIZE
+	size_t pagesize;
+#endif
+	int init_done;
+	unsigned mmap_counter;
+	struct meta *free_meta_head;
+	struct meta *avail_meta;
+	size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift;
+	struct meta_area *meta_area_head, *meta_area_tail;
+	unsigned char *avail_meta_areas;
+	struct meta *active[48];
+	size_t usage_by_class[48];
+	uint8_t unmap_seq[32], bounces[32];
+	uint8_t seq;
+	uintptr_t brk;
+};
+
+__attribute__((__visibility__("hidden")))
+extern struct malloc_context ctx;
+
+#ifdef PAGESIZE
+#define PGSZ PAGESIZE
+#else
+#define PGSZ ctx.pagesize
+#endif
+
+__attribute__((__visibility__("hidden")))
+struct meta *alloc_meta(void);
+
+__attribute__((__visibility__("hidden")))
+int is_allzero(void *);
+
+static inline void queue(struct meta **phead, struct meta *m)
+{
+	assert(!m->next);
+	assert(!m->prev);
+	if (*phead) {
+		struct meta *head = *phead;
+		m->next = head;
+		m->prev = head->prev;
+		m->next->prev = m->prev->next = m;
+	} else {
+		m->prev = m->next = m;
+		*phead = m;
+	}
+}
+
+static inline void dequeue(struct meta **phead, struct meta *m)
+{
+	if (m->next != m) {
+		m->prev->next = m->next;
+		m->next->prev = m->prev;
+		if (*phead == m) *phead = m->next;
+	} else {
+		*phead = 0;
+	}
+	m->prev = m->next = 0;
+}
+
+static inline struct meta *dequeue_head(struct meta **phead)
+{
+	struct meta *m = *phead;
+	if (m) dequeue(phead, m);
+	return m;
+}
+
+static inline void free_meta(struct meta *m)
+{
+	*m = (struct meta){0};
+	queue(&ctx.free_meta_head, m);
+}
+
+static inline uint32_t activate_group(struct meta *m)
+{
+	assert(!m->avail_mask);
+	uint32_t mask, act = (2u<<m->mem->active_idx)-1;
+	do mask = m->freed_mask;
+	while (a_cas(&m->freed_mask, mask, mask&~act)!=mask);
+	return m->avail_mask = mask & act;
+}
+
+static inline int get_slot_index(const unsigned char *p)
+{
+	return p[-3] & 31;
+}
+
+static inline struct meta *get_meta(const unsigned char *p)
+{
+	assert(!((uintptr_t)p & 15));
+	int offset = *(const uint16_t *)(p - 2);
+	int index = get_slot_index(p);
+	if (p[-4]) {
+		assert(!offset);
+		offset = *(uint32_t *)(p - 8);
+		assert(offset > 0xffff);
+	}
+	const struct group *base = (const void *)(p - UNIT*offset - UNIT);
+	const struct meta *meta = base->meta;
+	assert(meta->mem == base);
+	assert(index <= meta->last_idx);
+	assert(!(meta->avail_mask & (1u<<index)));
+	assert(!(meta->freed_mask & (1u<<index)));
+	const struct meta_area *area = (void *)((uintptr_t)meta & -4096);
+	assert(area->check == ctx.secret);
+	if (meta->sizeclass < 48) {
+		assert(offset >= size_classes[meta->sizeclass]*index);
+		assert(offset < size_classes[meta->sizeclass]*(index+1));
+	} else {
+		assert(meta->sizeclass == 63);
+	}
+	if (meta->maplen) {
+		assert(offset <= meta->maplen*4096UL/UNIT - 1);
+	}
+	return (struct meta *)meta;
+}
+
+static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end)
+{
+	size_t reserved = p[-3] >> 5;
+	if (reserved >= 5) {
+		assert(reserved == 5);
+		reserved = *(const uint32_t *)(end-4);
+		assert(reserved >= 5);
+		assert(!end[-5]);
+	}
+	assert(reserved <= end-p);
+	assert(!*(end-reserved));
+	// also check the slot's overflow byte
+	assert(!*end);
+	return end-reserved-p;
+}
+
+static inline size_t get_stride(const struct meta *g)
+{
+	if (!g->last_idx && g->maplen) {
+		return g->maplen*4096UL - UNIT;
+	} else {
+		return UNIT*size_classes[g->sizeclass];
+	}
+}
+
+static inline void set_size(unsigned char *p, unsigned char *end, size_t n)
+{
+	int reserved = end-p-n;
+	if (reserved) end[-reserved] = 0;
+	if (reserved >= 5) {
+		*(uint32_t *)(end-4) = reserved;
+		end[-5] = 0;
+		reserved = 5;
+	}
+	p[-3] = (p[-3]&31) + (reserved<<5);
+}
+
+static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
+{
+	size_t stride = get_stride(g);
+	size_t slack = (stride-IB-n)/UNIT;
+	unsigned char *p = g->mem->storage + stride*idx;
+	unsigned char *end = p+stride-IB;
+	// cycle offset within slot to increase interval to address
+	// reuse, facilitate trapping double-free.
+	int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
+	assert(!p[-4]);
+	if (off > slack) {
+		size_t m = slack;
+		m |= m>>1; m |= m>>2; m |= m>>4;
+		off &= m;
+		if (off > slack) off -= slack+1;
+		assert(off <= slack);
+	}
+	if (off) {
+		// store offset in unused header at offset zero
+		// if enframing at non-zero offset.
+		*(uint16_t *)(p-2) = off;
+		p[-3] = 7<<5;
+		p += UNIT*off;
+		// for nonzero offset there is no permanent check
+		// byte, so make one.
+		p[-4] = 0;
+	}
+	*(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
+	p[-3] = idx;
+	set_size(p, end, n);
+	return p;
+}
+
+static inline int size_to_class(size_t n)
+{
+	n = (n+IB-1)>>4;
+	if (n<10) return n;
+	n++;
+	int i = (28-a_clz_32(n))*4 + 8;
+	if (n>size_classes[i+1]) i+=2;
+	if (n>size_classes[i]) i++;
+	return i;
+}
+
+static inline int size_overflows(size_t n)
+{
+	if (n >= SIZE_MAX/2 - 4096) {
+		errno = ENOMEM;
+		return 1;
+	}
+	return 0;
+}
+
+static inline void step_seq(void)
+{
+	if (ctx.seq==255) {
+		for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0;
+		ctx.seq = 1;
+	} else {
+		ctx.seq++;
+	}
+}
+
+static inline void record_seq(int sc)
+{
+	if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq;
+}
+
+static inline void account_bounce(int sc)
+{
+	if (sc-7U < 32) {
+		int seq = ctx.unmap_seq[sc-7];
+		if (seq && ctx.seq-seq < 10) {
+			if (ctx.bounces[sc-7]+1 < 100)
+				ctx.bounces[sc-7]++;
+			else
+				ctx.bounces[sc-7] = 150;
+		}
+	}
+}
+
+static inline void decay_bounces(int sc)
+{
+	if (sc-7U < 32 && ctx.bounces[sc-7])
+		ctx.bounces[sc-7]--;
+}
+
+static inline int is_bouncing(int sc)
+{
+	return (sc-7U < 32 && ctx.bounces[sc-7] >= 100);
+}
+
+#endif
diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c
new file mode 100644
index 00000000..18769f42
--- /dev/null
+++ b/src/malloc/mallocng/realloc.c
@@ -0,0 +1,51 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include "meta.h"
+
+void *realloc(void *p, size_t n)
+{
+	if (!p) return malloc(n);
+	if (size_overflows(n)) return 0;
+
+	struct meta *g = get_meta(p);
+	int idx = get_slot_index(p);
+	size_t stride = get_stride(g);
+	unsigned char *start = g->mem->storage + stride*idx;
+	unsigned char *end = start + stride - IB;
+	size_t old_size = get_nominal_size(p, end);
+	size_t avail_size = end-(unsigned char *)p;
+	void *new;
+
+	// only resize in-place if size class matches
+	if (n <= avail_size && n<MMAP_THRESHOLD
+	    && size_to_class(n)+1 >= g->sizeclass) {
+		set_size(p, end, n);
+		return p;
+	}
+
+	// use mremap if old and new size are both mmap-worthy
+	if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) {
+		assert(g->sizeclass==63);
+		size_t base = (unsigned char *)p-start;
+		size_t needed = (n + base + UNIT + IB + 4095) & -4096;
+		new = g->maplen*4096UL == needed ? g->mem :
+			mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE);
+		if (new!=MAP_FAILED) {
+			g->mem = new;
+			g->maplen = needed/4096;
+			p = g->mem->storage + base;
+			end = g->mem->storage + (needed - UNIT) - IB;
+			*end = 0;
+			set_size(p, end, n);
+			return p;
+		}
+	}
+
+	new = malloc(n);
+	if (!new) return 0;
+	memcpy(new, p, n < old_size ? n : old_size);
+	free(p);
+	return new;
+}
diff --git a/src/malloc/memalign.c b/src/malloc/memalign.c
index cf9dfbda..32cd87d8 100644
--- a/src/malloc/memalign.c
+++ b/src/malloc/memalign.c
@@ -1,54 +1,7 @@
+#define _BSD_SOURCE
 #include <stdlib.h>
-#include <stdint.h>
-#include <errno.h>
-#include "malloc_impl.h"
 
-void *__memalign(size_t align, size_t len)
+void *memalign(size_t align, size_t len)
 {
-	unsigned char *mem, *new;
-
-	if ((align & -align) != align) {
-		errno = EINVAL;
-		return 0;
-	}
-
-	if (len > SIZE_MAX - align || __malloc_replaced) {
-		errno = ENOMEM;
-		return 0;
-	}
-
-	if (align <= SIZE_ALIGN)
-		return malloc(len);
-
-	if (!(mem = malloc(len + align-1)))
-		return 0;
-
-	new = (void *)((uintptr_t)mem + align-1 & -align);
-	if (new == mem) return mem;
-
-	struct chunk *c = MEM_TO_CHUNK(mem);
-	struct chunk *n = MEM_TO_CHUNK(new);
-
-	if (IS_MMAPPED(c)) {
-		/* Apply difference between aligned and original
-		 * address to the "extra" field of mmapped chunk. */
-		n->psize = c->psize + (new-mem);
-		n->csize = c->csize - (new-mem);
-		return new;
-	}
-
-	struct chunk *t = NEXT_CHUNK(c);
-
-	/* Split the allocated chunk into two chunks. The aligned part
-	 * that will be used has the size in its footer reduced by the
-	 * difference between the aligned and original addresses, and
-	 * the resulting size copied to its header. A new header and
-	 * footer are written for the split-off part to be freed. */
-	n->psize = c->csize = C_INUSE | (new-mem);
-	n->csize = t->psize -= new-mem;
-
-	__bin_chunk(c);
-	return new;
+	return aligned_alloc(align, len);
 }
-
-weak_alias(__memalign, memalign);
diff --git a/src/malloc/oldmalloc/aligned_alloc.c b/src/malloc/oldmalloc/aligned_alloc.c
new file mode 100644
index 00000000..4adca3b4
--- /dev/null
+++ b/src/malloc/oldmalloc/aligned_alloc.c
@@ -0,0 +1,53 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include "malloc_impl.h"
+
+void *aligned_alloc(size_t align, size_t len)
+{
+	unsigned char *mem, *new;
+
+	if ((align & -align) != align) {
+		errno = EINVAL;
+		return 0;
+	}
+
+	if (len > SIZE_MAX - align ||
+	    (__malloc_replaced && !__aligned_alloc_replaced)) {
+		errno = ENOMEM;
+		return 0;
+	}
+
+	if (align <= SIZE_ALIGN)
+		return malloc(len);
+
+	if (!(mem = malloc(len + align-1)))
+		return 0;
+
+	new = (void *)((uintptr_t)mem + align-1 & -align);
+	if (new == mem) return mem;
+
+	struct chunk *c = MEM_TO_CHUNK(mem);
+	struct chunk *n = MEM_TO_CHUNK(new);
+
+	if (IS_MMAPPED(c)) {
+		/* Apply difference between aligned and original
+		 * address to the "extra" field of mmapped chunk. */
+		n->psize = c->psize + (new-mem);
+		n->csize = c->csize - (new-mem);
+		return new;
+	}
+
+	struct chunk *t = NEXT_CHUNK(c);
+
+	/* Split the allocated chunk into two chunks. The aligned part
+	 * that will be used has the size in its footer reduced by the
+	 * difference between the aligned and original addresses, and
+	 * the resulting size copied to its header. A new header and
+	 * footer are written for the split-off part to be freed. */
+	n->psize = c->csize = C_INUSE | (new-mem);
+	n->csize = t->psize -= new-mem;
+
+	__bin_chunk(c);
+	return new;
+}
diff --git a/src/malloc/malloc.c b/src/malloc/oldmalloc/malloc.c
index 96982596..25d00d44 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/oldmalloc/malloc.c
@@ -9,6 +9,11 @@
 #include "atomic.h"
 #include "pthread_impl.h"
 #include "malloc_impl.h"
+#include "fork_impl.h"
+
+#define malloc __libc_malloc_impl
+#define realloc __libc_realloc
+#define free __libc_free
 
 #if defined(__GNUC__) && defined(__PIC__)
 #define inline inline __attribute__((always_inline))
@@ -17,17 +22,18 @@
 static struct {
 	volatile uint64_t binmap;
 	struct bin bins[64];
-	volatile int free_lock[2];
+	volatile int split_merge_lock[2];
 } mal;
 
-int __malloc_replaced;
-
 /* Synchronization tools */
 
 static inline void lock(volatile int *lk)
 {
-	if (libc.threads_minus_1)
+	int need_locks = libc.need_locks;
+	if (need_locks) {
 		while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
+		if (need_locks < 0) libc.need_locks = 0;
+	}
 }
 
 static inline void unlock(volatile int *lk)
@@ -123,9 +129,72 @@ void __dump_heap(int x)
 }
 #endif
 
+/* This function returns true if the interval [old,new]
+ * intersects the 'len'-sized interval below &libc.auxv
+ * (interpreted as the main-thread stack) or below &b
+ * (the current stack). It is used to defend against
+ * buggy brk implementations that can cross the stack. */
+
+static int traverses_stack_p(uintptr_t old, uintptr_t new)
+{
+	const uintptr_t len = 8<<20;
+	uintptr_t a, b;
+
+	b = (uintptr_t)libc.auxv;
+	a = b > len ? b-len : 0;
+	if (new>a && old<b) return 1;
+
+	b = (uintptr_t)&b;
+	a = b > len ? b-len : 0;
+	if (new>a && old<b) return 1;
+
+	return 0;
+}
+
+/* Expand the heap in-place if brk can be used, or otherwise via mmap,
+ * using an exponential lower bound on growth by mmap to make
+ * fragmentation asymptotically irrelevant. The size argument is both
+ * an input and an output, since the caller needs to know the size
+ * allocated, which will be larger than requested due to page alignment
+ * and mmap minimum size rules. The caller is responsible for locking
+ * to prevent concurrent calls. */
+
+static void *__expand_heap(size_t *pn)
+{
+	static uintptr_t brk;
+	static unsigned mmap_step;
+	size_t n = *pn;
+
+	if (n > SIZE_MAX/2 - PAGE_SIZE) {
+		errno = ENOMEM;
+		return 0;
+	}
+	n += -n & PAGE_SIZE-1;
+
+	if (!brk) {
+		brk = __syscall(SYS_brk, 0);
+		brk += -brk & PAGE_SIZE-1;
+	}
+
+	if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
+	    && __syscall(SYS_brk, brk+n)==brk+n) {
+		*pn = n;
+		brk += n;
+		return (void *)(brk-n);
+	}
+
+	size_t min = (size_t)PAGE_SIZE << mmap_step/2;
+	if (n < min) n = min;
+	void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (area == MAP_FAILED) return 0;
+	*pn = n;
+	mmap_step++;
+	return area;
+}
+
 static struct chunk *expand_heap(size_t n)
 {
-	static int heap_lock[2];
 	static void *end;
 	void *p;
 	struct chunk *w;
@@ -135,13 +204,8 @@ static struct chunk *expand_heap(size_t n)
 	 * we need room for an extra zero-sized sentinel chunk. */
 	n += SIZE_ALIGN;
 
-	lock(heap_lock);
-
 	p = __expand_heap(&n);
-	if (!p) {
-		unlock(heap_lock);
-		return 0;
-	}
+	if (!p) return 0;
 
 	/* If not just expanding existing space, we need to make a
 	 * new sentinel chunk below the allocated space. */
@@ -164,8 +228,6 @@ static struct chunk *expand_heap(size_t n)
 	w = MEM_TO_CHUNK(p);
 	w->csize = n | C_INUSE;
 
-	unlock(heap_lock);
-
 	return w;
 }
 
@@ -195,96 +257,44 @@ static void unbin(struct chunk *c, int i)
 	NEXT_CHUNK(c)->psize |= C_INUSE;
 }
 
-static int alloc_fwd(struct chunk *c)
+static void bin_chunk(struct chunk *self, int i)
 {
-	int i;
-	size_t k;
-	while (!((k=c->csize) & C_INUSE)) {
-		i = bin_index(k);
-		lock_bin(i);
-		if (c->csize == k) {
-			unbin(c, i);
-			unlock_bin(i);
-			return 1;
-		}
-		unlock_bin(i);
-	}
-	return 0;
-}
-
-static int alloc_rev(struct chunk *c)
-{
-	int i;
-	size_t k;
-	while (!((k=c->psize) & C_INUSE)) {
-		i = bin_index(k);
-		lock_bin(i);
-		if (c->psize == k) {
-			unbin(PREV_CHUNK(c), i);
-			unlock_bin(i);
-			return 1;
-		}
-		unlock_bin(i);
-	}
-	return 0;
+	self->next = BIN_TO_CHUNK(i);
+	self->prev = mal.bins[i].tail;
+	self->next->prev = self;
+	self->prev->next = self;
+	if (self->prev == BIN_TO_CHUNK(i))
+		a_or_64(&mal.binmap, 1ULL<<i);
 }
 
-
-/* pretrim - trims a chunk _prior_ to removing it from its bin.
- * Must be called with i as the ideal bin for size n, j the bin
- * for the _free_ chunk self, and bin j locked. */
-static int pretrim(struct chunk *self, size_t n, int i, int j)
+static void trim(struct chunk *self, size_t n)
 {
-	size_t n1;
+	size_t n1 = CHUNK_SIZE(self);
 	struct chunk *next, *split;
 
-	/* We cannot pretrim if it would require re-binning. */
-	if (j < 40) return 0;
-	if (j < i+3) {
-		if (j != 63) return 0;
-		n1 = CHUNK_SIZE(self);
-		if (n1-n <= MMAP_THRESHOLD) return 0;
-	} else {
-		n1 = CHUNK_SIZE(self);
-	}
-	if (bin_index(n1-n) != j) return 0;
+	if (n >= n1 - DONTCARE) return;
 
 	next = NEXT_CHUNK(self);
 	split = (void *)((char *)self + n);
 
-	split->prev = self->prev;
-	split->next = self->next;
-	split->prev->next = split;
-	split->next->prev = split;
 	split->psize = n | C_INUSE;
 	split->csize = n1-n;
 	next->psize = n1-n;
 	self->csize = n | C_INUSE;
-	return 1;
-}
 
-static void trim(struct chunk *self, size_t n)
-{
-	size_t n1 = CHUNK_SIZE(self);
-	struct chunk *next, *split;
+	int i = bin_index(n1-n);
+	lock_bin(i);
 
-	if (n >= n1 - DONTCARE) return;
+	bin_chunk(split, i);
 
-	next = NEXT_CHUNK(self);
-	split = (void *)((char *)self + n);
-
-	split->psize = n | C_INUSE;
-	split->csize = n1-n | C_INUSE;
-	next->psize = n1-n | C_INUSE;
-	self->csize = n | C_INUSE;
-
-	__bin_chunk(split);
+	unlock_bin(i);
 }
 
 void *malloc(size_t n)
 {
 	struct chunk *c;
 	int i, j;
+	uint64_t mask;
 
 	if (adjust_size(&n) < 0) return 0;
 
@@ -300,70 +310,43 @@ void *malloc(size_t n)
 	}
 
 	i = bin_index_up(n);
-	for (;;) {
-		uint64_t mask = mal.binmap & -(1ULL<<i);
-		if (!mask) {
-			c = expand_heap(n);
-			if (!c) return 0;
-			if (alloc_rev(c)) {
-				struct chunk *x = c;
-				c = PREV_CHUNK(c);
-				NEXT_CHUNK(x)->psize = c->csize =
-					x->csize + CHUNK_SIZE(c);
-			}
-			break;
+	if (i<63 && (mal.binmap & (1ULL<<i))) {
+		lock_bin(i);
+		c = mal.bins[i].head;
+		if (c != BIN_TO_CHUNK(i) && CHUNK_SIZE(c)-n <= DONTCARE) {
+			unbin(c, i);
+			unlock_bin(i);
+			return CHUNK_TO_MEM(c);
 		}
+		unlock_bin(i);
+	}
+	lock(mal.split_merge_lock);
+	for (mask = mal.binmap & -(1ULL<<i); mask; mask -= (mask&-mask)) {
 		j = first_set(mask);
 		lock_bin(j);
 		c = mal.bins[j].head;
 		if (c != BIN_TO_CHUNK(j)) {
-			if (!pretrim(c, n, i, j)) unbin(c, j);
+			unbin(c, j);
 			unlock_bin(j);
 			break;
 		}
 		unlock_bin(j);
 	}
-
-	/* Now patch up in case we over-allocated */
+	if (!mask) {
+		c = expand_heap(n);
+		if (!c) {
+			unlock(mal.split_merge_lock);
+			return 0;
+		}
+	}
 	trim(c, n);
-
+	unlock(mal.split_merge_lock);
 	return CHUNK_TO_MEM(c);
 }
 
-static size_t mal0_clear(char *p, size_t pagesz, size_t n)
+int __malloc_allzerop(void *p)
 {
-#ifdef __GNUC__
-	typedef uint64_t __attribute__((__may_alias__)) T;
-#else
-	typedef unsigned char T;
-#endif
-	char *pp = p + n;
-	size_t i = (uintptr_t)pp & (pagesz - 1);
-	for (;;) {
-		pp = memset(pp - i, 0, i);
-		if (pp - p < pagesz) return pp - p;
-		for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
-		        if (((T *)pp)[-1] | ((T *)pp)[-2])
-				break;
-	}
-}
-
-void *calloc(size_t m, size_t n)
-{
-	if (n && m > (size_t)-1/n) {
-		errno = ENOMEM;
-		return 0;
-	}
-	n *= m;
-	void *p = malloc(n);
-	if (!p) return p;
-	if (!__malloc_replaced) {
-		if (IS_MMAPPED(MEM_TO_CHUNK(p)))
-			return p;
-		if (n >= PAGE_SIZE)
-			n = mal0_clear(p, PAGE_SIZE, n);
-	}
-	return memset(p, 0, n);
+	return IS_MMAPPED(MEM_TO_CHUNK(p));
 }
 
 void *realloc(void *p, size_t n)
@@ -379,6 +362,8 @@ void *realloc(void *p, size_t n)
 	self = MEM_TO_CHUNK(p);
 	n1 = n0 = CHUNK_SIZE(self);
 
+	if (n<=n0 && n0-n<=DONTCARE) return p;
+
 	if (IS_MMAPPED(self)) {
 		size_t extra = self->psize;
 		char *base = (char *)self - extra;
@@ -405,34 +390,43 @@ void *realloc(void *p, size_t n)
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
 
-	/* Merge adjacent chunks if we need more space. This is not
-	 * a waste of time even if we fail to get enough space, because our
-	 * subsequent call to free would otherwise have to do the merge. */
-	if (n > n1 && alloc_fwd(next)) {
-		n1 += CHUNK_SIZE(next);
-		next = NEXT_CHUNK(next);
-	}
-	/* FIXME: find what's wrong here and reenable it..? */
-	if (0 && n > n1 && alloc_rev(self)) {
-		self = PREV_CHUNK(self);
-		n1 += CHUNK_SIZE(self);
+	if (n < n0) {
+		int i = bin_index_up(n);
+		int j = bin_index(n0);
+		if (i<j && (mal.binmap & (1ULL << i)))
+			goto copy_realloc;
+		struct chunk *split = (void *)((char *)self + n);
+		self->csize = split->psize = n | C_INUSE;
+		split->csize = next->psize = n0-n | C_INUSE;
+		__bin_chunk(split);
+		return CHUNK_TO_MEM(self);
 	}
-	self->csize = n1 | C_INUSE;
-	next->psize = n1 | C_INUSE;
 
-	/* If we got enough space, split off the excess and return */
-	if (n <= n1) {
-		//memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD);
-		trim(self, n);
-		return CHUNK_TO_MEM(self);
+	lock(mal.split_merge_lock);
+
+	size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next);
+	if (n0+nsize >= n) {
+		int i = bin_index(nsize);
+		lock_bin(i);
+		if (!(next->csize & C_INUSE)) {
+			unbin(next, i);
+			unlock_bin(i);
+			next = NEXT_CHUNK(next);
+			self->csize = next->psize = n0+nsize | C_INUSE;
+			trim(self, n);
+			unlock(mal.split_merge_lock);
+			return CHUNK_TO_MEM(self);
+		}
+		unlock_bin(i);
 	}
+	unlock(mal.split_merge_lock);
 
 copy_realloc:
 	/* As a last resort, allocate a new chunk and copy to it. */
 	new = malloc(n-OVERHEAD);
 	if (!new) return 0;
 copy_free_ret:
-	memcpy(new, p, n0-OVERHEAD);
+	memcpy(new, p, (n<n0 ? n : n0) - OVERHEAD);
 	free(CHUNK_TO_MEM(self));
 	return new;
 }
@@ -440,67 +434,61 @@ copy_free_ret:
 void __bin_chunk(struct chunk *self)
 {
 	struct chunk *next = NEXT_CHUNK(self);
-	size_t final_size, new_size, size;
-	int reclaim=0;
-	int i;
-
-	final_size = new_size = CHUNK_SIZE(self);
 
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
 
-	for (;;) {
-		if (self->psize & next->csize & C_INUSE) {
-			self->csize = final_size | C_INUSE;
-			next->psize = final_size | C_INUSE;
-			i = bin_index(final_size);
-			lock_bin(i);
-			lock(mal.free_lock);
-			if (self->psize & next->csize & C_INUSE)
-				break;
-			unlock(mal.free_lock);
-			unlock_bin(i);
-		}
+	lock(mal.split_merge_lock);
 
-		if (alloc_rev(self)) {
-			self = PREV_CHUNK(self);
-			size = CHUNK_SIZE(self);
-			final_size += size;
-			if (new_size+size > RECLAIM && (new_size+size^size) > size)
-				reclaim = 1;
-		}
+	size_t osize = CHUNK_SIZE(self), size = osize;
+
+	/* Since we hold split_merge_lock, only transition from free to
+	 * in-use can race; in-use to free is impossible */
+	size_t psize = self->psize & C_INUSE ? 0 : CHUNK_PSIZE(self);
+	size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next);
 
-		if (alloc_fwd(next)) {
-			size = CHUNK_SIZE(next);
-			final_size += size;
-			if (new_size+size > RECLAIM && (new_size+size^size) > size)
-				reclaim = 1;
+	if (psize) {
+		int i = bin_index(psize);
+		lock_bin(i);
+		if (!(self->psize & C_INUSE)) {
+			struct chunk *prev = PREV_CHUNK(self);
+			unbin(prev, i);
+			self = prev;
+			size += psize;
+		}
+		unlock_bin(i);
+	}
+	if (nsize) {
+		int i = bin_index(nsize);
+		lock_bin(i);
+		if (!(next->csize & C_INUSE)) {
+			unbin(next, i);
 			next = NEXT_CHUNK(next);
+			size += nsize;
 		}
+		unlock_bin(i);
 	}
 
-	if (!(mal.binmap & 1ULL<<i))
-		a_or_64(&mal.binmap, 1ULL<<i);
+	int i = bin_index(size);
+	lock_bin(i);
 
-	self->csize = final_size;
-	next->psize = final_size;
-	unlock(mal.free_lock);
-
-	self->next = BIN_TO_CHUNK(i);
-	self->prev = mal.bins[i].tail;
-	self->next->prev = self;
-	self->prev->next = self;
+	self->csize = size;
+	next->psize = size;
+	bin_chunk(self, i);
+	unlock(mal.split_merge_lock);
 
 	/* Replace middle of large chunks with fresh zero pages */
-	if (reclaim) {
+	if (size > RECLAIM && (size^(size-osize)) > size-osize) {
 		uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
 		uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
+		int e = errno;
 #if 1
 		__madvise((void *)a, b-a, MADV_DONTNEED);
 #else
 		__mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
 			MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
 #endif
+		errno = e;
 	}
 
 	unlock_bin(i);
@@ -513,7 +501,9 @@ static void unmap_chunk(struct chunk *self)
 	size_t len = CHUNK_SIZE(self) + extra;
 	/* Crash on double free */
 	if (extra & 1) a_crash();
+	int e = errno;
 	__munmap(base, len);
+	errno = e;
 }
 
 void free(void *p)
@@ -546,3 +536,21 @@ void __malloc_donate(char *start, char *end)
 	c->csize = n->psize = C_INUSE | (end-start);
 	__bin_chunk(c);
 }
+
+void __malloc_atfork(int who)
+{
+	if (who<0) {
+		lock(mal.split_merge_lock);
+		for (int i=0; i<64; i++)
+			lock(mal.bins[i].lock);
+	} else if (!who) {
+		for (int i=0; i<64; i++)
+			unlock(mal.bins[i].lock);
+		unlock(mal.split_merge_lock);
+	} else {
+		for (int i=0; i<64; i++)
+			mal.bins[i].lock[0] = mal.bins[i].lock[1] = 0;
+		mal.split_merge_lock[1] = 0;
+		mal.split_merge_lock[0] = 0;
+	}
+}
diff --git a/src/internal/malloc_impl.h b/src/malloc/oldmalloc/malloc_impl.h
index 59785a7f..e1cf4774 100644
--- a/src/internal/malloc_impl.h
+++ b/src/malloc/oldmalloc/malloc_impl.h
@@ -2,12 +2,7 @@
 #define MALLOC_IMPL_H
 
 #include <sys/mman.h>
-
-hidden void *__expand_heap(size_t *);
-
-hidden void __malloc_donate(char *, char *);
-
-hidden void *__memalign(size_t, size_t);
+#include "dynlink.h"
 
 struct chunk {
 	size_t psize, csize;
@@ -41,6 +36,4 @@ struct bin {
 
 hidden void __bin_chunk(struct chunk *);
 
-hidden extern int __malloc_replaced;
-
 #endif
diff --git a/src/malloc/malloc_usable_size.c b/src/malloc/oldmalloc/malloc_usable_size.c
index 672b518a..672b518a 100644
--- a/src/malloc/malloc_usable_size.c
+++ b/src/malloc/oldmalloc/malloc_usable_size.c
diff --git a/src/malloc/posix_memalign.c b/src/malloc/posix_memalign.c
index 2ea8bd8a..ad4d8f47 100644
--- a/src/malloc/posix_memalign.c
+++ b/src/malloc/posix_memalign.c
@@ -1,11 +1,10 @@
 #include <stdlib.h>
 #include <errno.h>
-#include "malloc_impl.h"
 
 int posix_memalign(void **res, size_t align, size_t len)
 {
 	if (align < sizeof(void *)) return EINVAL;
-	void *mem = __memalign(align, len);
+	void *mem = aligned_alloc(align, len);
 	if (!mem) return errno;
 	*res = mem;
 	return 0;
diff --git a/src/malloc/realloc.c b/src/malloc/realloc.c
new file mode 100644
index 00000000..fb0e8b7c
--- /dev/null
+++ b/src/malloc/realloc.c
@@ -0,0 +1,6 @@
+#include <stdlib.h>
+
+void *realloc(void *p, size_t n)
+{
+	return __libc_realloc(p, n);
+}
diff --git a/src/malloc/reallocarray.c b/src/malloc/reallocarray.c
new file mode 100644
index 00000000..4a6ebe46
--- /dev/null
+++ b/src/malloc/reallocarray.c
@@ -0,0 +1,13 @@
+#define _BSD_SOURCE
+#include <errno.h>
+#include <stdlib.h>
+
+void *reallocarray(void *ptr, size_t m, size_t n)
+{
+	if (n && m > -1 / n) {
+		errno = ENOMEM;
+		return 0;
+	}
+
+	return realloc(ptr, m * n);
+}
diff --git a/src/malloc/replaced.c b/src/malloc/replaced.c
new file mode 100644
index 00000000..07fce61e
--- /dev/null
+++ b/src/malloc/replaced.c
@@ -0,0 +1,4 @@
+#include "dynlink.h"
+
+int __malloc_replaced;
+int __aligned_alloc_replaced;
diff --git a/src/math/__expo2.c b/src/math/__expo2.c
index 740ac680..248f052b 100644
--- a/src/math/__expo2.c
+++ b/src/math/__expo2.c
@@ -5,12 +5,13 @@ static const int k = 2043;
 static const double kln2 = 0x1.62066151add8bp+10;
 
 /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */
-double __expo2(double x)
+double __expo2(double x, double sign)
 {
 	double scale;
 
 	/* note that k is odd and scale*scale overflows */
 	INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0);
 	/* exp(x - k ln2) * 2**(k-1) */
-	return exp(x - kln2) * scale * scale;
+	/* in directed rounding correct sign before rounding or overflow is important */
+	return exp(x - kln2) * (sign * scale) * scale;
 }
diff --git a/src/math/__expo2f.c b/src/math/__expo2f.c
index 5163e418..538eb09c 100644
--- a/src/math/__expo2f.c
+++ b/src/math/__expo2f.c
@@ -5,12 +5,13 @@ static const int k = 235;
 static const float kln2 = 0x1.45c778p+7f;
 
 /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
-float __expo2f(float x)
+float __expo2f(float x, float sign)
 {
 	float scale;
 
 	/* note that k is odd and scale*scale overflows */
 	SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23);
 	/* exp(x - k ln2) * 2**(k-1) */
-	return expf(x - kln2) * scale * scale;
+	/* in directed rounding correct sign before rounding or overflow is important */
+	return expf(x - kln2) * (sign * scale) * scale;
 }
diff --git a/src/math/__math_divzero.c b/src/math/__math_divzero.c
new file mode 100644
index 00000000..59d21350
--- /dev/null
+++ b/src/math/__math_divzero.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_divzero(uint32_t sign)
+{
+	return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
+}
diff --git a/src/math/__math_divzerof.c b/src/math/__math_divzerof.c
new file mode 100644
index 00000000..ce046f3e
--- /dev/null
+++ b/src/math/__math_divzerof.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_divzerof(uint32_t sign)
+{
+	return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
+}
diff --git a/src/math/__math_invalid.c b/src/math/__math_invalid.c
new file mode 100644
index 00000000..17740490
--- /dev/null
+++ b/src/math/__math_invalid.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_invalid(double x)
+{
+	return (x - x) / (x - x);
+}
diff --git a/src/math/__math_invalidf.c b/src/math/__math_invalidf.c
new file mode 100644
index 00000000..357d4b12
--- /dev/null
+++ b/src/math/__math_invalidf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_invalidf(float x)
+{
+	return (x - x) / (x - x);
+}
diff --git a/src/math/__math_invalidl.c b/src/math/__math_invalidl.c
new file mode 100644
index 00000000..1fca99de
--- /dev/null
+++ b/src/math/__math_invalidl.c
@@ -0,0 +1,9 @@
+#include <float.h>
+#include "libm.h"
+
+#if LDBL_MANT_DIG != DBL_MANT_DIG
+long double __math_invalidl(long double x)
+{
+	return (x - x) / (x - x);
+}
+#endif
diff --git a/src/math/__math_oflow.c b/src/math/__math_oflow.c
new file mode 100644
index 00000000..c85dbf98
--- /dev/null
+++ b/src/math/__math_oflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_oflow(uint32_t sign)
+{
+	return __math_xflow(sign, 0x1p769);
+}
diff --git a/src/math/__math_oflowf.c b/src/math/__math_oflowf.c
new file mode 100644
index 00000000..fa7d0620
--- /dev/null
+++ b/src/math/__math_oflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_oflowf(uint32_t sign)
+{
+	return __math_xflowf(sign, 0x1p97f);
+}
diff --git a/src/math/__math_uflow.c b/src/math/__math_uflow.c
new file mode 100644
index 00000000..b90594ae
--- /dev/null
+++ b/src/math/__math_uflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_uflow(uint32_t sign)
+{
+	return __math_xflow(sign, 0x1p-767);
+}
diff --git a/src/math/__math_uflowf.c b/src/math/__math_uflowf.c
new file mode 100644
index 00000000..94d50f2b
--- /dev/null
+++ b/src/math/__math_uflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_uflowf(uint32_t sign)
+{
+	return __math_xflowf(sign, 0x1p-95f);
+}
diff --git a/src/math/__math_xflow.c b/src/math/__math_xflow.c
new file mode 100644
index 00000000..744203c4
--- /dev/null
+++ b/src/math/__math_xflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_xflow(uint32_t sign, double y)
+{
+	return eval_as_double(fp_barrier(sign ? -y : y) * y);
+}
diff --git a/src/math/__math_xflowf.c b/src/math/__math_xflowf.c
new file mode 100644
index 00000000..f2c84784
--- /dev/null
+++ b/src/math/__math_xflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_xflowf(uint32_t sign, float y)
+{
+	return eval_as_float(fp_barrierf(sign ? -y : y) * y);
+}
diff --git a/src/math/__rem_pio2.c b/src/math/__rem_pio2.c
index d403f81c..dcf672fb 100644
--- a/src/math/__rem_pio2.c
+++ b/src/math/__rem_pio2.c
@@ -36,6 +36,7 @@
  */
 static const double
 toint   = 1.5/EPS,
+pio4    = 0x1.921fb54442d18p-1,
 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
 pio2_1  = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
@@ -117,11 +118,23 @@ int __rem_pio2(double x, double *y)
 	}
 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
 medium:
-		/* rint(x/(pi/2)), Assume round-to-nearest. */
+		/* rint(x/(pi/2)) */
 		fn = (double_t)x*invpio2 + toint - toint;
 		n = (int32_t)fn;
 		r = x - fn*pio2_1;
 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
+		/* Matters with directed rounding. */
+		if (predict_false(r - w < -pio4)) {
+			n--;
+			fn--;
+			r = x - fn*pio2_1;
+			w = fn*pio2_1t;
+		} else if (predict_false(r - w > pio4)) {
+			n++;
+			fn++;
+			r = x - fn*pio2_1;
+			w = fn*pio2_1t;
+		}
 		y[0] = r - w;
 		u.f = y[0];
 		ey = u.i>>52 & 0x7ff;
diff --git a/src/math/__rem_pio2f.c b/src/math/__rem_pio2f.c
index 4473c1c4..e6765643 100644
--- a/src/math/__rem_pio2f.c
+++ b/src/math/__rem_pio2f.c
@@ -35,6 +35,7 @@
  */
 static const double
 toint   = 1.5/EPS,
+pio4    = 0x1.921fb6p-1,
 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
 pio2_1  = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
 pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
@@ -50,10 +51,20 @@ int __rem_pio2f(float x, double *y)
 	ix = u.i & 0x7fffffff;
 	/* 25+53 bit pi is good enough for medium size */
 	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */
-		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
+		/* Use a specialized rint() to get fn. */
 		fn = (double_t)x*invpio2 + toint - toint;
 		n  = (int32_t)fn;
 		*y = x - fn*pio2_1 - fn*pio2_1t;
+		/* Matters with directed rounding. */
+		if (predict_false(*y < -pio4)) {
+			n--;
+			fn--;
+			*y = x - fn*pio2_1 - fn*pio2_1t;
+		} else if (predict_false(*y > pio4)) {
+			n++;
+			fn++;
+			*y = x - fn*pio2_1 - fn*pio2_1t;
+		}
 		return n;
 	}
 	if(ix>=0x7f800000) {  /* x is inf or NaN */
diff --git a/src/math/__rem_pio2l.c b/src/math/__rem_pio2l.c
index 77255bd8..236b2def 100644
--- a/src/math/__rem_pio2l.c
+++ b/src/math/__rem_pio2l.c
@@ -44,6 +44,7 @@ pio2_1 =  1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */
 pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */
 pio2_3 =  6.36831716351370313614e-25; /*  0x18a2e037074000.0p-133 */
 static const long double
+pio4    =  0x1.921fb54442d1846ap-1L,
 invpio2 =  6.36619772367581343076e-01L, /*  0xa2f9836e4e44152a.0p-64 */
 pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */
 pio2_2t =  6.36831716351095013979e-25L, /*  0xc51701b839a25205.0p-144 */
@@ -57,6 +58,7 @@ pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */
 #define NX 5
 #define NY 3
 static const long double
+pio4    =  0x1.921fb54442d18469898cc51701b8p-1L,
 invpio2 =  6.3661977236758134307553505349005747e-01L,	/*  0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
 pio2_1  =  1.5707963267948966192292994253909555e+00L,	/*  0x1921fb54442d18469800000000000.0p-112 */
 pio2_1t =  2.0222662487959507323996846200947577e-21L,	/*  0x13198a2e03707344a4093822299f3.0p-181 */
@@ -76,11 +78,23 @@ int __rem_pio2l(long double x, long double *y)
 	u.f = x;
 	ex = u.i.se & 0x7fff;
 	if (SMALL(u)) {
-		/* rint(x/(pi/2)), Assume round-to-nearest. */
+		/* rint(x/(pi/2)) */
 		fn = x*invpio2 + toint - toint;
 		n = QUOBITS(fn);
 		r = x-fn*pio2_1;
 		w = fn*pio2_1t;  /* 1st round good to 102/180 bits (ld80/ld128) */
+		/* Matters with directed rounding. */
+		if (predict_false(r - w < -pio4)) {
+			n--;
+			fn--;
+			r = x - fn*pio2_1;
+			w = fn*pio2_1t;
+		} else if (predict_false(r - w > pio4)) {
+			n++;
+			fn++;
+			r = x - fn*pio2_1;
+			w = fn*pio2_1t;
+		}
 		y[0] = r-w;
 		u.f = y[0];
 		ey = u.i.se & 0x7fff;
diff --git a/src/math/acoshf.c b/src/math/acoshf.c
index 8a4ec4d5..b773d48e 100644
--- a/src/math/acoshf.c
+++ b/src/math/acoshf.c
@@ -15,12 +15,12 @@ float acoshf(float x)
 	uint32_t a = u.i & 0x7fffffff;
 
 	if (a < 0x3f800000+(1<<23))
-		/* |x| < 2, invalid if x < 1 or nan */
+		/* |x| < 2, invalid if x < 1 */
 		/* up to 2ulp error in [1,1.125] */
 		return log1pf(x-1 + sqrtf((x-1)*(x-1)+2*(x-1)));
-	if (a < 0x3f800000+(12<<23))
-		/* |x| < 0x1p12 */
+	if (u.i < 0x3f800000+(12<<23))
+		/* 2 <= x < 0x1p12 */
 		return logf(2*x - 1/(x+sqrtf(x*x-1)));
-	/* x >= 0x1p12 */
+	/* x >= 0x1p12 or x <= -2 or nan */
 	return logf(x) + 0.693147180559945309417232121458176568f;
 }
diff --git a/src/math/acoshl.c b/src/math/acoshl.c
index 8d4b43f6..943cec17 100644
--- a/src/math/acoshl.c
+++ b/src/math/acoshl.c
@@ -10,14 +10,18 @@ long double acoshl(long double x)
 long double acoshl(long double x)
 {
 	union ldshape u = {x};
-	int e = u.i.se & 0x7fff;
+	int e = u.i.se;
 
 	if (e < 0x3fff + 1)
-		/* |x| < 2, invalid if x < 1 or nan */
+		/* 0 <= x < 2, invalid if x < 1 */
 		return log1pl(x-1 + sqrtl((x-1)*(x-1)+2*(x-1)));
 	if (e < 0x3fff + 32)
-		/* |x| < 0x1p32 */
+		/* 2 <= x < 0x1p32 */
 		return logl(2*x - 1/(x+sqrtl(x*x-1)));
+	if (e & 0x8000)
+		/* x < 0 or x = -0, invalid */
+		return (x - x) / (x - x);
+	/* 0x1p32 <= x or nan */
 	return logl(x) + 0.693147180559945309417232121458176568L;
 }
 #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
diff --git a/src/math/arm/fabs.c b/src/math/arm/fabs.c
index f890520a..6e1d367d 100644
--- a/src/math/arm/fabs.c
+++ b/src/math/arm/fabs.c
@@ -1,6 +1,6 @@
 #include <math.h>
 
-#if __ARM_PCS_VFP
+#if __ARM_PCS_VFP && __ARM_FP&8
 
 double fabs(double x)
 {
diff --git a/src/math/arm/sqrt.c b/src/math/arm/sqrt.c
index 874af960..567e2e91 100644
--- a/src/math/arm/sqrt.c
+++ b/src/math/arm/sqrt.c
@@ -1,6 +1,6 @@
 #include <math.h>
 
-#if __ARM_PCS_VFP || (__VFP_FP__ && !__SOFTFP__)
+#if (__ARM_PCS_VFP || (__VFP_FP__ && !__SOFTFP__)) && (__ARM_FP&8)
 
 double sqrt(double x)
 {
diff --git a/src/math/atanl.c b/src/math/atanl.c
index 79a3edb8..c3b0c926 100644
--- a/src/math/atanl.c
+++ b/src/math/atanl.c
@@ -70,21 +70,21 @@ static long double T_odd(long double x)
 #elif LDBL_MANT_DIG == 113
 #define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | u.i.top>>8)
 
-const long double atanhi[] = {
+static const long double atanhi[] = {
 	 4.63647609000806116214256231461214397e-01L,
 	 7.85398163397448309615660845819875699e-01L,
 	 9.82793723247329067985710611014666038e-01L,
 	 1.57079632679489661923132169163975140e+00L,
 };
 
-const long double atanlo[] = {
+static const long double atanlo[] = {
 	 4.89509642257333492668618435220297706e-36L,
 	 2.16795253253094525619926100651083806e-35L,
 	-2.31288434538183565909319952098066272e-35L,
 	 4.33590506506189051239852201302167613e-35L,
 };
 
-const long double aT[] = {
+static const long double aT[] = {
 	 3.33333333333333333333333333333333125e-01L,
 	-1.99999999999999999999999999999180430e-01L,
 	 1.42857142857142857142857142125269827e-01L,
diff --git a/src/math/cosh.c b/src/math/cosh.c
index 100f8231..490c15fb 100644
--- a/src/math/cosh.c
+++ b/src/math/cosh.c
@@ -35,6 +35,6 @@ double cosh(double x)
 
 	/* |x| > log(DBL_MAX) or nan */
 	/* note: the result is stored to handle overflow */
-	t = __expo2(x);
+	t = __expo2(x, 1.0);
 	return t;
 }
diff --git a/src/math/coshf.c b/src/math/coshf.c
index b09f2ee5..e739cff9 100644
--- a/src/math/coshf.c
+++ b/src/math/coshf.c
@@ -28,6 +28,6 @@ float coshf(float x)
 	}
 
 	/* |x| > log(FLT_MAX) or nan */
-	t = __expo2f(x);
+	t = __expo2f(x, 1.0f);
 	return t;
 }
diff --git a/src/math/exp.c b/src/math/exp.c
index 9ea672fa..b764d73c 100644
--- a/src/math/exp.c
+++ b/src/math/exp.c
@@ -1,134 +1,134 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */
 /*
- * ====================================================
- * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision e^x function.
  *
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* exp(x)
- * Returns the exponential of x.
- *
- * Method
- *   1. Argument reduction:
- *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
- *      Given x, find r and integer k such that
- *
- *               x = k*ln2 + r,  |r| <= 0.5*ln2.
- *
- *      Here r will be represented as r = hi-lo for better
- *      accuracy.
- *
- *   2. Approximation of exp(r) by a special rational function on
- *      the interval [0,0.34658]:
- *      Write
- *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
- *      We use a special Remez algorithm on [0,0.34658] to generate
- *      a polynomial of degree 5 to approximate R. The maximum error
- *      of this polynomial approximation is bounded by 2**-59. In
- *      other words,
- *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
- *      (where z=r*r, and the values of P1 to P5 are listed below)
- *      and
- *          |                  5          |     -59
- *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
- *          |                             |
- *      The computation of exp(r) thus becomes
- *                              2*r
- *              exp(r) = 1 + ----------
- *                            R(r) - r
- *                                 r*c(r)
- *                     = 1 + r + ----------- (for better accuracy)
- *                                2 - c(r)
- *      where
- *                              2       4             10
- *              c(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
- *
- *   3. Scale back to obtain exp(x):
- *      From step 1, we have
- *         exp(x) = 2^k * exp(r)
- *
- * Special cases:
- *      exp(INF) is INF, exp(NaN) is NaN;
- *      exp(-INF) is 0, and
- *      for finite argument, only exp(0)=1 is exact.
- *
- * Accuracy:
- *      according to an error analysis, the error is always less than
- *      1 ulp (unit in the last place).
- *
- * Misc. info.
- *      For IEEE double
- *          if x >  709.782712893383973096 then exp(x) overflows
- *          if x < -745.133219101941108420 then exp(x) underflows
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
+#include <math.h>
+#include <stdint.h>
 #include "libm.h"
+#include "exp_data.h"
 
-static const double
-half[2] = {0.5,-0.5},
-ln2hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
-ln2lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
-invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
-P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
 
-double exp(double x)
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflown into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.  */
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double_t hi, lo, c, xx, y;
-	int k, sign;
-	uint32_t hx;
-
-	GET_HIGH_WORD(hx, x);
-	sign = hx>>31;
-	hx &= 0x7fffffff;  /* high word of |x| */
+	double_t scale, y;
 
-	/* special cases */
-	if (hx >= 0x4086232b) {  /* if |x| >= 708.39... */
-		if (isnan(x))
-			return x;
-		if (x > 709.782712893383973096) {
-			/* overflow if x!=inf */
-			x *= 0x1p1023;
-			return x;
-		}
-		if (x < -708.39641853226410622) {
-			/* underflow if x!=-inf */
-			FORCE_EVAL((float)(-0x1p-149/x));
-			if (x < -745.13321910194110842)
-				return 0;
-		}
+	if ((ki & 0x80000000) == 0) {
+		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
+		sbits -= 1009ull << 52;
+		scale = asdouble(sbits);
+		y = 0x1p1009 * (scale + scale * tmp);
+		return eval_as_double(y);
+	}
+	/* k < 0, need special care in the subnormal range.  */
+	sbits += 1022ull << 52;
+	scale = asdouble(sbits);
+	y = scale + scale * tmp;
+	if (y < 1.0) {
+		/* Round y to the right precision before scaling it into the subnormal
+		 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+		 E is the worst-case ulp error outside the subnormal range.  So this
+		 is only useful if the goal is better than 1 ulp worst-case error.  */
+		double_t hi, lo;
+		lo = scale - y + scale * tmp;
+		hi = 1.0 + y;
+		lo = 1.0 - hi + y + lo;
+		y = eval_as_double(hi + lo) - 1.0;
+		/* Avoid -0.0 with downward rounding.  */
+		if (WANT_ROUNDING && y == 0.0)
+			y = 0.0;
+		/* The underflow exception needs to be signaled explicitly.  */
+		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
 	}
+	y = 0x1p-1022 * y;
+	return eval_as_double(y);
+}
 
-	/* argument reduction */
-	if (hx > 0x3fd62e42) {  /* if |x| > 0.5 ln2 */
-		if (hx >= 0x3ff0a2b2)  /* if |x| >= 1.5 ln2 */
-			k = (int)(invln2*x + half[sign]);
-		else
-			k = 1 - sign - sign;
-		hi = x - k*ln2hi;  /* k*ln2hi is exact here */
-		lo = k*ln2lo;
-		x = hi - lo;
-	} else if (hx > 0x3e300000)  {  /* if |x| > 2**-28 */
-		k = 0;
-		hi = x;
-		lo = 0;
-	} else {
-		/* inexact if x!=0 */
-		FORCE_EVAL(0x1p1023 + x);
-		return 1 + x;
+/* Top 12 bits of a double (sign and exponent bits).  */
+static inline uint32_t top12(double x)
+{
+	return asuint64(x) >> 52;
+}
+
+double exp(double x)
+{
+	uint32_t abstop;
+	uint64_t ki, idx, top, sbits;
+	double_t kd, z, r, r2, scale, tail, tmp;
+
+	abstop = top12(x) & 0x7ff;
+	if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
+		if (abstop - top12(0x1p-54) >= 0x80000000)
+			/* Avoid spurious underflow for tiny x.  */
+			/* Note: 0 is common input.  */
+			return WANT_ROUNDING ? 1.0 + x : 1.0;
+		if (abstop >= top12(1024.0)) {
+			if (asuint64(x) == asuint64(-INFINITY))
+				return 0.0;
+			if (abstop >= top12(INFINITY))
+				return 1.0 + x;
+			if (asuint64(x) >> 63)
+				return __math_uflow(0);
+			else
+				return __math_oflow(0);
+		}
+		/* Large x is special cased below.  */
+		abstop = 0;
 	}
 
-	/* x is now in primary range */
-	xx = x*x;
-	c = x - xx*(P1+xx*(P2+xx*(P3+xx*(P4+xx*P5))));
-	y = 1 + (x*c/(2-c) - lo + hi);
-	if (k == 0)
-		return y;
-	return scalbn(y, k);
+	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+	/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+	z = InvLn2N * x;
+#if TOINT_INTRINSICS
+	kd = roundtoint(z);
+	ki = converttoint(z);
+#elif EXP_USE_TOINT_NARROW
+	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
+	kd = eval_as_double(z + Shift);
+	ki = asuint64(kd) >> 16;
+	kd = (double_t)(int32_t)ki;
+#else
+	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+	kd = eval_as_double(z + Shift);
+	ki = asuint64(kd);
+	kd -= Shift;
+#endif
+	r = x + kd * NegLn2hiN + kd * NegLn2loN;
+	/* 2^(k/N) ~= scale * (1 + tail).  */
+	idx = 2 * (ki % N);
+	top = ki << (52 - EXP_TABLE_BITS);
+	tail = asdouble(T[idx]);
+	/* This is only a valid scale when -1023*N < k < 1024*N.  */
+	sbits = T[idx + 1] + top;
+	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
+	/* Evaluation is optimized assuming superscalar pipelined execution.  */
+	r2 = r * r;
+	/* Without fma the worst case error is 0.25/N ulp larger.  */
+	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
+	tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+	if (predict_false(abstop == 0))
+		return specialcase(tmp, sbits, ki);
+	scale = asdouble(sbits);
+	/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+	   is no spurious underflow here even without fma.  */
+	return eval_as_double(scale + scale * tmp);
 }
diff --git a/src/math/exp2.c b/src/math/exp2.c
index e14adba5..e0ff54bd 100644
--- a/src/math/exp2.c
+++ b/src/math/exp2.c
@@ -1,375 +1,121 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */
-/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
+/*
+ * Double-precision 2^x function.
  *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
+#include <math.h>
+#include <stdint.h>
 #include "libm.h"
+#include "exp_data.h"
 
-#define TBLSIZE 256
+#define N (1 << EXP_TABLE_BITS)
+#define Shift __exp_data.exp2_shift
+#define T __exp_data.tab
+#define C1 __exp_data.exp2_poly[0]
+#define C2 __exp_data.exp2_poly[1]
+#define C3 __exp_data.exp2_poly[2]
+#define C4 __exp_data.exp2_poly[3]
+#define C5 __exp_data.exp2_poly[4]
 
-static const double
-redux = 0x1.8p52 / TBLSIZE,
-P1    = 0x1.62e42fefa39efp-1,
-P2    = 0x1.ebfbdff82c575p-3,
-P3    = 0x1.c6b08d704a0a6p-5,
-P4    = 0x1.3b2ab88f70400p-7,
-P5    = 0x1.5d88003875c74p-10;
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflown into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.  */
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+{
+	double_t scale, y;
 
-static const double tbl[TBLSIZE * 2] = {
-/*  exp2(z + eps)          eps     */
-  0x1.6a09e667f3d5dp-1,  0x1.9880p-44,
-  0x1.6b052fa751744p-1,  0x1.8000p-50,
-  0x1.6c012750bd9fep-1, -0x1.8780p-45,
-  0x1.6cfdcddd476bfp-1,  0x1.ec00p-46,
-  0x1.6dfb23c651a29p-1, -0x1.8000p-50,
-  0x1.6ef9298593ae3p-1, -0x1.c000p-52,
-  0x1.6ff7df9519386p-1, -0x1.fd80p-45,
-  0x1.70f7466f42da3p-1, -0x1.c880p-45,
-  0x1.71f75e8ec5fc3p-1,  0x1.3c00p-46,
-  0x1.72f8286eacf05p-1, -0x1.8300p-44,
-  0x1.73f9a48a58152p-1, -0x1.0c00p-47,
-  0x1.74fbd35d7ccfcp-1,  0x1.f880p-45,
-  0x1.75feb564267f1p-1,  0x1.3e00p-47,
-  0x1.77024b1ab6d48p-1, -0x1.7d00p-45,
-  0x1.780694fde5d38p-1, -0x1.d000p-50,
-  0x1.790b938ac1d00p-1,  0x1.3000p-49,
-  0x1.7a11473eb0178p-1, -0x1.d000p-49,
-  0x1.7b17b0976d060p-1,  0x1.0400p-45,
-  0x1.7c1ed0130c133p-1,  0x1.0000p-53,
-  0x1.7d26a62ff8636p-1, -0x1.6900p-45,
-  0x1.7e2f336cf4e3bp-1, -0x1.2e00p-47,
-  0x1.7f3878491c3e8p-1, -0x1.4580p-45,
-  0x1.80427543e1b4ep-1,  0x1.3000p-44,
-  0x1.814d2add1071ap-1,  0x1.f000p-47,
-  0x1.82589994ccd7ep-1, -0x1.1c00p-45,
-  0x1.8364c1eb942d0p-1,  0x1.9d00p-45,
-  0x1.8471a4623cab5p-1,  0x1.7100p-43,
-  0x1.857f4179f5bbcp-1,  0x1.2600p-45,
-  0x1.868d99b4491afp-1, -0x1.2c40p-44,
-  0x1.879cad931a395p-1, -0x1.3000p-45,
-  0x1.88ac7d98a65b8p-1, -0x1.a800p-45,
-  0x1.89bd0a4785800p-1, -0x1.d000p-49,
-  0x1.8ace5422aa223p-1,  0x1.3280p-44,
-  0x1.8be05bad619fap-1,  0x1.2b40p-43,
-  0x1.8cf3216b54383p-1, -0x1.ed00p-45,
-  0x1.8e06a5e08664cp-1, -0x1.0500p-45,
-  0x1.8f1ae99157807p-1,  0x1.8280p-45,
-  0x1.902fed0282c0ep-1, -0x1.cb00p-46,
-  0x1.9145b0b91ff96p-1, -0x1.5e00p-47,
-  0x1.925c353aa2ff9p-1,  0x1.5400p-48,
-  0x1.93737b0cdc64ap-1,  0x1.7200p-46,
-  0x1.948b82b5f98aep-1, -0x1.9000p-47,
-  0x1.95a44cbc852cbp-1,  0x1.5680p-45,
-  0x1.96bdd9a766f21p-1, -0x1.6d00p-44,
-  0x1.97d829fde4e2ap-1, -0x1.1000p-47,
-  0x1.98f33e47a23a3p-1,  0x1.d000p-45,
-  0x1.9a0f170ca0604p-1, -0x1.8a40p-44,
-  0x1.9b2bb4d53ff89p-1,  0x1.55c0p-44,
-  0x1.9c49182a3f15bp-1,  0x1.6b80p-45,
-  0x1.9d674194bb8c5p-1, -0x1.c000p-49,
-  0x1.9e86319e3238ep-1,  0x1.7d00p-46,
-  0x1.9fa5e8d07f302p-1,  0x1.6400p-46,
-  0x1.a0c667b5de54dp-1, -0x1.5000p-48,
-  0x1.a1e7aed8eb8f6p-1,  0x1.9e00p-47,
-  0x1.a309bec4a2e27p-1,  0x1.ad80p-45,
-  0x1.a42c980460a5dp-1, -0x1.af00p-46,
-  0x1.a5503b23e259bp-1,  0x1.b600p-47,
-  0x1.a674a8af46213p-1,  0x1.8880p-44,
-  0x1.a799e1330b3a7p-1,  0x1.1200p-46,
-  0x1.a8bfe53c12e8dp-1,  0x1.6c00p-47,
-  0x1.a9e6b5579fcd2p-1, -0x1.9b80p-45,
-  0x1.ab0e521356fb8p-1,  0x1.b700p-45,
-  0x1.ac36bbfd3f381p-1,  0x1.9000p-50,
-  0x1.ad5ff3a3c2780p-1,  0x1.4000p-49,
-  0x1.ae89f995ad2a3p-1, -0x1.c900p-45,
-  0x1.afb4ce622f367p-1,  0x1.6500p-46,
-  0x1.b0e07298db790p-1,  0x1.fd40p-45,
-  0x1.b20ce6c9a89a9p-1,  0x1.2700p-46,
-  0x1.b33a2b84f1a4bp-1,  0x1.d470p-43,
-  0x1.b468415b747e7p-1, -0x1.8380p-44,
-  0x1.b59728de5593ap-1,  0x1.8000p-54,
-  0x1.b6c6e29f1c56ap-1,  0x1.ad00p-47,
-  0x1.b7f76f2fb5e50p-1,  0x1.e800p-50,
-  0x1.b928cf22749b2p-1, -0x1.4c00p-47,
-  0x1.ba5b030a10603p-1, -0x1.d700p-47,
-  0x1.bb8e0b79a6f66p-1,  0x1.d900p-47,
-  0x1.bcc1e904bc1ffp-1,  0x1.2a00p-47,
-  0x1.bdf69c3f3a16fp-1, -0x1.f780p-46,
-  0x1.bf2c25bd71db8p-1, -0x1.0a00p-46,
-  0x1.c06286141b2e9p-1, -0x1.1400p-46,
-  0x1.c199bdd8552e0p-1,  0x1.be00p-47,
-  0x1.c2d1cd9fa64eep-1, -0x1.9400p-47,
-  0x1.c40ab5fffd02fp-1, -0x1.ed00p-47,
-  0x1.c544778fafd15p-1,  0x1.9660p-44,
-  0x1.c67f12e57d0cbp-1, -0x1.a100p-46,
-  0x1.c7ba88988c1b6p-1, -0x1.8458p-42,
-  0x1.c8f6d9406e733p-1, -0x1.a480p-46,
-  0x1.ca3405751c4dfp-1,  0x1.b000p-51,
-  0x1.cb720dcef9094p-1,  0x1.1400p-47,
-  0x1.ccb0f2e6d1689p-1,  0x1.0200p-48,
-  0x1.cdf0b555dc412p-1,  0x1.3600p-48,
-  0x1.cf3155b5bab3bp-1, -0x1.6900p-47,
-  0x1.d072d4a0789bcp-1,  0x1.9a00p-47,
-  0x1.d1b532b08c8fap-1, -0x1.5e00p-46,
-  0x1.d2f87080d8a85p-1,  0x1.d280p-46,
-  0x1.d43c8eacaa203p-1,  0x1.1a00p-47,
-  0x1.d5818dcfba491p-1,  0x1.f000p-50,
-  0x1.d6c76e862e6a1p-1, -0x1.3a00p-47,
-  0x1.d80e316c9834ep-1, -0x1.cd80p-47,
-  0x1.d955d71ff6090p-1,  0x1.4c00p-48,
-  0x1.da9e603db32aep-1,  0x1.f900p-48,
-  0x1.dbe7cd63a8325p-1,  0x1.9800p-49,
-  0x1.dd321f301b445p-1, -0x1.5200p-48,
-  0x1.de7d5641c05bfp-1, -0x1.d700p-46,
-  0x1.dfc97337b9aecp-1, -0x1.6140p-46,
-  0x1.e11676b197d5ep-1,  0x1.b480p-47,
-  0x1.e264614f5a3e7p-1,  0x1.0ce0p-43,
-  0x1.e3b333b16ee5cp-1,  0x1.c680p-47,
-  0x1.e502ee78b3fb4p-1, -0x1.9300p-47,
-  0x1.e653924676d68p-1, -0x1.5000p-49,
-  0x1.e7a51fbc74c44p-1, -0x1.7f80p-47,
-  0x1.e8f7977cdb726p-1, -0x1.3700p-48,
-  0x1.ea4afa2a490e8p-1,  0x1.5d00p-49,
-  0x1.eb9f4867ccae4p-1,  0x1.61a0p-46,
-  0x1.ecf482d8e680dp-1,  0x1.5500p-48,
-  0x1.ee4aaa2188514p-1,  0x1.6400p-51,
-  0x1.efa1bee615a13p-1, -0x1.e800p-49,
-  0x1.f0f9c1cb64106p-1, -0x1.a880p-48,
-  0x1.f252b376bb963p-1, -0x1.c900p-45,
-  0x1.f3ac948dd7275p-1,  0x1.a000p-53,
-  0x1.f50765b6e4524p-1, -0x1.4f00p-48,
-  0x1.f6632798844fdp-1,  0x1.a800p-51,
-  0x1.f7bfdad9cbe38p-1,  0x1.abc0p-48,
-  0x1.f91d802243c82p-1, -0x1.4600p-50,
-  0x1.fa7c1819e908ep-1, -0x1.b0c0p-47,
-  0x1.fbdba3692d511p-1, -0x1.0e00p-51,
-  0x1.fd3c22b8f7194p-1, -0x1.0de8p-46,
-  0x1.fe9d96b2a23eep-1,  0x1.e430p-49,
-  0x1.0000000000000p+0,  0x0.0000p+0,
-  0x1.00b1afa5abcbep+0, -0x1.3400p-52,
-  0x1.0163da9fb3303p+0, -0x1.2170p-46,
-  0x1.02168143b0282p+0,  0x1.a400p-52,
-  0x1.02c9a3e77806cp+0,  0x1.f980p-49,
-  0x1.037d42e11bbcap+0, -0x1.7400p-51,
-  0x1.04315e86e7f89p+0,  0x1.8300p-50,
-  0x1.04e5f72f65467p+0, -0x1.a3f0p-46,
-  0x1.059b0d315855ap+0, -0x1.2840p-47,
-  0x1.0650a0e3c1f95p+0,  0x1.1600p-48,
-  0x1.0706b29ddf71ap+0,  0x1.5240p-46,
-  0x1.07bd42b72a82dp+0, -0x1.9a00p-49,
-  0x1.0874518759bd0p+0,  0x1.6400p-49,
-  0x1.092bdf66607c8p+0, -0x1.0780p-47,
-  0x1.09e3ecac6f383p+0, -0x1.8000p-54,
-  0x1.0a9c79b1f3930p+0,  0x1.fa00p-48,
-  0x1.0b5586cf988fcp+0, -0x1.ac80p-48,
-  0x1.0c0f145e46c8ap+0,  0x1.9c00p-50,
-  0x1.0cc922b724816p+0,  0x1.5200p-47,
-  0x1.0d83b23395dd8p+0, -0x1.ad00p-48,
-  0x1.0e3ec32d3d1f3p+0,  0x1.bac0p-46,
-  0x1.0efa55fdfa9a6p+0, -0x1.4e80p-47,
-  0x1.0fb66affed2f0p+0, -0x1.d300p-47,
-  0x1.1073028d7234bp+0,  0x1.1500p-48,
-  0x1.11301d0125b5bp+0,  0x1.c000p-49,
-  0x1.11edbab5e2af9p+0,  0x1.6bc0p-46,
-  0x1.12abdc06c31d5p+0,  0x1.8400p-49,
-  0x1.136a814f2047dp+0, -0x1.ed00p-47,
-  0x1.1429aaea92de9p+0,  0x1.8e00p-49,
-  0x1.14e95934f3138p+0,  0x1.b400p-49,
-  0x1.15a98c8a58e71p+0,  0x1.5300p-47,
-  0x1.166a45471c3dfp+0,  0x1.3380p-47,
-  0x1.172b83c7d5211p+0,  0x1.8d40p-45,
-  0x1.17ed48695bb9fp+0, -0x1.5d00p-47,
-  0x1.18af9388c8d93p+0, -0x1.c880p-46,
-  0x1.1972658375d66p+0,  0x1.1f00p-46,
-  0x1.1a35beb6fcba7p+0,  0x1.0480p-46,
-  0x1.1af99f81387e3p+0, -0x1.7390p-43,
-  0x1.1bbe084045d54p+0,  0x1.4e40p-45,
-  0x1.1c82f95281c43p+0, -0x1.a200p-47,
-  0x1.1d4873168b9b2p+0,  0x1.3800p-49,
-  0x1.1e0e75eb44031p+0,  0x1.ac00p-49,
-  0x1.1ed5022fcd938p+0,  0x1.1900p-47,
-  0x1.1f9c18438cdf7p+0, -0x1.b780p-46,
-  0x1.2063b88628d8fp+0,  0x1.d940p-45,
-  0x1.212be3578a81ep+0,  0x1.8000p-50,
-  0x1.21f49917ddd41p+0,  0x1.b340p-45,
-  0x1.22bdda2791323p+0,  0x1.9f80p-46,
-  0x1.2387a6e7561e7p+0, -0x1.9c80p-46,
-  0x1.2451ffb821427p+0,  0x1.2300p-47,
-  0x1.251ce4fb2a602p+0, -0x1.3480p-46,
-  0x1.25e85711eceb0p+0,  0x1.2700p-46,
-  0x1.26b4565e27d16p+0,  0x1.1d00p-46,
-  0x1.2780e341de00fp+0,  0x1.1ee0p-44,
-  0x1.284dfe1f5633ep+0, -0x1.4c00p-46,
-  0x1.291ba7591bb30p+0, -0x1.3d80p-46,
-  0x1.29e9df51fdf09p+0,  0x1.8b00p-47,
-  0x1.2ab8a66d10e9bp+0, -0x1.27c0p-45,
-  0x1.2b87fd0dada3ap+0,  0x1.a340p-45,
-  0x1.2c57e39771af9p+0, -0x1.0800p-46,
-  0x1.2d285a6e402d9p+0, -0x1.ed00p-47,
-  0x1.2df961f641579p+0, -0x1.4200p-48,
-  0x1.2ecafa93e2ecfp+0, -0x1.4980p-45,
-  0x1.2f9d24abd8822p+0, -0x1.6300p-46,
-  0x1.306fe0a31b625p+0, -0x1.2360p-44,
-  0x1.31432edeea50bp+0, -0x1.0df8p-40,
-  0x1.32170fc4cd7b8p+0, -0x1.2480p-45,
-  0x1.32eb83ba8e9a2p+0, -0x1.5980p-45,
-  0x1.33c08b2641766p+0,  0x1.ed00p-46,
-  0x1.3496266e3fa27p+0, -0x1.c000p-50,
-  0x1.356c55f929f0fp+0, -0x1.0d80p-44,
-  0x1.36431a2de88b9p+0,  0x1.2c80p-45,
-  0x1.371a7373aaa39p+0,  0x1.0600p-45,
-  0x1.37f26231e74fep+0, -0x1.6600p-46,
-  0x1.38cae6d05d838p+0, -0x1.ae00p-47,
-  0x1.39a401b713ec3p+0, -0x1.4720p-43,
-  0x1.3a7db34e5a020p+0,  0x1.8200p-47,
-  0x1.3b57fbfec6e95p+0,  0x1.e800p-44,
-  0x1.3c32dc313a8f2p+0,  0x1.f800p-49,
-  0x1.3d0e544ede122p+0, -0x1.7a00p-46,
-  0x1.3dea64c1234bbp+0,  0x1.6300p-45,
-  0x1.3ec70df1c4eccp+0, -0x1.8a60p-43,
-  0x1.3fa4504ac7e8cp+0, -0x1.cdc0p-44,
-  0x1.40822c367a0bbp+0,  0x1.5b80p-45,
-  0x1.4160a21f72e95p+0,  0x1.ec00p-46,
-  0x1.423fb27094646p+0, -0x1.3600p-46,
-  0x1.431f5d950a920p+0,  0x1.3980p-45,
-  0x1.43ffa3f84b9ebp+0,  0x1.a000p-48,
-  0x1.44e0860618919p+0, -0x1.6c00p-48,
-  0x1.45c2042a7d201p+0, -0x1.bc00p-47,
-  0x1.46a41ed1d0016p+0, -0x1.2800p-46,
-  0x1.4786d668b3326p+0,  0x1.0e00p-44,
-  0x1.486a2b5c13c00p+0, -0x1.d400p-45,
-  0x1.494e1e192af04p+0,  0x1.c200p-47,
-  0x1.4a32af0d7d372p+0, -0x1.e500p-46,
-  0x1.4b17dea6db801p+0,  0x1.7800p-47,
-  0x1.4bfdad53629e1p+0, -0x1.3800p-46,
-  0x1.4ce41b817c132p+0,  0x1.0800p-47,
-  0x1.4dcb299fddddbp+0,  0x1.c700p-45,
-  0x1.4eb2d81d8ab96p+0, -0x1.ce00p-46,
-  0x1.4f9b2769d2d02p+0,  0x1.9200p-46,
-  0x1.508417f4531c1p+0, -0x1.8c00p-47,
-  0x1.516daa2cf662ap+0, -0x1.a000p-48,
-  0x1.5257de83f51eap+0,  0x1.a080p-43,
-  0x1.5342b569d4edap+0, -0x1.6d80p-45,
-  0x1.542e2f4f6ac1ap+0, -0x1.2440p-44,
-  0x1.551a4ca5d94dbp+0,  0x1.83c0p-43,
-  0x1.56070dde9116bp+0,  0x1.4b00p-45,
-  0x1.56f4736b529dep+0,  0x1.15a0p-43,
-  0x1.57e27dbe2c40ep+0, -0x1.9e00p-45,
-  0x1.58d12d497c76fp+0, -0x1.3080p-45,
-  0x1.59c0827ff0b4cp+0,  0x1.dec0p-43,
-  0x1.5ab07dd485427p+0, -0x1.4000p-51,
-  0x1.5ba11fba87af4p+0,  0x1.0080p-44,
-  0x1.5c9268a59460bp+0, -0x1.6c80p-45,
-  0x1.5d84590998e3fp+0,  0x1.69a0p-43,
-  0x1.5e76f15ad20e1p+0, -0x1.b400p-46,
-  0x1.5f6a320dcebcap+0,  0x1.7700p-46,
-  0x1.605e1b976dcb8p+0,  0x1.6f80p-45,
-  0x1.6152ae6cdf715p+0,  0x1.1000p-47,
-  0x1.6247eb03a5531p+0, -0x1.5d00p-46,
-  0x1.633dd1d1929b5p+0, -0x1.2d00p-46,
-  0x1.6434634ccc313p+0, -0x1.a800p-49,
-  0x1.652b9febc8efap+0, -0x1.8600p-45,
-  0x1.6623882553397p+0,  0x1.1fe0p-40,
-  0x1.671c1c708328ep+0, -0x1.7200p-44,
-  0x1.68155d44ca97ep+0,  0x1.6800p-49,
-  0x1.690f4b19e9471p+0, -0x1.9780p-45,
-};
+	if ((ki & 0x80000000) == 0) {
+		/* k > 0, the exponent of scale might have overflowed by 1.  */
+		sbits -= 1ull << 52;
+		scale = asdouble(sbits);
+		y = 2 * (scale + scale * tmp);
+		return eval_as_double(y);
+	}
+	/* k < 0, need special care in the subnormal range.  */
+	sbits += 1022ull << 52;
+	scale = asdouble(sbits);
+	y = scale + scale * tmp;
+	if (y < 1.0) {
+		/* Round y to the right precision before scaling it into the subnormal
+		   range to avoid double rounding that can cause 0.5+E/2 ulp error where
+		   E is the worst-case ulp error outside the subnormal range.  So this
+		   is only useful if the goal is better than 1 ulp worst-case error.  */
+		double_t hi, lo;
+		lo = scale - y + scale * tmp;
+		hi = 1.0 + y;
+		lo = 1.0 - hi + y + lo;
+		y = eval_as_double(hi + lo) - 1.0;
+		/* Avoid -0.0 with downward rounding.  */
+		if (WANT_ROUNDING && y == 0.0)
+			y = 0.0;
+		/* The underflow exception needs to be signaled explicitly.  */
+		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
+	}
+	y = 0x1p-1022 * y;
+	return eval_as_double(y);
+}
+
+/* Top 12 bits of a double (sign and exponent bits).  */
+static inline uint32_t top12(double x)
+{
+	return asuint64(x) >> 52;
+}
 
-/*
- * exp2(x): compute the base 2 exponential of x
- *
- * Accuracy: Peak error < 0.503 ulp for normalized results.
- *
- * Method: (accurate tables)
- *
- *   Reduce x:
- *     x = k + y, for integer k and |y| <= 1/2.
- *     Thus we have exp2(x) = 2**k * exp2(y).
- *
- *   Reduce y:
- *     y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE.
- *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]),
- *     with |z - eps[i]| <= 2**-9 + 2**-39 for the table used.
- *
- *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via
- *   a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61.
- *   The values in exp2t[] and eps[] are chosen such that
- *   exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such
- *   that exp2t[i] is accurate to 2**-64.
- *
- *   Note that the range of i is +-TBLSIZE/2, so we actually index the tables
- *   by i0 = i + TBLSIZE/2.  For cache efficiency, exp2t[] and eps[] are
- *   virtual tables, interleaved in the real table tbl[].
- *
- *   This method is due to Gal, with many details due to Gal and Bachelis:
- *
- *      Gal, S. and Bachelis, B.  An Accurate Elementary Mathematical Library
- *      for the IEEE Floating Point Standard.  TOMS 17(1), 26-46 (1991).
- */
 double exp2(double x)
 {
-	double_t r, t, z;
-	uint32_t ix, i0;
-	union {double f; uint64_t i;} u = {x};
-	union {uint32_t u; int32_t i;} k;
+	uint32_t abstop;
+	uint64_t ki, idx, top, sbits;
+	double_t kd, r, r2, scale, tail, tmp;
 
-	/* Filter out exceptional cases. */
-	ix = u.i>>32 & 0x7fffffff;
-	if (ix >= 0x408ff000) {  /* |x| >= 1022 or nan */
-		if (ix >= 0x40900000 && u.i>>63 == 0) {  /* x >= 1024 or nan */
-			/* overflow */
-			x *= 0x1p1023;
-			return x;
-		}
-		if (ix >= 0x7ff00000)  /* -inf or -nan */
-			return -1/x;
-		if (u.i>>63) {  /* x <= -1022 */
-			/* underflow */
-			if (x <= -1075 || x - 0x1p52 + 0x1p52 != x)
-				FORCE_EVAL((float)(-0x1p-149/x));
-			if (x <= -1075)
-				return 0;
+	abstop = top12(x) & 0x7ff;
+	if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
+		if (abstop - top12(0x1p-54) >= 0x80000000)
+			/* Avoid spurious underflow for tiny x.  */
+			/* Note: 0 is common input.  */
+			return WANT_ROUNDING ? 1.0 + x : 1.0;
+		if (abstop >= top12(1024.0)) {
+			if (asuint64(x) == asuint64(-INFINITY))
+				return 0.0;
+			if (abstop >= top12(INFINITY))
+				return 1.0 + x;
+			if (!(asuint64(x) >> 63))
+				return __math_oflow(0);
+			else if (asuint64(x) >= asuint64(-1075.0))
+				return __math_uflow(0);
 		}
-	} else if (ix < 0x3c900000) {  /* |x| < 0x1p-54 */
-		return 1.0 + x;
+		if (2 * asuint64(x) > 2 * asuint64(928.0))
+			/* Large x is special cased below.  */
+			abstop = 0;
 	}
 
-	/* Reduce x, computing z, i0, and k. */
-	u.f = x + redux;
-	i0 = u.i;
-	i0 += TBLSIZE / 2;
-	k.u = i0 / TBLSIZE * TBLSIZE;
-	k.i /= TBLSIZE;
-	i0 %= TBLSIZE;
-	u.f -= redux;
-	z = x - u.f;
-
-	/* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
-	t = tbl[2*i0];       /* exp2t[i0] */
-	z -= tbl[2*i0 + 1];  /* eps[i0]   */
-	r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5))));
-
-	return scalbn(r, k.i);
+	/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].  */
+	/* x = k/N + r, with int k and r in [-1/2N, 1/2N].  */
+	kd = eval_as_double(x + Shift);
+	ki = asuint64(kd); /* k.  */
+	kd -= Shift; /* k/N for int k.  */
+	r = x - kd;
+	/* 2^(k/N) ~= scale * (1 + tail).  */
+	idx = 2 * (ki % N);
+	top = ki << (52 - EXP_TABLE_BITS);
+	tail = asdouble(T[idx]);
+	/* This is only a valid scale when -1023*N < k < 1024*N.  */
+	sbits = T[idx + 1] + top;
+	/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
+	/* Evaluation is optimized assuming superscalar pipelined execution.  */
+	r2 = r * r;
+	/* Without fma the worst case error is 0.5/N ulp larger.  */
+	/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
+	tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+	if (predict_false(abstop == 0))
+		return specialcase(tmp, sbits, ki);
+	scale = asdouble(sbits);
+	/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
+	   is no spurious underflow here even without fma.  */
+	return eval_as_double(scale + scale * tmp);
 }
diff --git a/src/math/exp2f.c b/src/math/exp2f.c
index 296b6343..0360482c 100644
--- a/src/math/exp2f.c
+++ b/src/math/exp2f.c
@@ -1,126 +1,69 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_exp2f.c */
-/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
+/*
+ * Single-precision 2^x function.
  *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
+#include <math.h>
+#include <stdint.h>
 #include "libm.h"
+#include "exp2f_data.h"
 
-#define TBLSIZE 16
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
 
-static const float
-redux = 0x1.8p23f / TBLSIZE,
-P1    = 0x1.62e430p-1f,
-P2    = 0x1.ebfbe0p-3f,
-P3    = 0x1.c6b348p-5f,
-P4    = 0x1.3b2c9cp-7f;
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
+Wrong count: 168353 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
 
-static const double exp2ft[TBLSIZE] = {
-  0x1.6a09e667f3bcdp-1,
-  0x1.7a11473eb0187p-1,
-  0x1.8ace5422aa0dbp-1,
-  0x1.9c49182a3f090p-1,
-  0x1.ae89f995ad3adp-1,
-  0x1.c199bdd85529cp-1,
-  0x1.d5818dcfba487p-1,
-  0x1.ea4afa2a490dap-1,
-  0x1.0000000000000p+0,
-  0x1.0b5586cf9890fp+0,
-  0x1.172b83c7d517bp+0,
-  0x1.2387a6e756238p+0,
-  0x1.306fe0a31b715p+0,
-  0x1.3dea64c123422p+0,
-  0x1.4bfdad5362a27p+0,
-  0x1.5ab07dd485429p+0,
-};
+#define N (1 << EXP2F_TABLE_BITS)
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly
+#define SHIFT __exp2f_data.shift_scaled
+
+static inline uint32_t top12(float x)
+{
+	return asuint(x) >> 20;
+}
 
-/*
- * exp2f(x): compute the base 2 exponential of x
- *
- * Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927.
- *
- * Method: (equally-spaced tables)
- *
- *   Reduce x:
- *     x = k + y, for integer k and |y| <= 1/2.
- *     Thus we have exp2f(x) = 2**k * exp2(y).
- *
- *   Reduce y:
- *     y = i/TBLSIZE + z for integer i near y * TBLSIZE.
- *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z),
- *     with |z| <= 2**-(TBLSIZE+1).
- *
- *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a
- *   degree-4 minimax polynomial with maximum error under 1.4 * 2**-33.
- *   Using double precision for everything except the reduction makes
- *   roundoff error insignificant and simplifies the scaling step.
- *
- *   This method is due to Tang, but I do not use his suggested parameters:
- *
- *      Tang, P.  Table-driven Implementation of the Exponential Function
- *      in IEEE Floating-Point Arithmetic.  TOMS 15(2), 144-157 (1989).
- */
 float exp2f(float x)
 {
-	double_t t, r, z;
-	union {float f; uint32_t i;} u = {x};
-	union {double f; uint64_t i;} uk;
-	uint32_t ix, i0, k;
+	uint32_t abstop;
+	uint64_t ki, t;
+	double_t kd, xd, z, r, r2, y, s;
 
-	/* Filter out exceptional cases. */
-	ix = u.i & 0x7fffffff;
-	if (ix > 0x42fc0000) {  /* |x| > 126 */
-		if (ix > 0x7f800000) /* NaN */
-			return x;
-		if (u.i >= 0x43000000 && u.i < 0x80000000) {  /* x >= 128 */
-			x *= 0x1p127f;
-			return x;
-		}
-		if (u.i >= 0x80000000) {  /* x < -126 */
-			if (u.i >= 0xc3160000 || (u.i & 0x0000ffff))
-				FORCE_EVAL(-0x1p-149f/x);
-			if (u.i >= 0xc3160000)  /* x <= -150 */
-				return 0;
-		}
-	} else if (ix <= 0x33000000) {  /* |x| <= 0x1p-25 */
-		return 1.0f + x;
+	xd = (double_t)x;
+	abstop = top12(x) & 0x7ff;
+	if (predict_false(abstop >= top12(128.0f))) {
+		/* |x| >= 128 or x is nan.  */
+		if (asuint(x) == asuint(-INFINITY))
+			return 0.0f;
+		if (abstop >= top12(INFINITY))
+			return x + x;
+		if (x > 0.0f)
+			return __math_oflowf(0);
+		if (x <= -150.0f)
+			return __math_uflowf(0);
 	}
 
-	/* Reduce x, computing z, i0, and k. */
-	u.f = x + redux;
-	i0 = u.i;
-	i0 += TBLSIZE / 2;
-	k = i0 / TBLSIZE;
-	uk.i = (uint64_t)(0x3ff + k)<<52;
-	i0 &= TBLSIZE - 1;
-	u.f -= redux;
-	z = x - u.f;
-	/* Compute r = exp2(y) = exp2ft[i0] * p(z). */
-	r = exp2ft[i0];
-	t = r * z;
-	r = r + t * (P1 + z * P2) + t * (z * z) * (P3 + z * P4);
+	/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.  */
+	kd = eval_as_double(xd + SHIFT);
+	ki = asuint64(kd);
+	kd -= SHIFT; /* k/N for int k.  */
+	r = xd - kd;
 
-	/* Scale by 2**k */
-	return r * uk.f;
+	/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+	t = T[ki % N];
+	t += ki << (52 - EXP2F_TABLE_BITS);
+	s = asdouble(t);
+	z = C[0] * r + C[1];
+	r2 = r * r;
+	y = C[2] * r + 1;
+	y = z * r2 + y;
+	y = y * s;
+	return eval_as_float(y);
 }
diff --git a/src/math/exp2f_data.c b/src/math/exp2f_data.c
new file mode 100644
index 00000000..be324727
--- /dev/null
+++ b/src/math/exp2f_data.c
@@ -0,0 +1,35 @@
+/*
+ * Shared data between expf, exp2f and powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp2f_data.h"
+
+#define N (1 << EXP2F_TABLE_BITS)
+
+const struct exp2f_data __exp2f_data = {
+  /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+     used for computing 2^(k/N) for an int |k| < 150 N as
+     double(tab[k%N] + (k << 52-BITS)) */
+  .tab = {
+0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+  },
+  .shift_scaled = 0x1.8p+52 / N,
+  .poly = {
+  0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
+  },
+  .shift = 0x1.8p+52,
+  .invln2_scaled = 0x1.71547652b82fep+0 * N,
+  .poly_scaled = {
+  0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
+  },
+};
diff --git a/src/math/exp2f_data.h b/src/math/exp2f_data.h
new file mode 100644
index 00000000..fe744f15
--- /dev/null
+++ b/src/math/exp2f_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP2F_DATA_H
+#define _EXP2F_DATA_H
+
+#include <features.h>
+#include <stdint.h>
+
+/* Shared between expf, exp2f and powf.  */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern hidden const struct exp2f_data {
+	uint64_t tab[1 << EXP2F_TABLE_BITS];
+	double shift_scaled;
+	double poly[EXP2F_POLY_ORDER];
+	double shift;
+	double invln2_scaled;
+	double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data;
+
+#endif
diff --git a/src/math/exp_data.c b/src/math/exp_data.c
new file mode 100644
index 00000000..21be0146
--- /dev/null
+++ b/src/math/exp_data.c
@@ -0,0 +1,182 @@
+/*
+ * Shared data between exp, exp2 and pow.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp_data.h"
+
+#define N (1 << EXP_TABLE_BITS)
+
+const struct exp_data __exp_data = {
+// N/ln2
+.invln2N = 0x1.71547652b82fep0 * N,
+// -ln2/N
+.negln2hiN = -0x1.62e42fefa0000p-8,
+.negln2loN = -0x1.cf79abc9e3b3ap-47,
+// Used for rounding when !TOINT_INTRINSICS
+#if EXP_USE_TOINT_NARROW
+.shift = 0x1800000000.8p0,
+#else
+.shift = 0x1.8p52,
+#endif
+// exp polynomial coefficients.
+.poly = {
+// abs error: 1.555*2^-66
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/256+eps
+// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
+// abs error if |x| < ln2/128: 1.7145*2^-56
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf172b91p-5,
+0x1.1111167a4d017p-7,
+},
+.exp2_shift = 0x1.8p52 / N,
+// exp2 polynomial coefficients.
+.exp2_poly = {
+// abs error: 1.2195*2^-65
+// ulp error: 0.507 (0.511 without fma)
+// if |x| < 1/256
+// abs error if |x| < 1/128: 1.9941*2^-56
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+},
+// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
+// tab[2*k] = asuint64(T[k])
+// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
+.tab = {
+0x0, 0x3ff0000000000000,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+},
+};
diff --git a/src/math/exp_data.h b/src/math/exp_data.h
new file mode 100644
index 00000000..3e24bac5
--- /dev/null
+++ b/src/math/exp_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP_DATA_H
+#define _EXP_DATA_H
+
+#include <features.h>
+#include <stdint.h>
+
+#define EXP_TABLE_BITS 7
+#define EXP_POLY_ORDER 5
+#define EXP_USE_TOINT_NARROW 0
+#define EXP2_POLY_ORDER 5
+extern hidden const struct exp_data {
+	double invln2N;
+	double shift;
+	double negln2hiN;
+	double negln2loN;
+	double poly[4]; /* Last four coefficients.  */
+	double exp2_shift;
+	double exp2_poly[EXP2_POLY_ORDER];
+	uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+} __exp_data;
+
+#endif
diff --git a/src/math/expf.c b/src/math/expf.c
index feee2b0e..f9fbf8e7 100644
--- a/src/math/expf.c
+++ b/src/math/expf.c
@@ -1,83 +1,80 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
 /*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision e^x function.
  *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
+#include <math.h>
+#include <stdint.h>
 #include "libm.h"
+#include "exp2f_data.h"
 
-static const float
-half[2] = {0.5,-0.5},
-ln2hi   = 6.9314575195e-1f,  /* 0x3f317200 */
-ln2lo   = 1.4286067653e-6f,  /* 0x35bfbe8e */
-invln2  = 1.4426950216e+0f,  /* 0x3fb8aa3b */
 /*
- * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
- * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
- */
-P1 =  1.6666625440e-1f, /*  0xaaaa8f.0p-26 */
-P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
 
-float expf(float x)
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXP2F_TABLE_BITS)
+#define InvLn2N __exp2f_data.invln2_scaled
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly_scaled
+
+static inline uint32_t top12(float x)
 {
-	float_t hi, lo, c, xx, y;
-	int k, sign;
-	uint32_t hx;
+	return asuint(x) >> 20;
+}
 
-	GET_FLOAT_WORD(hx, x);
-	sign = hx >> 31;   /* sign bit of x */
-	hx &= 0x7fffffff;  /* high word of |x| */
+float expf(float x)
+{
+	uint32_t abstop;
+	uint64_t ki, t;
+	double_t kd, xd, z, r, r2, y, s;
 
-	/* special cases */
-	if (hx >= 0x42aeac50) {  /* if |x| >= -87.33655f or NaN */
-		if (hx > 0x7f800000) /* NaN */
-			return x;
-		if (hx >= 0x42b17218 && !sign) {  /* x >= 88.722839f */
-			/* overflow */
-			x *= 0x1p127f;
-			return x;
-		}
-		if (sign) {
-			/* underflow */
-			FORCE_EVAL(-0x1p-149f/x);
-			if (hx >= 0x42cff1b5)  /* x <= -103.972084f */
-				return 0;
-		}
+	xd = (double_t)x;
+	abstop = top12(x) & 0x7ff;
+	if (predict_false(abstop >= top12(88.0f))) {
+		/* |x| >= 88 or x is nan.  */
+		if (asuint(x) == asuint(-INFINITY))
+			return 0.0f;
+		if (abstop >= top12(INFINITY))
+			return x + x;
+		if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+			return __math_oflowf(0);
+		if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+			return __math_uflowf(0);
 	}
 
-	/* argument reduction */
-	if (hx > 0x3eb17218) {  /* if |x| > 0.5 ln2 */
-		if (hx > 0x3f851592)  /* if |x| > 1.5 ln2 */
-			k = invln2*x + half[sign];
-		else
-			k = 1 - sign - sign;
-		hi = x - k*ln2hi;  /* k*ln2hi is exact here */
-		lo = k*ln2lo;
-		x = hi - lo;
-	} else if (hx > 0x39000000) {  /* |x| > 2**-14 */
-		k = 0;
-		hi = x;
-		lo = 0;
-	} else {
-		/* raise inexact */
-		FORCE_EVAL(0x1p127f + x);
-		return 1 + x;
-	}
+	/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
+	z = InvLn2N * xd;
+
+	/* Round and convert z to int, the result is in [-150*N, 128*N] and
+	   ideally ties-to-even rule is used, otherwise the magnitude of r
+	   can be bigger which gives larger approximation error.  */
+#if TOINT_INTRINSICS
+	kd = roundtoint(z);
+	ki = converttoint(z);
+#else
+# define SHIFT __exp2f_data.shift
+	kd = eval_as_double(z + SHIFT);
+	ki = asuint64(kd);
+	kd -= SHIFT;
+#endif
+	r = z - kd;
 
-	/* x is now in primary range */
-	xx = x*x;
-	c = x - xx*(P1+xx*P2);
-	y = 1 + (x*c/(2-c) - lo + hi);
-	if (k == 0)
-		return y;
-	return scalbnf(y, k);
+	/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+	t = T[ki % N];
+	t += ki << (52 - EXP2F_TABLE_BITS);
+	s = asdouble(t);
+	z = C[0] * r + C[1];
+	r2 = r * r;
+	y = C[2] * r + 1;
+	y = z * r2 + y;
+	y = y * s;
+	return eval_as_float(y);
 }
diff --git a/src/math/expm1f.c b/src/math/expm1f.c
index 297e0b44..09a41afe 100644
--- a/src/math/expm1f.c
+++ b/src/math/expm1f.c
@@ -16,7 +16,6 @@
 #include "libm.h"
 
 static const float
-o_threshold = 8.8721679688e+01, /* 0x42b17180 */
 ln2_hi      = 6.9313812256e-01, /* 0x3f317180 */
 ln2_lo      = 9.0580006145e-06, /* 0x3717f7d1 */
 invln2      = 1.4426950216e+00, /* 0x3fb8aa3b */
@@ -41,7 +40,7 @@ float expm1f(float x)
 			return x;
 		if (sign)
 			return -1;
-		if (x > o_threshold) {
+		if (hx > 0x42b17217) { /* x > log(FLT_MAX) */
 			x *= 0x1p127f;
 			return x;
 		}
diff --git a/src/math/fma.c b/src/math/fma.c
index 0c6f90c9..adfadca8 100644
--- a/src/math/fma.c
+++ b/src/math/fma.c
@@ -53,7 +53,7 @@ double fma(double x, double y, double z)
 		return x*y + z;
 	if (nz.e >= ZEROINFNAN) {
 		if (nz.e > ZEROINFNAN) /* z==0 */
-			return x*y + z;
+			return x*y;
 		return z;
 	}
 
diff --git a/src/math/fmaf.c b/src/math/fmaf.c
index 80f5cd8a..7c65acf1 100644
--- a/src/math/fmaf.c
+++ b/src/math/fmaf.c
@@ -77,17 +77,16 @@ float fmaf(float x, float y, float z)
 	 * If result is inexact, and exactly halfway between two float values,
 	 * we need to adjust the low-order bit in the direction of the error.
 	 */
-#ifdef FE_TOWARDZERO
-	fesetround(FE_TOWARDZERO);
-#endif
-	volatile double vxy = xy;  /* XXX work around gcc CSE bug */
-	double adjusted_result = vxy + z;
-	fesetround(FE_TONEAREST);
-	if (result == adjusted_result) {
-		u.f = adjusted_result;
+	double err;
+	int neg = u.i >> 63;
+	if (neg == (z > xy))
+		err = xy - result + z;
+	else
+		err = z - result + xy;
+	if (neg == (err < 0))
 		u.i++;
-		adjusted_result = u.f;
-	}
-	z = adjusted_result;
+	else
+		u.i--;
+	z = u.f;
 	return z;
 }
diff --git a/src/math/i386/acos.s b/src/math/i386/acos.s
index 47f365ef..af423a2f 100644
--- a/src/math/i386/acos.s
+++ b/src/math/i386/acos.s
@@ -1,22 +1,10 @@
 # use acos(x) = atan2(fabs(sqrt((1-x)*(1+x))), x)
 
-.global acosf
-.type acosf,@function
-acosf:
-	flds 4(%esp)
-	jmp 1f
-
-.global acosl
-.type acosl,@function
-acosl:
-	fldt 4(%esp)
-	jmp 1f
-
 .global acos
 .type acos,@function
 acos:
 	fldl 4(%esp)
-1:	fld %st(0)
+	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
 	fadd %st(2)
@@ -25,4 +13,6 @@ acos:
 	fabs         # fix sign of zero (matters in downward rounding mode)
 	fxch %st(1)
 	fpatan
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
diff --git a/src/math/i386/acosf.s b/src/math/i386/acosf.s
index 6c95509f..d2cdfdbf 100644
--- a/src/math/i386/acosf.s
+++ b/src/math/i386/acosf.s
@@ -1 +1,16 @@
-# see acos.s
+.global acosf
+.type acosf,@function
+acosf:
+	flds 4(%esp)
+	fld %st(0)
+	fld1
+	fsub %st(0),%st(1)
+	fadd %st(2)
+	fmulp
+	fsqrt
+	fabs         # fix sign of zero (matters in downward rounding mode)
+	fxch %st(1)
+	fpatan
+	fstps 4(%esp)
+	flds 4(%esp)
+	ret
diff --git a/src/math/i386/acosl.s b/src/math/i386/acosl.s
index 6c95509f..599c8230 100644
--- a/src/math/i386/acosl.s
+++ b/src/math/i386/acosl.s
@@ -1 +1,14 @@
-# see acos.s
+.global acosl
+.type acosl,@function
+acosl:
+	fldt 4(%esp)
+	fld %st(0)
+	fld1
+	fsub %st(0),%st(1)
+	fadd %st(2)
+	fmulp
+	fsqrt
+	fabs         # fix sign of zero (matters in downward rounding mode)
+	fxch %st(1)
+	fpatan
+	ret
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
index a9f691bf..2bc8356f 100644
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -1,26 +1,3 @@
-.global asinf
-.type asinf,@function
-asinf:
-	flds 4(%esp)
-	mov 4(%esp),%eax
-	add %eax,%eax
-	cmp $0x01000000,%eax
-	jae 1f
-		# subnormal x, return x with underflow
-	fnstsw %ax
-	and $16,%ax
-	jnz 2f
-	fld %st(0)
-	fmul %st(1)
-	fstps 4(%esp)
-2:	ret
-
-.global asinl
-.type asinl,@function
-asinl:
-	fldt 4(%esp)
-	jmp 1f
-
 .global asin
 .type asin,@function
 asin:
@@ -28,18 +5,17 @@ asin:
 	mov 8(%esp),%eax
 	add %eax,%eax
 	cmp $0x00200000,%eax
-	jae 1f
-		# subnormal x, return x with underflow
-	fnstsw %ax
-	and $16,%ax
-	jnz 2f
-	fsts 4(%esp)
-2:	ret
-1:	fld %st(0)
+	jb 1f
+	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
 	fadd %st(2)
 	fmulp
 	fsqrt
 	fpatan
+	fstpl 4(%esp)
+	fldl 4(%esp)
+	ret
+		# subnormal x, return x with underflow
+1:	fsts 4(%esp)
 	ret
diff --git a/src/math/i386/asinf.s b/src/math/i386/asinf.s
index e07bf599..05909753 100644
--- a/src/math/i386/asinf.s
+++ b/src/math/i386/asinf.s
@@ -1 +1,23 @@
-# see asin.s
+.global asinf
+.type asinf,@function
+asinf:
+	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jb 1f
+	fld %st(0)
+	fld1
+	fsub %st(0),%st(1)
+	fadd %st(2)
+	fmulp
+	fsqrt
+	fpatan
+	fstps 4(%esp)
+	flds 4(%esp)
+	ret
+		# subnormal x, return x with underflow
+1:	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+	ret
diff --git a/src/math/i386/asinl.s b/src/math/i386/asinl.s
index e07bf599..e973fc85 100644
--- a/src/math/i386/asinl.s
+++ b/src/math/i386/asinl.s
@@ -1 +1,12 @@
-# see asin.s
+.global asinl
+.type asinl,@function
+asinl:
+	fldt 4(%esp)
+	fld %st(0)
+	fld1
+	fsub %st(0),%st(1)
+	fadd %st(2)
+	fmulp
+	fsqrt
+	fpatan
+	ret
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
index d73137b2..2c57f6b3 100644
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -8,10 +8,9 @@ atan:
 	jb 1f
 	fld1
 	fpatan
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
 		# subnormal x, return x with underflow
-1:	fnstsw %ax
-	and $16,%ax
-	jnz 2f
-	fsts 4(%esp)
-2:	ret
+1:	fsts 4(%esp)
+	ret
diff --git a/src/math/i386/atan2.s b/src/math/i386/atan2.s
index a7d2979b..8bc441b1 100644
--- a/src/math/i386/atan2.s
+++ b/src/math/i386/atan2.s
@@ -4,14 +4,12 @@ atan2:
 	fldl 4(%esp)
 	fldl 12(%esp)
 	fpatan
-	fstl 4(%esp)
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	mov 8(%esp),%eax
 	add %eax,%eax
 	cmp $0x00200000,%eax
 	jae 1f
 		# subnormal x, return x with underflow
-	fnstsw %ax
-	and $16,%ax
-	jnz 1f
 	fsts 4(%esp)
 1:	ret
diff --git a/src/math/i386/atan2f.s b/src/math/i386/atan2f.s
index 14b88ce5..3908c86d 100644
--- a/src/math/i386/atan2f.s
+++ b/src/math/i386/atan2f.s
@@ -4,15 +4,13 @@ atan2f:
 	flds 4(%esp)
 	flds 8(%esp)
 	fpatan
-	fsts 4(%esp)
+	fstps 4(%esp)
+	flds 4(%esp)
 	mov 4(%esp),%eax
 	add %eax,%eax
 	cmp $0x01000000,%eax
 	jae 1f
 		# subnormal x, return x with underflow
-	fnstsw %ax
-	and $16,%ax
-	jnz 1f
 	fld %st(0)
 	fmul %st(1)
 	fstps 4(%esp)
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
index 8caddefa..c2cbe2e0 100644
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -8,12 +8,11 @@ atanf:
 	jb 1f
 	fld1
 	fpatan
+	fstps 4(%esp)
+	flds 4(%esp)
 	ret
 		# subnormal x, return x with underflow
-1:	fnstsw %ax
-	and $16,%ax
-	jnz 2f
-	fld %st(0)
+1:	fld %st(0)
 	fmul %st(1)
 	fstps 4(%esp)
-2:	ret
+	ret
diff --git a/src/math/i386/exp2.s b/src/math/i386/exp2.s
deleted file mode 100644
index f335a3e5..00000000
--- a/src/math/i386/exp2.s
+++ /dev/null
@@ -1 +0,0 @@
-# see exp.s
diff --git a/src/math/i386/exp2f.s b/src/math/i386/exp2f.s
deleted file mode 100644
index f335a3e5..00000000
--- a/src/math/i386/exp2f.s
+++ /dev/null
@@ -1 +0,0 @@
-# see exp.s
diff --git a/src/math/i386/exp2l.s b/src/math/i386/exp2l.s
index f335a3e5..8125761d 100644
--- a/src/math/i386/exp2l.s
+++ b/src/math/i386/exp2l.s
@@ -1 +1 @@
-# see exp.s
+# see exp_ld.s
diff --git a/src/math/i386/exp.s b/src/math/i386/exp_ld.s
index c7aa5b6e..99cba01f 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp_ld.s
@@ -1,41 +1,8 @@
-.global expm1f
-.type expm1f,@function
-expm1f:
-	flds 4(%esp)
-	mov 4(%esp),%eax
-	add %eax,%eax
-	cmp $0x01000000,%eax
-	jae 1f
-		# subnormal x, return x with underflow
-	fnstsw %ax
-	and $16,%ax
-	jnz 2f
-	fld %st(0)
-	fmul %st(1)
-	fstps 4(%esp)
-2:	ret
-
 .global expm1l
 .type expm1l,@function
 expm1l:
 	fldt 4(%esp)
-	jmp 1f
-
-.global expm1
-.type expm1,@function
-expm1:
-	fldl 4(%esp)
-	mov 8(%esp),%eax
-	add %eax,%eax
-	cmp $0x00200000,%eax
-	jae 1f
-		# subnormal x, return x with underflow
-	fnstsw %ax
-	and $16,%ax
-	jnz 2f
-	fsts 4(%esp)
-2:	ret
-1:	fldl2e
+	fldl2e
 	fmulp
 	mov $0xc2820000,%eax
 	push %eax
@@ -65,12 +32,6 @@ expm1:
 	fsubrp
 	ret
 
-.global exp2f
-.type exp2f,@function
-exp2f:
-	flds 4(%esp)
-	jmp 1f
-
 .global exp2l
 .global __exp2l
 .hidden __exp2l
@@ -78,26 +39,6 @@ exp2f:
 exp2l:
 __exp2l:
 	fldt 4(%esp)
-	jmp 1f
-
-.global expf
-.type expf,@function
-expf:
-	flds 4(%esp)
-	jmp 2f
-
-.global exp
-.type exp,@function
-exp:
-	fldl 4(%esp)
-2:	fldl2e
-	fmulp
-	jmp 1f
-
-.global exp2
-.type exp2,@function
-exp2:
-	fldl 4(%esp)
 1:	sub $12,%esp
 	fld %st(0)
 	fstpt (%esp)
diff --git a/src/math/i386/expf.s b/src/math/i386/expf.s
deleted file mode 100644
index f335a3e5..00000000
--- a/src/math/i386/expf.s
+++ /dev/null
@@ -1 +0,0 @@
-# see exp.s
diff --git a/src/math/i386/expm1.s b/src/math/i386/expm1.s
deleted file mode 100644
index f335a3e5..00000000
--- a/src/math/i386/expm1.s
+++ /dev/null
@@ -1 +0,0 @@
-# see exp.s
diff --git a/src/math/i386/expm1f.s b/src/math/i386/expm1f.s
deleted file mode 100644
index f335a3e5..00000000
--- a/src/math/i386/expm1f.s
+++ /dev/null
@@ -1 +0,0 @@
-# see exp.s
diff --git a/src/math/i386/expm1l.s b/src/math/i386/expm1l.s
index f335a3e5..8125761d 100644
--- a/src/math/i386/expm1l.s
+++ b/src/math/i386/expm1l.s
@@ -1 +1 @@
-# see exp.s
+# see exp_ld.s
diff --git a/src/math/i386/fabs.c b/src/math/i386/fabs.c
new file mode 100644
index 00000000..39672786
--- /dev/null
+++ b/src/math/i386/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x)
+{
+	__asm__ ("fabs" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/fabs.s b/src/math/i386/fabs.s
deleted file mode 100644
index d66ea9a1..00000000
--- a/src/math/i386/fabs.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global fabs
-.type fabs,@function
-fabs:
-	fldl 4(%esp)
-	fabs
-	ret
diff --git a/src/math/i386/fabsf.c b/src/math/i386/fabsf.c
new file mode 100644
index 00000000..d882eee3
--- /dev/null
+++ b/src/math/i386/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x)
+{
+	__asm__ ("fabs" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/fabsf.s b/src/math/i386/fabsf.s
deleted file mode 100644
index a981c422..00000000
--- a/src/math/i386/fabsf.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global fabsf
-.type fabsf,@function
-fabsf:
-	flds 4(%esp)
-	fabs
-	ret
diff --git a/src/math/i386/fabsl.c b/src/math/i386/fabsl.c
new file mode 100644
index 00000000..cc1c9ed9
--- /dev/null
+++ b/src/math/i386/fabsl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double fabsl(long double x)
+{
+	__asm__ ("fabs" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/fabsl.s b/src/math/i386/fabsl.s
deleted file mode 100644
index ceef9e4c..00000000
--- a/src/math/i386/fabsl.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global fabsl
-.type fabsl,@function
-fabsl:
-	fldt 4(%esp)
-	fabs
-	ret
diff --git a/src/math/i386/fmod.c b/src/math/i386/fmod.c
new file mode 100644
index 00000000..ea0c58d9
--- /dev/null
+++ b/src/math/i386/fmod.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+double fmod(double x, double y)
+{
+	unsigned short fpsr;
+	// fprem does not introduce excess precision into x
+	do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/i386/fmod.s b/src/math/i386/fmod.s
deleted file mode 100644
index 2113b3c5..00000000
--- a/src/math/i386/fmod.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global fmod
-.type fmod,@function
-fmod:
-	fldl 12(%esp)
-	fldl 4(%esp)
-1:	fprem
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/fmodf.c b/src/math/i386/fmodf.c
new file mode 100644
index 00000000..90b56ab0
--- /dev/null
+++ b/src/math/i386/fmodf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+float fmodf(float x, float y)
+{
+	unsigned short fpsr;
+	// fprem does not introduce excess precision into x
+	do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/i386/fmodf.s b/src/math/i386/fmodf.s
deleted file mode 100644
index e04e2a56..00000000
--- a/src/math/i386/fmodf.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global fmodf
-.type fmodf,@function
-fmodf:
-	flds 8(%esp)
-	flds 4(%esp)
-1:	fprem
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/fmodl.c b/src/math/i386/fmodl.c
new file mode 100644
index 00000000..3daeab06
--- /dev/null
+++ b/src/math/i386/fmodl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+long double fmodl(long double x, long double y)
+{
+	unsigned short fpsr;
+	do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/i386/fmodl.s b/src/math/i386/fmodl.s
deleted file mode 100644
index 0cb3fe9b..00000000
--- a/src/math/i386/fmodl.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global fmodl
-.type fmodl,@function
-fmodl:
-	fldt 16(%esp)
-	fldt 4(%esp)
-1:	fprem
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/llrint.c b/src/math/i386/llrint.c
new file mode 100644
index 00000000..aa400817
--- /dev/null
+++ b/src/math/i386/llrint.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llrint(double x)
+{
+	long long r;
+	__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/i386/llrint.s b/src/math/i386/llrint.s
deleted file mode 100644
index 8e89cd91..00000000
--- a/src/math/i386/llrint.s
+++ /dev/null
@@ -1,8 +0,0 @@
-.global llrint
-.type llrint,@function
-llrint:
-	fldl 4(%esp)
-	fistpll 4(%esp)
-	mov 4(%esp),%eax
-	mov 8(%esp),%edx
-	ret
diff --git a/src/math/i386/llrintf.c b/src/math/i386/llrintf.c
new file mode 100644
index 00000000..c41a317b
--- /dev/null
+++ b/src/math/i386/llrintf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llrintf(float x)
+{
+	long long r;
+	__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/i386/llrintf.s b/src/math/i386/llrintf.s
deleted file mode 100644
index aa850c6c..00000000
--- a/src/math/i386/llrintf.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.global llrintf
-.type llrintf,@function
-llrintf:
-	sub $8,%esp
-	flds 12(%esp)
-	fistpll (%esp)
-	pop %eax
-	pop %edx
-	ret
diff --git a/src/math/i386/llrintl.c b/src/math/i386/llrintl.c
new file mode 100644
index 00000000..c439ef28
--- /dev/null
+++ b/src/math/i386/llrintl.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llrintl(long double x)
+{
+	long long r;
+	__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/i386/llrintl.s b/src/math/i386/llrintl.s
deleted file mode 100644
index 1cfb56f1..00000000
--- a/src/math/i386/llrintl.s
+++ /dev/null
@@ -1,8 +0,0 @@
-.global llrintl
-.type llrintl,@function
-llrintl:
-	fldt 4(%esp)
-	fistpll 4(%esp)
-	mov 4(%esp),%eax
-	mov 8(%esp),%edx
-	ret
diff --git a/src/math/i386/log.s b/src/math/i386/log.s
index fcccf030..08c59924 100644
--- a/src/math/i386/log.s
+++ b/src/math/i386/log.s
@@ -4,4 +4,6 @@ log:
 	fldln2
 	fldl 4(%esp)
 	fyl2x
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
diff --git a/src/math/i386/log10.s b/src/math/i386/log10.s
index 28eb5b2f..120e91ec 100644
--- a/src/math/i386/log10.s
+++ b/src/math/i386/log10.s
@@ -4,4 +4,6 @@ log10:
 	fldlg2
 	fldl 4(%esp)
 	fyl2x
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
diff --git a/src/math/i386/log10f.s b/src/math/i386/log10f.s
index c0c0c67e..b055493a 100644
--- a/src/math/i386/log10f.s
+++ b/src/math/i386/log10f.s
@@ -4,4 +4,6 @@ log10f:
 	fldlg2
 	flds 4(%esp)
 	fyl2x
+	fstps 4(%esp)
+	flds 4(%esp)
 	ret
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
index 6b6929c7..f3c95f83 100644
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -10,15 +10,16 @@ log1p:
 	cmp $0x00100000,%eax
 	jb 2f
 	fyl2xp1
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
 1:	fld1
 	faddp
 	fyl2x
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
 		# subnormal x, return x with underflow
-2:	fnstsw %ax
-	and $16,%ax
-	jnz 1f
-	fsts 4(%esp)
+2:	fsts 4(%esp)
 	fstp %st(1)
-1:	ret
+	ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
index c0bcd30f..9f13d95f 100644
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -10,16 +10,17 @@ log1pf:
 	cmp $0x00800000,%eax
 	jb 2f
 	fyl2xp1
+	fstps 4(%esp)
+	flds 4(%esp)
 	ret
 1:	fld1
 	faddp
 	fyl2x
+	fstps 4(%esp)
+	flds 4(%esp)
 	ret
 		# subnormal x, return x with underflow
-2:	fnstsw %ax
-	and $16,%ax
-	jnz 1f
-	fxch
+2:	fxch
 	fmul %st(1)
 	fstps 4(%esp)
-1:	ret
+	ret
diff --git a/src/math/i386/log2.s b/src/math/i386/log2.s
index 15088037..7eff0b61 100644
--- a/src/math/i386/log2.s
+++ b/src/math/i386/log2.s
@@ -4,4 +4,6 @@ log2:
 	fld1
 	fldl 4(%esp)
 	fyl2x
+	fstpl 4(%esp)
+	fldl 4(%esp)
 	ret
diff --git a/src/math/i386/log2f.s b/src/math/i386/log2f.s
index 00cdce75..b32fa2f7 100644
--- a/src/math/i386/log2f.s
+++ b/src/math/i386/log2f.s
@@ -4,4 +4,6 @@ log2f:
 	fld1
 	flds 4(%esp)
 	fyl2x
+	fstps 4(%esp)
+	flds 4(%esp)
 	ret
diff --git a/src/math/i386/logf.s b/src/math/i386/logf.s
index da7ff3ae..4d0346a4 100644
--- a/src/math/i386/logf.s
+++ b/src/math/i386/logf.s
@@ -4,4 +4,6 @@ logf:
 	fldln2
 	flds 4(%esp)
 	fyl2x
+	fstps 4(%esp)
+	flds 4(%esp)
 	ret
diff --git a/src/math/i386/lrint.c b/src/math/i386/lrint.c
new file mode 100644
index 00000000..89563ab2
--- /dev/null
+++ b/src/math/i386/lrint.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lrint(double x)
+{
+	long r;
+	__asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/i386/lrint.s b/src/math/i386/lrint.s
deleted file mode 100644
index 02b83d9f..00000000
--- a/src/math/i386/lrint.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global lrint
-.type lrint,@function
-lrint:
-	fldl 4(%esp)
-	fistpl 4(%esp)
-	mov 4(%esp),%eax
-	ret
diff --git a/src/math/i386/lrintf.c b/src/math/i386/lrintf.c
new file mode 100644
index 00000000..0bbf29de
--- /dev/null
+++ b/src/math/i386/lrintf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lrintf(float x)
+{
+	long r;
+	__asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/i386/lrintf.s b/src/math/i386/lrintf.s
deleted file mode 100644
index 907aac29..00000000
--- a/src/math/i386/lrintf.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global lrintf
-.type lrintf,@function
-lrintf:
-	flds 4(%esp)
-	fistpl 4(%esp)
-	mov 4(%esp),%eax
-	ret
diff --git a/src/math/i386/lrintl.c b/src/math/i386/lrintl.c
new file mode 100644
index 00000000..eb8c0902
--- /dev/null
+++ b/src/math/i386/lrintl.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lrintl(long double x)
+{
+	long r;
+	__asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/i386/lrintl.s b/src/math/i386/lrintl.s
deleted file mode 100644
index 3ae05aac..00000000
--- a/src/math/i386/lrintl.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global lrintl
-.type lrintl,@function
-lrintl:
-	fldt 4(%esp)
-	fistpl 4(%esp)
-	mov 4(%esp),%eax
-	ret
diff --git a/src/math/i386/remainder.c b/src/math/i386/remainder.c
new file mode 100644
index 00000000..c083df90
--- /dev/null
+++ b/src/math/i386/remainder.c
@@ -0,0 +1,12 @@
+#include <math.h>
+
+double remainder(double x, double y)
+{
+	unsigned short fpsr;
+	// fprem1 does not introduce excess precision into x
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
+
+weak_alias(remainder, drem);
diff --git a/src/math/i386/remainder.s b/src/math/i386/remainder.s
deleted file mode 100644
index ab1da95d..00000000
--- a/src/math/i386/remainder.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global remainder
-.type remainder,@function
-remainder:
-.weak drem
-.type drem,@function
-drem:
-	fldl 12(%esp)
-	fldl 4(%esp)
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/remainderf.c b/src/math/i386/remainderf.c
new file mode 100644
index 00000000..280207d2
--- /dev/null
+++ b/src/math/i386/remainderf.c
@@ -0,0 +1,12 @@
+#include <math.h>
+
+float remainderf(float x, float y)
+{
+	unsigned short fpsr;
+	// fprem1 does not introduce excess precision into x
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
+
+weak_alias(remainderf, dremf);
diff --git a/src/math/i386/remainderf.s b/src/math/i386/remainderf.s
deleted file mode 100644
index 6a7378a3..00000000
--- a/src/math/i386/remainderf.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global remainderf
-.type remainderf,@function
-remainderf:
-.weak dremf
-.type dremf,@function
-dremf:
-	flds 8(%esp)
-	flds 4(%esp)
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/remainderl.c b/src/math/i386/remainderl.c
new file mode 100644
index 00000000..8cf75071
--- /dev/null
+++ b/src/math/i386/remainderl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+long double remainderl(long double x, long double y)
+{
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/i386/remainderl.s b/src/math/i386/remainderl.s
deleted file mode 100644
index b41518ed..00000000
--- a/src/math/i386/remainderl.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global remainderl
-.type remainderl,@function
-remainderl:
-	fldt 16(%esp)
-	fldt 4(%esp)
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/rint.c b/src/math/i386/rint.c
new file mode 100644
index 00000000..a5276a60
--- /dev/null
+++ b/src/math/i386/rint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double rint(double x)
+{
+	__asm__ ("frndint" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/rint.s b/src/math/i386/rint.s
deleted file mode 100644
index bb99a11c..00000000
--- a/src/math/i386/rint.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global rint
-.type rint,@function
-rint:
-	fldl 4(%esp)
-	frndint
-	ret
diff --git a/src/math/i386/rintf.c b/src/math/i386/rintf.c
new file mode 100644
index 00000000..bb4121a4
--- /dev/null
+++ b/src/math/i386/rintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float rintf(float x)
+{
+	__asm__ ("frndint" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/rintf.s b/src/math/i386/rintf.s
deleted file mode 100644
index bce4c5a6..00000000
--- a/src/math/i386/rintf.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global rintf
-.type rintf,@function
-rintf:
-	flds 4(%esp)
-	frndint
-	ret
diff --git a/src/math/i386/rintl.c b/src/math/i386/rintl.c
new file mode 100644
index 00000000..e1a92077
--- /dev/null
+++ b/src/math/i386/rintl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double rintl(long double x)
+{
+	__asm__ ("frndint" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/rintl.s b/src/math/i386/rintl.s
deleted file mode 100644
index cd2bf9a9..00000000
--- a/src/math/i386/rintl.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global rintl
-.type rintl,@function
-rintl:
-	fldt 4(%esp)
-	frndint
-	ret
diff --git a/src/math/i386/sqrt.c b/src/math/i386/sqrt.c
new file mode 100644
index 00000000..934fbcca
--- /dev/null
+++ b/src/math/i386/sqrt.c
@@ -0,0 +1,15 @@
+#include "libm.h"
+
+double sqrt(double x)
+{
+	union ldshape ux;
+	unsigned fpsr;
+	__asm__ ("fsqrt; fnstsw %%ax": "=t"(ux.f), "=a"(fpsr) : "0"(x));
+	if ((ux.i.m & 0x7ff) != 0x400)
+		return (double)ux.f;
+	/* Rounding to double would have encountered an exact halfway case.
+	   Adjust mantissa downwards if fsqrt rounded up, else upwards.
+	   (result of fsqrt could not have been exact) */
+	ux.i.m ^= (fpsr & 0x200) + 0x300;
+	return (double)ux.f;
+}
diff --git a/src/math/i386/sqrt.s b/src/math/i386/sqrt.s
deleted file mode 100644
index 57837e25..00000000
--- a/src/math/i386/sqrt.s
+++ /dev/null
@@ -1,21 +0,0 @@
-.global sqrt
-.type sqrt,@function
-sqrt:	fldl 4(%esp)
-	fsqrt
-	fnstsw %ax
-	sub $12,%esp
-	fld %st(0)
-	fstpt (%esp)
-	mov (%esp),%ecx
-	and $0x7ff,%ecx
-	cmp $0x400,%ecx
-	jnz 1f
-	and $0x200,%eax
-	sub $0x100,%eax
-	sub %eax,(%esp)
-	fstp %st(0)
-	fldt (%esp)
-1:	add $12,%esp
-	fstpl 4(%esp)
-	fldl 4(%esp)
-	ret
diff --git a/src/math/i386/sqrtf.c b/src/math/i386/sqrtf.c
new file mode 100644
index 00000000..41c65c2b
--- /dev/null
+++ b/src/math/i386/sqrtf.c
@@ -0,0 +1,12 @@
+#include <math.h>
+
+float sqrtf(float x)
+{
+	long double t;
+	/* The long double result has sufficient precision so that
+	 * second rounding to float still keeps the returned value
+	 * correctly rounded, see Pierre Roux, "Innocuous Double
+	 * Rounding of Basic Arithmetic Operations". */
+	__asm__ ("fsqrt" : "=t"(t) : "0"(x));
+	return (float)t;
+}
diff --git a/src/math/i386/sqrtf.s b/src/math/i386/sqrtf.s
deleted file mode 100644
index 9e944f45..00000000
--- a/src/math/i386/sqrtf.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global sqrtf
-.type sqrtf,@function
-sqrtf:	flds 4(%esp)
-	fsqrt
-	fstps 4(%esp)
-	flds 4(%esp)
-	ret
diff --git a/src/math/i386/sqrtl.c b/src/math/i386/sqrtl.c
new file mode 100644
index 00000000..864cfcc4
--- /dev/null
+++ b/src/math/i386/sqrtl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double sqrtl(long double x)
+{
+	__asm__ ("fsqrt" : "+t"(x));
+	return x;
+}
diff --git a/src/math/i386/sqrtl.s b/src/math/i386/sqrtl.s
deleted file mode 100644
index e0d42616..00000000
--- a/src/math/i386/sqrtl.s
+++ /dev/null
@@ -1,5 +0,0 @@
-.global sqrtl
-.type sqrtl,@function
-sqrtl:	fldt 4(%esp)
-	fsqrt
-	ret
diff --git a/src/math/log.c b/src/math/log.c
index e61e113d..cc52585a 100644
--- a/src/math/log.c
+++ b/src/math/log.c
@@ -1,118 +1,112 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */
 /*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision log(x) function.
  *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* log(x)
- * Return the logarithm of x
- *
- * Method :
- *   1. Argument Reduction: find k and f such that
- *                      x = 2^k * (1+f),
- *         where  sqrt(2)/2 < 1+f < sqrt(2) .
- *
- *   2. Approximation of log(1+f).
- *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
- *               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
- *               = 2s + s*R
- *      We use a special Remez algorithm on [0,0.1716] to generate
- *      a polynomial of degree 14 to approximate R The maximum error
- *      of this polynomial approximation is bounded by 2**-58.45. In
- *      other words,
- *                      2      4      6      8      10      12      14
- *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
- *      (the values of Lg1 to Lg7 are listed in the program)
- *      and
- *          |      2          14          |     -58.45
- *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
- *          |                             |
- *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
- *      In order to guarantee error in log below 1ulp, we compute log
- *      by
- *              log(1+f) = f - s*(f - R)        (if f is not too large)
- *              log(1+f) = f - (hfsq - s*(hfsq+R)).     (better accuracy)
- *
- *      3. Finally,  log(x) = k*ln2 + log(1+f).
- *                          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
- *         Here ln2 is split into two floating point number:
- *                      ln2_hi + ln2_lo,
- *         where n*ln2_hi is always exact for |n| < 2000.
- *
- * Special cases:
- *      log(x) is NaN with signal if x < 0 (including -INF) ;
- *      log(+INF) is +INF; log(0) is -INF with signal;
- *      log(NaN) is that NaN with no signal.
- *
- * Accuracy:
- *      according to an error analysis, the error is always less than
- *      1 ulp (unit in the last place).
- *
- * Constants:
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
 #include <math.h>
 #include <stdint.h>
+#include "libm.h"
+#include "log_data.h"
+
+#define T __log_data.tab
+#define T2 __log_data.tab2
+#define B __log_data.poly1
+#define A __log_data.poly
+#define Ln2hi __log_data.ln2hi
+#define Ln2lo __log_data.ln2lo
+#define N (1 << LOG_TABLE_BITS)
+#define OFF 0x3fe6000000000000
 
-static const double
-ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
-ln2_lo = 1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */
-Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+/* Top 16 bits of a double.  */
+static inline uint32_t top16(double x)
+{
+	return asuint64(x) >> 48;
+}
 
 double log(double x)
 {
-	union {double f; uint64_t i;} u = {x};
-	double_t hfsq,f,s,z,R,w,t1,t2,dk;
-	uint32_t hx;
-	int k;
+	double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+	uint64_t ix, iz, tmp;
+	uint32_t top;
+	int k, i;
+
+	ix = asuint64(x);
+	top = top16(x);
+#define LO asuint64(1.0 - 0x1p-4)
+#define HI asuint64(1.0 + 0x1.09p-4)
+	if (predict_false(ix - LO < HI - LO)) {
+		/* Handle close to 1.0 inputs separately.  */
+		/* Fix sign of zero with downward rounding when x==1.  */
+		if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
+			return 0;
+		r = x - 1.0;
+		r2 = r * r;
+		r3 = r * r2;
+		y = r3 *
+		    (B[1] + r * B[2] + r2 * B[3] +
+		     r3 * (B[4] + r * B[5] + r2 * B[6] +
+			   r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+		/* Worst-case error is around 0.507 ULP.  */
+		w = r * 0x1p27;
+		double_t rhi = r + w - w;
+		double_t rlo = r - rhi;
+		w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
+		hi = r + w;
+		lo = r - hi + w;
+		lo += B[0] * rlo * (rhi + r);
+		y += lo;
+		y += hi;
+		return eval_as_double(y);
+	}
+	if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
+		/* x < 0x1p-1022 or inf or nan.  */
+		if (ix * 2 == 0)
+			return __math_divzero(1);
+		if (ix == asuint64(INFINITY)) /* log(inf) == inf.  */
+			return x;
+		if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+			return __math_invalid(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint64(x * 0x1p52);
+		ix -= 52ULL << 52;
+	}
+
+	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
+	iz = ix - (tmp & 0xfffULL << 52);
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = asdouble(iz);
 
-	hx = u.i>>32;
-	k = 0;
-	if (hx < 0x00100000 || hx>>31) {
-		if (u.i<<1 == 0)
-			return -1/(x*x);  /* log(+-0)=-inf */
-		if (hx>>31)
-			return (x-x)/0.0; /* log(-#) = NaN */
-		/* subnormal number, scale x up */
-		k -= 54;
-		x *= 0x1p54;
-		u.f = x;
-		hx = u.i>>32;
-	} else if (hx >= 0x7ff00000) {
-		return x;
-	} else if (hx == 0x3ff00000 && u.i<<32 == 0)
-		return 0;
+	/* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+	/* r ~= z/c - 1, |r| < 1/(2*N).  */
+#if __FP_FAST_FMA
+	/* rounding error: 0x1p-55/N.  */
+	r = __builtin_fma(z, invc, -1.0);
+#else
+	/* rounding error: 0x1p-55/N + 0x1p-66.  */
+	r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+	kd = (double_t)k;
 
-	/* reduce x into [sqrt(2)/2, sqrt(2)] */
-	hx += 0x3ff00000 - 0x3fe6a09e;
-	k += (int)(hx>>20) - 0x3ff;
-	hx = (hx&0x000fffff) + 0x3fe6a09e;
-	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
-	x = u.f;
+	/* hi + lo = r + log(c) + k*Ln2.  */
+	w = kd * Ln2hi + logc;
+	hi = w + r;
+	lo = w - hi + r + kd * Ln2lo;
 
-	f = x - 1.0;
-	hfsq = 0.5*f*f;
-	s = f/(2.0+f);
-	z = s*s;
-	w = z*z;
-	t1 = w*(Lg2+w*(Lg4+w*Lg6));
-	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	R = t2 + t1;
-	dk = k;
-	return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;
+	/* log(x) = lo + (log1p(r) - r) + hi.  */
+	r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
+	/* Worst case error if |y| > 0x1p-5:
+	   0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
+	   Worst case error if |y| > 0x1p-4:
+	   0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).  */
+	y = lo + r2 * A[0] +
+	    r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+	return eval_as_double(y);
 }
diff --git a/src/math/log2.c b/src/math/log2.c
index 0aafad4b..1276ed4e 100644
--- a/src/math/log2.c
+++ b/src/math/log2.c
@@ -1,122 +1,122 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */
 /*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision log2(x) function.
  *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * Return the base 2 logarithm of x.  See log.c for most comments.
- *
- * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2
- * as in log.c, then combine and scale in extra precision:
- *    log2(x) = (f - f*f/2 + r)/log(2) + k
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
 #include <math.h>
 #include <stdint.h>
+#include "libm.h"
+#include "log2_data.h"
 
-static const double
-ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
-ivln2lo = 1.67517131648865118353e-10, /* 0x3de705fc, 0x2eefa200 */
-Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+#define T __log2_data.tab
+#define T2 __log2_data.tab2
+#define B __log2_data.poly1
+#define A __log2_data.poly
+#define InvLn2hi __log2_data.invln2hi
+#define InvLn2lo __log2_data.invln2lo
+#define N (1 << LOG2_TABLE_BITS)
+#define OFF 0x3fe6000000000000
 
-double log2(double x)
+/* Top 16 bits of a double.  */
+static inline uint32_t top16(double x)
 {
-	union {double f; uint64_t i;} u = {x};
-	double_t hfsq,f,s,z,R,w,t1,t2,y,hi,lo,val_hi,val_lo;
-	uint32_t hx;
-	int k;
-
-	hx = u.i>>32;
-	k = 0;
-	if (hx < 0x00100000 || hx>>31) {
-		if (u.i<<1 == 0)
-			return -1/(x*x);  /* log(+-0)=-inf */
-		if (hx>>31)
-			return (x-x)/0.0; /* log(-#) = NaN */
-		/* subnormal number, scale x up */
-		k -= 54;
-		x *= 0x1p54;
-		u.f = x;
-		hx = u.i>>32;
-	} else if (hx >= 0x7ff00000) {
-		return x;
-	} else if (hx == 0x3ff00000 && u.i<<32 == 0)
-		return 0;
-
-	/* reduce x into [sqrt(2)/2, sqrt(2)] */
-	hx += 0x3ff00000 - 0x3fe6a09e;
-	k += (int)(hx>>20) - 0x3ff;
-	hx = (hx&0x000fffff) + 0x3fe6a09e;
-	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
-	x = u.f;
+	return asuint64(x) >> 48;
+}
 
-	f = x - 1.0;
-	hfsq = 0.5*f*f;
-	s = f/(2.0+f);
-	z = s*s;
-	w = z*z;
-	t1 = w*(Lg2+w*(Lg4+w*Lg6));
-	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	R = t2 + t1;
+double log2(double x)
+{
+	double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
+	uint64_t ix, iz, tmp;
+	uint32_t top;
+	int k, i;
 
-	/*
-	 * f-hfsq must (for args near 1) be evaluated in extra precision
-	 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
-	 * This is fairly efficient since f-hfsq only depends on f, so can
-	 * be evaluated in parallel with R.  Not combining hfsq with R also
-	 * keeps R small (though not as small as a true `lo' term would be),
-	 * so that extra precision is not needed for terms involving R.
-	 *
-	 * Compiler bugs involving extra precision used to break Dekker's
-	 * theorem for spitting f-hfsq as hi+lo, unless double_t was used
-	 * or the multi-precision calculations were avoided when double_t
-	 * has extra precision.  These problems are now automatically
-	 * avoided as a side effect of the optimization of combining the
-	 * Dekker splitting step with the clear-low-bits step.
-	 *
-	 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
-	 * precision to avoid a very large cancellation when x is very near
-	 * these values.  Unlike the above cancellations, this problem is
-	 * specific to base 2.  It is strange that adding +-1 is so much
-	 * harder than adding +-ln2 or +-log10_2.
-	 *
-	 * This uses Dekker's theorem to normalize y+val_hi, so the
-	 * compiler bugs are back in some configurations, sigh.  And I
-	 * don't want to used double_t to avoid them, since that gives a
-	 * pessimization and the support for avoiding the pessimization
-	 * is not yet available.
-	 *
-	 * The multi-precision calculations for the multiplications are
-	 * routine.
-	 */
+	ix = asuint64(x);
+	top = top16(x);
+#define LO asuint64(1.0 - 0x1.5b51p-5)
+#define HI asuint64(1.0 + 0x1.6ab2p-5)
+	if (predict_false(ix - LO < HI - LO)) {
+		/* Handle close to 1.0 inputs separately.  */
+		/* Fix sign of zero with downward rounding when x==1.  */
+		if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
+			return 0;
+		r = x - 1.0;
+#if __FP_FAST_FMA
+		hi = r * InvLn2hi;
+		lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
+#else
+		double_t rhi, rlo;
+		rhi = asdouble(asuint64(r) & -1ULL << 32);
+		rlo = r - rhi;
+		hi = rhi * InvLn2hi;
+		lo = rlo * InvLn2hi + r * InvLn2lo;
+#endif
+		r2 = r * r; /* rounding error: 0x1p-62.  */
+		r4 = r2 * r2;
+		/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
+		p = r2 * (B[0] + r * B[1]);
+		y = hi + p;
+		lo += hi - y + p;
+		lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
+			    r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
+		y += lo;
+		return eval_as_double(y);
+	}
+	if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
+		/* x < 0x1p-1022 or inf or nan.  */
+		if (ix * 2 == 0)
+			return __math_divzero(1);
+		if (ix == asuint64(INFINITY)) /* log(inf) == inf.  */
+			return x;
+		if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+			return __math_invalid(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint64(x * 0x1p52);
+		ix -= 52ULL << 52;
+	}
 
-	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
-	hi = f - hfsq;
-	u.f = hi;
-	u.i &= (uint64_t)-1<<32;
-	hi = u.f;
-	lo = f - hi - hfsq + s*(hfsq+R);
+	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
+	iz = ix - (tmp & 0xfffULL << 52);
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = asdouble(iz);
+	kd = (double_t)k;
 
-	val_hi = hi*ivln2hi;
-	val_lo = (lo+hi)*ivln2lo + lo*ivln2hi;
+	/* log2(x) = log2(z/c) + log2(c) + k.  */
+	/* r ~= z/c - 1, |r| < 1/(2*N).  */
+#if __FP_FAST_FMA
+	/* rounding error: 0x1p-55/N.  */
+	r = __builtin_fma(z, invc, -1.0);
+	t1 = r * InvLn2hi;
+	t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
+#else
+	double_t rhi, rlo;
+	/* rounding error: 0x1p-55/N + 0x1p-65.  */
+	r = (z - T2[i].chi - T2[i].clo) * invc;
+	rhi = asdouble(asuint64(r) & -1ULL << 32);
+	rlo = r - rhi;
+	t1 = rhi * InvLn2hi;
+	t2 = rlo * InvLn2hi + r * InvLn2lo;
+#endif
 
-	/* spadd(val_hi, val_lo, y), except for not using double_t: */
-	y = k;
-	w = y + val_hi;
-	val_lo += (y - w) + val_hi;
-	val_hi = w;
+	/* hi + lo = r/ln2 + log2(c) + k.  */
+	t3 = kd + logc;
+	hi = t3 + t1;
+	lo = t3 - hi + t1 + t2;
 
-	return val_lo + val_hi;
+	/* log2(r+1) = r/ln2 + r^2*poly(r).  */
+	/* Evaluation is optimized assuming superscalar pipelined execution.  */
+	r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
+	r4 = r2 * r2;
+	/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
+	   ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
+	p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
+	y = lo + r2 * p + hi;
+	return eval_as_double(y);
 }
diff --git a/src/math/log2_data.c b/src/math/log2_data.c
new file mode 100644
index 00000000..3dd1ca51
--- /dev/null
+++ b/src/math/log2_data.c
@@ -0,0 +1,201 @@
+/*
+ * Data for log2.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log2_data.h"
+
+#define N (1 << LOG2_TABLE_BITS)
+
+const struct log2_data __log2_data = {
+// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
+.invln2hi = 0x1.7154765200000p+0,
+.invln2lo = 0x1.705fc2eefa200p-33,
+.poly1 = {
+// relative error: 0x1.2fad8188p-63
+// in -0x1.5b51p-5 0x1.6ab2p-5
+-0x1.71547652b82fep-1,
+0x1.ec709dc3a03f7p-2,
+-0x1.71547652b7c3fp-2,
+0x1.2776c50f05be4p-2,
+-0x1.ec709dd768fe5p-3,
+0x1.a61761ec4e736p-3,
+-0x1.7153fbc64a79bp-3,
+0x1.484d154f01b4ap-3,
+-0x1.289e4a72c383cp-3,
+0x1.0b32f285aee66p-3,
+},
+.poly = {
+// relative error: 0x1.a72c2bf8p-58
+// abs error: 0x1.67a552c8p-66
+// in -0x1.f45p-8 0x1.f45p-8
+-0x1.71547652b8339p-1,
+0x1.ec709dc3a04bep-2,
+-0x1.7154764702ffbp-2,
+0x1.2776c50034c48p-2,
+-0x1.ec7b328ea92bcp-3,
+0x1.a6225e117f92ep-3,
+},
+/* Algorithm:
+
+	x = 2^k z
+	log2(x) = k + log2(c) + log2(z/c)
+	log2(z/c) = poly(z/c - 1)
+
+where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+	tab[i].invc = 1/c
+	tab[i].logc = (double)log2(c)
+	tab2[i].chi = (double)c
+	tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+	1) the rounding error in 0x1.8p10 + logc is 0,
+	2) the rounding error in z - chi - clo is < 0x1p-64 and
+	3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
+
+Note: 1) ensures that k + logc can be computed without rounding error, 2)
+ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
+single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log2(x)| < 0x1p-4, this is not enough so that is special cased.  */
+.tab = {
+{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
+{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
+{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
+{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
+{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
+{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
+{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
+{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
+{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
+{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
+{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
+{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
+{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
+{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
+{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
+{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
+{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
+{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
+{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
+{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
+{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
+{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
+{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
+{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
+{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
+{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
+{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
+{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
+{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
+{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
+{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
+{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
+{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
+{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
+{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
+{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
+{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
+{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
+{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
+{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
+{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
+{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
+{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
+{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
+{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
+{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
+{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
+{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
+{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
+{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
+{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
+{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
+{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
+{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
+{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
+{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
+{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
+{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
+{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
+{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
+{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
+{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
+{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
+{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
+},
+#if !__FP_FAST_FMA
+.tab2 = {
+{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
+{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
+{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
+{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
+{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
+{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
+{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
+{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
+{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
+{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
+{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
+{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
+{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
+{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
+{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
+{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
+{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
+{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
+{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
+{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
+{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
+{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
+{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
+{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
+{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
+{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
+{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
+{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
+{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
+{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
+{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
+{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
+{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
+{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
+{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
+{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
+{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
+{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
+{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
+{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
+{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
+{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
+{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
+{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
+{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
+{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
+{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
+{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
+{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
+{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
+{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
+{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
+{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
+{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
+{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
+{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
+{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
+{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
+{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
+{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
+{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
+{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
+{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
+{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
+},
+#endif
+};
diff --git a/src/math/log2_data.h b/src/math/log2_data.h
new file mode 100644
index 00000000..276a786d
--- /dev/null
+++ b/src/math/log2_data.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG2_DATA_H
+#define _LOG2_DATA_H
+
+#include <features.h>
+
+#define LOG2_TABLE_BITS 6
+#define LOG2_POLY_ORDER 7
+#define LOG2_POLY1_ORDER 11
+extern hidden const struct log2_data {
+	double invln2hi;
+	double invln2lo;
+	double poly[LOG2_POLY_ORDER - 1];
+	double poly1[LOG2_POLY1_ORDER - 1];
+	struct {
+		double invc, logc;
+	} tab[1 << LOG2_TABLE_BITS];
+#if !__FP_FAST_FMA
+	struct {
+		double chi, clo;
+	} tab2[1 << LOG2_TABLE_BITS];
+#endif
+} __log2_data;
+
+#endif
diff --git a/src/math/log2f.c b/src/math/log2f.c
index b3e305fe..c368f88f 100644
--- a/src/math/log2f.c
+++ b/src/math/log2f.c
@@ -1,74 +1,72 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */
 /*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision log2 function.
  *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * See comments in log2.c.
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
 #include <math.h>
 #include <stdint.h>
+#include "libm.h"
+#include "log2f_data.h"
+
+/*
+LOG2F_TABLE_BITS = 4
+LOG2F_POLY_ORDER = 4
+
+ULP error: 0.752 (nearest rounding.)
+Relative error: 1.9 * 2^-26 (before rounding.)
+*/
 
-static const float
-ivln2hi =  1.4428710938e+00, /* 0x3fb8b000 */
-ivln2lo = -1.7605285393e-04, /* 0xb9389ad4 */
-/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
-Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
-Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
-Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
-Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
+#define N (1 << LOG2F_TABLE_BITS)
+#define T __log2f_data.tab
+#define A __log2f_data.poly
+#define OFF 0x3f330000
 
 float log2f(float x)
 {
-	union {float f; uint32_t i;} u = {x};
-	float_t hfsq,f,s,z,R,w,t1,t2,hi,lo;
-	uint32_t ix;
-	int k;
+	double_t z, r, r2, p, y, y0, invc, logc;
+	uint32_t ix, iz, top, tmp;
+	int k, i;
 
-	ix = u.i;
-	k = 0;
-	if (ix < 0x00800000 || ix>>31) {  /* x < 2**-126  */
-		if (ix<<1 == 0)
-			return -1/(x*x);  /* log(+-0)=-inf */
-		if (ix>>31)
-			return (x-x)/0.0f; /* log(-#) = NaN */
-		/* subnormal number, scale up x */
-		k -= 25;
-		x *= 0x1p25f;
-		u.f = x;
-		ix = u.i;
-	} else if (ix >= 0x7f800000) {
-		return x;
-	} else if (ix == 0x3f800000)
+	ix = asuint(x);
+	/* Fix sign of zero with downward rounding when x==1.  */
+	if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
 		return 0;
+	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+		/* x < 0x1p-126 or inf or nan.  */
+		if (ix * 2 == 0)
+			return __math_divzerof(1);
+		if (ix == 0x7f800000) /* log2(inf) == inf.  */
+			return x;
+		if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+			return __math_invalidf(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint(x * 0x1p23f);
+		ix -= 23 << 23;
+	}
 
-	/* reduce x into [sqrt(2)/2, sqrt(2)] */
-	ix += 0x3f800000 - 0x3f3504f3;
-	k += (int)(ix>>23) - 0x7f;
-	ix = (ix&0x007fffff) + 0x3f3504f3;
-	u.i = ix;
-	x = u.f;
+	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
+	top = tmp & 0xff800000;
+	iz = ix - top;
+	k = (int32_t)tmp >> 23; /* arithmetic shift */
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = (double_t)asfloat(iz);
 
-	f = x - 1.0f;
-	s = f/(2.0f + f);
-	z = s*s;
-	w = z*z;
-	t1= w*(Lg2+w*Lg4);
-	t2= z*(Lg1+w*Lg3);
-	R = t2 + t1;
-	hfsq = 0.5f*f*f;
+	/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+	r = z * invc - 1;
+	y0 = logc + (double_t)k;
 
-	hi = f - hfsq;
-	u.f = hi;
-	u.i &= 0xfffff000;
-	hi = u.f;
-	lo = f - hi - hfsq + s*(hfsq+R);
-	return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + k;
+	/* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+	r2 = r * r;
+	y = A[1] * r + A[2];
+	y = A[0] * r2 + y;
+	p = A[3] * r + y0;
+	y = y * r2 + p;
+	return eval_as_float(y);
 }
diff --git a/src/math/log2f_data.c b/src/math/log2f_data.c
new file mode 100644
index 00000000..24e450f1
--- /dev/null
+++ b/src/math/log2f_data.c
@@ -0,0 +1,33 @@
+/*
+ * Data definition for log2f.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log2f_data.h"
+
+const struct log2f_data __log2f_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
+  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
+  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
+  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
+  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
+  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
+  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
+  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
+  },
+  .poly = {
+  -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
+  0x1.715475f35c8b8p0,
+  }
+};
diff --git a/src/math/log2f_data.h b/src/math/log2f_data.h
new file mode 100644
index 00000000..4fa48956
--- /dev/null
+++ b/src/math/log2f_data.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG2F_DATA_H
+#define _LOG2F_DATA_H
+
+#include <features.h>
+
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern hidden const struct log2f_data {
+	struct {
+		double invc, logc;
+	} tab[1 << LOG2F_TABLE_BITS];
+	double poly[LOG2F_POLY_ORDER];
+} __log2f_data;
+
+#endif
diff --git a/src/math/log_data.c b/src/math/log_data.c
new file mode 100644
index 00000000..1a6ec712
--- /dev/null
+++ b/src/math/log_data.c
@@ -0,0 +1,328 @@
+/*
+ * Data for log.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log_data.h"
+
+#define N (1 << LOG_TABLE_BITS)
+
+const struct log_data __log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly1 = {
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+},
+.poly = {
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+},
+/* Algorithm:
+
+	x = 2^k z
+	log(x) = k ln2 + log(c) + log(z/c)
+	log(z/c) = poly(z/c - 1)
+
+where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+	tab[i].invc = 1/c
+	tab[i].logc = (double)log(c)
+	tab2[i].chi = (double)c
+	tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+	1) the rounding error in 0x1.8p9 + logc is 0,
+	2) the rounding error in z - chi - clo is < 0x1p-66 and
+	3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased.  */
+.tab = {
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+},
+#if !__FP_FAST_FMA
+.tab2 = {
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+},
+#endif
+};
diff --git a/src/math/log_data.h b/src/math/log_data.h
new file mode 100644
index 00000000..1be22ab2
--- /dev/null
+++ b/src/math/log_data.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG_DATA_H
+#define _LOG_DATA_H
+
+#include <features.h>
+
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern hidden const struct log_data {
+	double ln2hi;
+	double ln2lo;
+	double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+	double poly1[LOG_POLY1_ORDER - 1];
+	struct {
+		double invc, logc;
+	} tab[1 << LOG_TABLE_BITS];
+#if !__FP_FAST_FMA
+	struct {
+		double chi, clo;
+	} tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data;
+
+#endif
diff --git a/src/math/logf.c b/src/math/logf.c
index 52230a1b..e4c2237c 100644
--- a/src/math/logf.c
+++ b/src/math/logf.c
@@ -1,69 +1,71 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */
 /*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision log function.
  *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
 #include <math.h>
 #include <stdint.h>
+#include "libm.h"
+#include "logf_data.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
 
-static const float
-ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
-ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
-/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
-Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
-Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
-Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
-Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
 
 float logf(float x)
 {
-	union {float f; uint32_t i;} u = {x};
-	float_t hfsq,f,s,z,R,w,t1,t2,dk;
-	uint32_t ix;
-	int k;
+	double_t z, r, r2, y, y0, invc, logc;
+	uint32_t ix, iz, tmp;
+	int k, i;
 
-	ix = u.i;
-	k = 0;
-	if (ix < 0x00800000 || ix>>31) {  /* x < 2**-126  */
-		if (ix<<1 == 0)
-			return -1/(x*x);  /* log(+-0)=-inf */
-		if (ix>>31)
-			return (x-x)/0.0f; /* log(-#) = NaN */
-		/* subnormal number, scale up x */
-		k -= 25;
-		x *= 0x1p25f;
-		u.f = x;
-		ix = u.i;
-	} else if (ix >= 0x7f800000) {
-		return x;
-	} else if (ix == 0x3f800000)
+	ix = asuint(x);
+	/* Fix sign of zero with downward rounding when x==1.  */
+	if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
 		return 0;
+	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+		/* x < 0x1p-126 or inf or nan.  */
+		if (ix * 2 == 0)
+			return __math_divzerof(1);
+		if (ix == 0x7f800000) /* log(inf) == inf.  */
+			return x;
+		if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+			return __math_invalidf(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint(x * 0x1p23f);
+		ix -= 23 << 23;
+	}
+
+	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+	k = (int32_t)tmp >> 23; /* arithmetic shift */
+	iz = ix - (tmp & 0xff800000);
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = (double_t)asfloat(iz);
 
-	/* reduce x into [sqrt(2)/2, sqrt(2)] */
-	ix += 0x3f800000 - 0x3f3504f3;
-	k += (int)(ix>>23) - 0x7f;
-	ix = (ix&0x007fffff) + 0x3f3504f3;
-	u.i = ix;
-	x = u.f;
+	/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+	r = z * invc - 1;
+	y0 = logc + (double_t)k * Ln2;
 
-	f = x - 1.0f;
-	s = f/(2.0f + f);
-	z = s*s;
-	w = z*z;
-	t1= w*(Lg2+w*Lg4);
-	t2= z*(Lg1+w*Lg3);
-	R = t2 + t1;
-	hfsq = 0.5f*f*f;
-	dk = k;
-	return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;
+	/* Pipelined polynomial evaluation to approximate log1p(r).  */
+	r2 = r * r;
+	y = A[1] * r + A[2];
+	y = A[0] * r2 + y;
+	y = y * r2 + (y0 + r);
+	return eval_as_float(y);
 }
diff --git a/src/math/logf_data.c b/src/math/logf_data.c
new file mode 100644
index 00000000..857221f7
--- /dev/null
+++ b/src/math/logf_data.c
@@ -0,0 +1,33 @@
+/*
+ * Data definition for logf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "logf_data.h"
+
+const struct logf_data __logf_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
+  { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
+  { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
+  { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
+  { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
+  { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
+  { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
+  { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
+  },
+  .ln2 = 0x1.62e42fefa39efp-1,
+  .poly = {
+  -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
+  }
+};
diff --git a/src/math/logf_data.h b/src/math/logf_data.h
new file mode 100644
index 00000000..00cff6f8
--- /dev/null
+++ b/src/math/logf_data.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOGF_DATA_H
+#define _LOGF_DATA_H
+
+#include <features.h>
+
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern hidden const struct logf_data {
+	struct {
+		double invc, logc;
+	} tab[1 << LOGF_TABLE_BITS];
+	double ln2;
+	double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
+} __logf_data;
+
+#endif
diff --git a/src/math/lrint.c b/src/math/lrint.c
index bdca8b7c..ddee7a0d 100644
--- a/src/math/lrint.c
+++ b/src/math/lrint.c
@@ -1,5 +1,6 @@
 #include <limits.h>
 #include <fenv.h>
+#include <math.h>
 #include "libm.h"
 
 /*
@@ -26,7 +27,18 @@ as a double.
 */
 
 #if LONG_MAX < 1U<<53 && defined(FE_INEXACT)
-long lrint(double x)
+#include <float.h>
+#include <stdint.h>
+#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
+#define EPS DBL_EPSILON
+#elif FLT_EVAL_METHOD==2
+#define EPS LDBL_EPSILON
+#endif
+#ifdef __GNUC__
+/* avoid stack frame in lrint */
+__attribute__((noinline))
+#endif
+static long lrint_slow(double x)
 {
 	#pragma STDC FENV_ACCESS ON
 	int e;
@@ -38,6 +50,20 @@ long lrint(double x)
 	/* conversion */
 	return x;
 }
+
+long lrint(double x)
+{
+	uint32_t abstop = asuint64(x)>>32 & 0x7fffffff;
+	uint64_t sign = asuint64(x) & (1ULL << 63);
+
+	if (abstop < 0x41dfffff) {
+		/* |x| < 0x7ffffc00, no overflow */
+		double_t toint = asdouble(asuint64(1/EPS) | sign);
+		double_t y = x + toint - toint;
+		return (long)y;
+	}
+	return lrint_slow(x);
+}
 #else
 long lrint(double x)
 {
diff --git a/src/math/m68k/sqrtl.c b/src/math/m68k/sqrtl.c
new file mode 100644
index 00000000..b1c303c7
--- /dev/null
+++ b/src/math/m68k/sqrtl.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __HAVE_68881__
+
+long double sqrtl(long double x)
+{
+	__asm__ ("fsqrt.x %1,%0" : "=f"(x) : "fm"(x));
+	return x;
+}
+
+#else
+
+#include "../sqrtl.c"
+
+#endif
diff --git a/src/math/mips/fabs.c b/src/math/mips/fabs.c
new file mode 100644
index 00000000..0a5aa3b1
--- /dev/null
+++ b/src/math/mips/fabs.c
@@ -0,0 +1,16 @@
+#if !defined(__mips_soft_float) && defined(__mips_abs2008)
+
+#include <math.h>
+
+double fabs(double x)
+{
+	double r;
+	__asm__("abs.d %0,%1" : "=f"(r) : "f"(x));
+	return r;
+}
+
+#else
+
+#include "../fabs.c"
+
+#endif
diff --git a/src/math/mips/fabsf.c b/src/math/mips/fabsf.c
new file mode 100644
index 00000000..35307be6
--- /dev/null
+++ b/src/math/mips/fabsf.c
@@ -0,0 +1,16 @@
+#if !defined(__mips_soft_float) && defined(__mips_abs2008)
+
+#include <math.h>
+
+float fabsf(float x)
+{
+	float r;
+	__asm__("abs.s %0,%1" : "=f"(r) : "f"(x));
+	return r;
+}
+
+#else
+
+#include "../fabsf.c"
+
+#endif
diff --git a/src/math/mips/sqrt.c b/src/math/mips/sqrt.c
new file mode 100644
index 00000000..595c9dbc
--- /dev/null
+++ b/src/math/mips/sqrt.c
@@ -0,0 +1,16 @@
+#if !defined(__mips_soft_float) && __mips >= 3
+
+#include <math.h>
+
+double sqrt(double x)
+{
+	double r;
+	__asm__("sqrt.d %0,%1" : "=f"(r) : "f"(x));
+	return r;
+}
+
+#else
+
+#include "../sqrt.c"
+
+#endif
diff --git a/src/math/mips/sqrtf.c b/src/math/mips/sqrtf.c
new file mode 100644
index 00000000..84090d2d
--- /dev/null
+++ b/src/math/mips/sqrtf.c
@@ -0,0 +1,16 @@
+#if !defined(__mips_soft_float) && __mips >= 2
+
+#include <math.h>
+
+float sqrtf(float x)
+{
+	float r;
+	__asm__("sqrt.s %0,%1" : "=f"(r) : "f"(x));
+	return r;
+}
+
+#else
+
+#include "../sqrtf.c"
+
+#endif
diff --git a/src/math/pow.c b/src/math/pow.c
index 3ddc1b6f..694c2ef6 100644
--- a/src/math/pow.c
+++ b/src/math/pow.c
@@ -1,328 +1,343 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */
 /*
- * ====================================================
- * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
+ * Double-precision x^y function.
  *
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* pow(x,y) return x**y
- *
- *                    n
- * Method:  Let x =  2   * (1+f)
- *      1. Compute and return log2(x) in two pieces:
- *              log2(x) = w1 + w2,
- *         where w1 has 53-24 = 29 bit trailing zeros.
- *      2. Perform y*log2(x) = n+y' by simulating muti-precision
- *         arithmetic, where |y'|<=0.5.
- *      3. Return x**y = 2**n*exp(y'*log2)
- *
- * Special cases:
- *      1.  (anything) ** 0  is 1
- *      2.  1 ** (anything)  is 1
- *      3.  (anything except 1) ** NAN is NAN
- *      4.  NAN ** (anything except 0) is NAN
- *      5.  +-(|x| > 1) **  +INF is +INF
- *      6.  +-(|x| > 1) **  -INF is +0
- *      7.  +-(|x| < 1) **  +INF is +0
- *      8.  +-(|x| < 1) **  -INF is +INF
- *      9.  -1          ** +-INF is 1
- *      10. +0 ** (+anything except 0, NAN)               is +0
- *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
- *      12. +0 ** (-anything except 0, NAN)               is +INF, raise divbyzero
- *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF, raise divbyzero
- *      14. -0 ** (+odd integer) is -0
- *      15. -0 ** (-odd integer) is -INF, raise divbyzero
- *      16. +INF ** (+anything except 0,NAN) is +INF
- *      17. +INF ** (-anything except 0,NAN) is +0
- *      18. -INF ** (+odd integer) is -INF
- *      19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer)
- *      20. (anything) ** 1 is (anything)
- *      21. (anything) ** -1 is 1/(anything)
- *      22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
- *      23. (-anything except 0 and inf) ** (non-integer) is NAN
- *
- * Accuracy:
- *      pow(x,y) returns x**y nearly rounded. In particular
- *                      pow(integer,integer)
- *      always returns the correct integer provided it is
- *      representable.
- *
- * Constants :
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
+#include <math.h>
+#include <stdint.h>
 #include "libm.h"
+#include "exp_data.h"
+#include "pow_data.h"
 
-static const double
-bp[]   = {1.0, 1.5,},
-dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
-dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
-two53  =  9007199254740992.0, /* 0x43400000, 0x00000000 */
-huge   =  1.0e300,
-tiny   =  1.0e-300,
-/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
-L1 =  5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
-L2 =  4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
-L3 =  3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
-L4 =  2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
-L5 =  2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
-L6 =  2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
-P1 =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3 =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5 =  4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
-lg2     =  6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-lg2_h   =  6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
-lg2_l   = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
-ovt     =  8.0085662595372944372e-017, /* -(1024-log2(ovfl+.5ulp)) */
-cp      =  9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
-cp_h    =  9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
-cp_l    = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
-ivln2   =  1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
-ivln2_h =  1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
-ivln2_l =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
+/*
+Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
+relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
+ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
+*/
 
-double pow(double x, double y)
+#define T __pow_log_data.tab
+#define A __pow_log_data.poly
+#define Ln2hi __pow_log_data.ln2hi
+#define Ln2lo __pow_log_data.ln2lo
+#define N (1 << POW_LOG_TABLE_BITS)
+#define OFF 0x3fe6955500000000
+
+/* Top 12 bits of a double (sign and exponent bits).  */
+static inline uint32_t top12(double x)
 {
-	double z,ax,z_h,z_l,p_h,p_l;
-	double y1,t1,t2,r,s,t,u,v,w;
-	int32_t i,j,k,yisint,n;
-	int32_t hx,hy,ix,iy;
-	uint32_t lx,ly;
+	return asuint64(x) >> 52;
+}
 
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hy, ly, y);
-	ix = hx & 0x7fffffff;
-	iy = hy & 0x7fffffff;
+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
+   additional 15 bits precision.  IX is the bit representation of x, but
+   normalized in the subnormal range using the sign bit for the exponent.  */
+static inline double_t log_inline(uint64_t ix, double_t *tail)
+{
+	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+	double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
+	uint64_t iz, tmp;
+	int k, i;
 
-	/* x**0 = 1, even if x is NaN */
-	if ((iy|ly) == 0)
-		return 1.0;
-	/* 1**y = 1, even if y is NaN */
-	if (hx == 0x3ff00000 && lx == 0)
-		return 1.0;
-	/* NaN if either arg is NaN */
-	if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0) ||
-	    iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0))
-		return x + y;
+	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
+	iz = ix - (tmp & 0xfffULL << 52);
+	z = asdouble(iz);
+	kd = (double_t)k;
 
-	/* determine if y is an odd int when x < 0
-	 * yisint = 0       ... y is not an integer
-	 * yisint = 1       ... y is an odd int
-	 * yisint = 2       ... y is an even int
-	 */
-	yisint = 0;
-	if (hx < 0) {
-		if (iy >= 0x43400000)
-			yisint = 2; /* even integer y */
-		else if (iy >= 0x3ff00000) {
-			k = (iy>>20) - 0x3ff;  /* exponent */
-			if (k > 20) {
-				uint32_t j = ly>>(52-k);
-				if ((j<<(52-k)) == ly)
-					yisint = 2 - (j&1);
-			} else if (ly == 0) {
-				uint32_t j = iy>>(20-k);
-				if ((j<<(20-k)) == iy)
-					yisint = 2 - (j&1);
-			}
-		}
-	}
+	/* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
+	invc = T[i].invc;
+	logc = T[i].logc;
+	logctail = T[i].logctail;
 
-	/* special value of y */
-	if (ly == 0) {
-		if (iy == 0x7ff00000) {  /* y is +-inf */
-			if (((ix-0x3ff00000)|lx) == 0)  /* (-1)**+-inf is 1 */
-				return 1.0;
-			else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */
-				return hy >= 0 ? y : 0.0;
-			else                       /* (|x|<1)**+-inf = 0,inf */
-				return hy >= 0 ? 0.0 : -y;
-		}
-		if (iy == 0x3ff00000) {    /* y is +-1 */
-			if (hy >= 0)
-				return x;
-			y = 1/x;
-#if FLT_EVAL_METHOD!=0
-			{
-				union {double f; uint64_t i;} u = {y};
-				uint64_t i = u.i & -1ULL/2;
-				if (i>>52 == 0 && (i&(i-1)))
-					FORCE_EVAL((float)y);
-			}
+	/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.  */
+#if __FP_FAST_FMA
+	r = __builtin_fma(z, invc, -1.0);
+#else
+	/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
+	double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
+	double_t zlo = z - zhi;
+	double_t rhi = zhi * invc - 1.0;
+	double_t rlo = zlo * invc;
+	r = rhi + rlo;
 #endif
-			return y;
-		}
-		if (hy == 0x40000000)    /* y is 2 */
-			return x*x;
-		if (hy == 0x3fe00000) {  /* y is 0.5 */
-			if (hx >= 0)     /* x >= +0 */
-				return sqrt(x);
-		}
+
+	/* k*Ln2 + log(c) + r.  */
+	t1 = kd * Ln2hi + logc;
+	t2 = t1 + r;
+	lo1 = kd * Ln2lo + logctail;
+	lo2 = t1 - t2 + r;
+
+	/* Evaluation is optimized assuming superscalar pipelined execution.  */
+	double_t ar, ar2, ar3, lo3, lo4;
+	ar = A[0] * r; /* A[0] = -0.5.  */
+	ar2 = r * ar;
+	ar3 = r * ar2;
+	/* k*Ln2 + log(c) + r + A[0]*r*r.  */
+#if __FP_FAST_FMA
+	hi = t2 + ar2;
+	lo3 = __builtin_fma(ar, r, -ar2);
+	lo4 = t2 - hi + ar2;
+#else
+	double_t arhi = A[0] * rhi;
+	double_t arhi2 = rhi * arhi;
+	hi = t2 + arhi2;
+	lo3 = rlo * (ar + arhi);
+	lo4 = t2 - hi + arhi2;
+#endif
+	/* p = log1p(r) - r - A[0]*r*r.  */
+	p = (ar3 * (A[1] + r * A[2] +
+		    ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
+	lo = lo1 + lo2 + lo3 + lo4 + p;
+	y = hi + lo;
+	*tail = hi - y + lo;
+	return y;
+}
+
+#undef N
+#undef T
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
+
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflown into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.  */
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+{
+	double_t scale, y;
+
+	if ((ki & 0x80000000) == 0) {
+		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
+		sbits -= 1009ull << 52;
+		scale = asdouble(sbits);
+		y = 0x1p1009 * (scale + scale * tmp);
+		return eval_as_double(y);
+	}
+	/* k < 0, need special care in the subnormal range.  */
+	sbits += 1022ull << 52;
+	/* Note: sbits is signed scale.  */
+	scale = asdouble(sbits);
+	y = scale + scale * tmp;
+	if (fabs(y) < 1.0) {
+		/* Round y to the right precision before scaling it into the subnormal
+		   range to avoid double rounding that can cause 0.5+E/2 ulp error where
+		   E is the worst-case ulp error outside the subnormal range.  So this
+		   is only useful if the goal is better than 1 ulp worst-case error.  */
+		double_t hi, lo, one = 1.0;
+		if (y < 0.0)
+			one = -1.0;
+		lo = scale - y + scale * tmp;
+		hi = one + y;
+		lo = one - hi + y + lo;
+		y = eval_as_double(hi + lo) - one;
+		/* Fix the sign of 0.  */
+		if (y == 0.0)
+			y = asdouble(sbits & 0x8000000000000000);
+		/* The underflow exception needs to be signaled explicitly.  */
+		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
 	}
+	y = 0x1p-1022 * y;
+	return eval_as_double(y);
+}
 
-	ax = fabs(x);
-	/* special value of x */
-	if (lx == 0) {
-		if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) { /* x is +-0,+-inf,+-1 */
-			z = ax;
-			if (hy < 0)   /* z = (1/|x|) */
-				z = 1.0/z;
-			if (hx < 0) {
-				if (((ix-0x3ff00000)|yisint) == 0) {
-					z = (z-z)/(z-z); /* (-1)**non-int is NaN */
-				} else if (yisint == 1)
-					z = -z;          /* (x<0)**odd = -(|x|**odd) */
-			}
-			return z;
+#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
+
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.  */
+static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
+{
+	uint32_t abstop;
+	uint64_t ki, idx, top, sbits;
+	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+	double_t kd, z, r, r2, scale, tail, tmp;
+
+	abstop = top12(x) & 0x7ff;
+	if (predict_false(abstop - top12(0x1p-54) >=
+			  top12(512.0) - top12(0x1p-54))) {
+		if (abstop - top12(0x1p-54) >= 0x80000000) {
+			/* Avoid spurious underflow for tiny x.  */
+			/* Note: 0 is common input.  */
+			double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
+			return sign_bias ? -one : one;
+		}
+		if (abstop >= top12(1024.0)) {
+			/* Note: inf and nan are already handled.  */
+			if (asuint64(x) >> 63)
+				return __math_uflow(sign_bias);
+			else
+				return __math_oflow(sign_bias);
 		}
+		/* Large x is special cased below.  */
+		abstop = 0;
 	}
 
-	s = 1.0; /* sign of result */
-	if (hx < 0) {
-		if (yisint == 0) /* (x<0)**(non-int) is NaN */
-			return (x-x)/(x-x);
-		if (yisint == 1) /* (x<0)**(odd int) */
-			s = -1.0;
-	}
+	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+	/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+	z = InvLn2N * x;
+#if TOINT_INTRINSICS
+	kd = roundtoint(z);
+	ki = converttoint(z);
+#elif EXP_USE_TOINT_NARROW
+	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
+	kd = eval_as_double(z + Shift);
+	ki = asuint64(kd) >> 16;
+	kd = (double_t)(int32_t)ki;
+#else
+	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+	kd = eval_as_double(z + Shift);
+	ki = asuint64(kd);
+	kd -= Shift;
+#endif
+	r = x + kd * NegLn2hiN + kd * NegLn2loN;
+	/* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
+	r += xtail;
+	/* 2^(k/N) ~= scale * (1 + tail).  */
+	idx = 2 * (ki % N);
+	top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
+	tail = asdouble(T[idx]);
+	/* This is only a valid scale when -1023*N < k < 1024*N.  */
+	sbits = T[idx + 1] + top;
+	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
+	/* Evaluation is optimized assuming superscalar pipelined execution.  */
+	r2 = r * r;
+	/* Without fma the worst case error is 0.25/N ulp larger.  */
+	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
+	tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+	if (predict_false(abstop == 0))
+		return specialcase(tmp, sbits, ki);
+	scale = asdouble(sbits);
+	/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+	   is no spurious underflow here even without fma.  */
+	return eval_as_double(scale + scale * tmp);
+}
 
-	/* |y| is huge */
-	if (iy > 0x41e00000) { /* if |y| > 2**31 */
-		if (iy > 0x43f00000) {  /* if |y| > 2**64, must o/uflow */
-			if (ix <= 0x3fefffff)
-				return hy < 0 ? huge*huge : tiny*tiny;
-			if (ix >= 0x3ff00000)
-				return hy > 0 ? huge*huge : tiny*tiny;
+/* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
+   the bit representation of a non-zero finite floating-point value.  */
+static inline int checkint(uint64_t iy)
+{
+	int e = iy >> 52 & 0x7ff;
+	if (e < 0x3ff)
+		return 0;
+	if (e > 0x3ff + 52)
+		return 2;
+	if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
+		return 0;
+	if (iy & (1ULL << (0x3ff + 52 - e)))
+		return 1;
+	return 2;
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
+static inline int zeroinfnan(uint64_t i)
+{
+	return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
+}
+
+double pow(double x, double y)
+{
+	uint32_t sign_bias = 0;
+	uint64_t ix, iy;
+	uint32_t topx, topy;
+
+	ix = asuint64(x);
+	iy = asuint64(y);
+	topx = top12(x);
+	topy = top12(y);
+	if (predict_false(topx - 0x001 >= 0x7ff - 0x001 ||
+			  (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
+		/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
+		   and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1.  */
+		/* Special cases: (x < 0x1p-126 or inf or nan) or
+		   (|y| < 0x1p-65 or |y| >= 0x1p63 or nan).  */
+		if (predict_false(zeroinfnan(iy))) {
+			if (2 * iy == 0)
+				return issignaling_inline(x) ? x + y : 1.0;
+			if (ix == asuint64(1.0))
+				return issignaling_inline(y) ? x + y : 1.0;
+			if (2 * ix > 2 * asuint64(INFINITY) ||
+			    2 * iy > 2 * asuint64(INFINITY))
+				return x + y;
+			if (2 * ix == 2 * asuint64(1.0))
+				return 1.0;
+			if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
+				return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
+			return y * y;
 		}
-		/* over/underflow if x is not close to one */
-		if (ix < 0x3fefffff)
-			return hy < 0 ? s*huge*huge : s*tiny*tiny;
-		if (ix > 0x3ff00000)
-			return hy > 0 ? s*huge*huge : s*tiny*tiny;
-		/* now |1-x| is tiny <= 2**-20, suffice to compute
-		   log(x) by x-x^2/2+x^3/3-x^4/4 */
-		t = ax - 1.0;       /* t has 20 trailing zeros */
-		w = (t*t)*(0.5 - t*(0.3333333333333333333333-t*0.25));
-		u = ivln2_h*t;      /* ivln2_h has 21 sig. bits */
-		v = t*ivln2_l - w*ivln2;
-		t1 = u + v;
-		SET_LOW_WORD(t1, 0);
-		t2 = v - (t1-u);
-	} else {
-		double ss,s2,s_h,s_l,t_h,t_l;
-		n = 0;
-		/* take care subnormal number */
-		if (ix < 0x00100000) {
-			ax *= two53;
-			n -= 53;
-			GET_HIGH_WORD(ix,ax);
+		if (predict_false(zeroinfnan(ix))) {
+			double_t x2 = x * x;
+			if (ix >> 63 && checkint(iy) == 1)
+				x2 = -x2;
+			/* Without the barrier some versions of clang hoist the 1/x2 and
+			   thus division by zero exception can be signaled spuriously.  */
+			return iy >> 63 ? fp_barrier(1 / x2) : x2;
 		}
-		n += ((ix)>>20) - 0x3ff;
-		j = ix & 0x000fffff;
-		/* determine interval */
-		ix = j | 0x3ff00000;   /* normalize ix */
-		if (j <= 0x3988E)      /* |x|<sqrt(3/2) */
-			k = 0;
-		else if (j < 0xBB67A)  /* |x|<sqrt(3)   */
-			k = 1;
-		else {
-			k = 0;
-			n += 1;
-			ix -= 0x00100000;
+		/* Here x and y are non-zero finite.  */
+		if (ix >> 63) {
+			/* Finite x < 0.  */
+			int yint = checkint(iy);
+			if (yint == 0)
+				return __math_invalid(x);
+			if (yint == 1)
+				sign_bias = SIGN_BIAS;
+			ix &= 0x7fffffffffffffff;
+			topx &= 0x7ff;
+		}
+		if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
+			/* Note: sign_bias == 0 here because y is not odd.  */
+			if (ix == asuint64(1.0))
+				return 1.0;
+			if ((topy & 0x7ff) < 0x3be) {
+				/* |y| < 2^-65, x^y ~= 1 + y*log(x).  */
+				if (WANT_ROUNDING)
+					return ix > asuint64(1.0) ? 1.0 + y :
+								    1.0 - y;
+				else
+					return 1.0;
+			}
+			return (ix > asuint64(1.0)) == (topy < 0x800) ?
+				       __math_oflow(0) :
+				       __math_uflow(0);
+		}
+		if (topx == 0) {
+			/* Normalize subnormal x so exponent becomes negative.  */
+			ix = asuint64(x * 0x1p52);
+			ix &= 0x7fffffffffffffff;
+			ix -= 52ULL << 52;
 		}
-		SET_HIGH_WORD(ax, ix);
-
-		/* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
-		u = ax - bp[k];        /* bp[0]=1.0, bp[1]=1.5 */
-		v = 1.0/(ax+bp[k]);
-		ss = u*v;
-		s_h = ss;
-		SET_LOW_WORD(s_h, 0);
-		/* t_h=ax+bp[k] High */
-		t_h = 0.0;
-		SET_HIGH_WORD(t_h, ((ix>>1)|0x20000000) + 0x00080000 + (k<<18));
-		t_l = ax - (t_h-bp[k]);
-		s_l = v*((u-s_h*t_h)-s_h*t_l);
-		/* compute log(ax) */
-		s2 = ss*ss;
-		r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
-		r += s_l*(s_h+ss);
-		s2 = s_h*s_h;
-		t_h = 3.0 + s2 + r;
-		SET_LOW_WORD(t_h, 0);
-		t_l = r - ((t_h-3.0)-s2);
-		/* u+v = ss*(1+...) */
-		u = s_h*t_h;
-		v = s_l*t_h + t_l*ss;
-		/* 2/(3log2)*(ss+...) */
-		p_h = u + v;
-		SET_LOW_WORD(p_h, 0);
-		p_l = v - (p_h-u);
-		z_h = cp_h*p_h;        /* cp_h+cp_l = 2/(3*log2) */
-		z_l = cp_l*p_h+p_l*cp + dp_l[k];
-		/* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
-		t = (double)n;
-		t1 = ((z_h + z_l) + dp_h[k]) + t;
-		SET_LOW_WORD(t1, 0);
-		t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 	}
 
-	/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
-	y1 = y;
-	SET_LOW_WORD(y1, 0);
-	p_l = (y-y1)*t1 + y*t2;
-	p_h = y1*t1;
-	z = p_l + p_h;
-	EXTRACT_WORDS(j, i, z);
-	if (j >= 0x40900000) {                      /* z >= 1024 */
-		if (((j-0x40900000)|i) != 0)        /* if z > 1024 */
-			return s*huge*huge;         /* overflow */
-		if (p_l + ovt > z - p_h)
-			return s*huge*huge;         /* overflow */
-	} else if ((j&0x7fffffff) >= 0x4090cc00) {  /* z <= -1075 */  // FIXME: instead of abs(j) use unsigned j
-		if (((j-0xc090cc00)|i) != 0)        /* z < -1075 */
-			return s*tiny*tiny;         /* underflow */
-		if (p_l <= z - p_h)
-			return s*tiny*tiny;         /* underflow */
-	}
-	/*
-	 * compute 2**(p_h+p_l)
-	 */
-	i = j & 0x7fffffff;
-	k = (i>>20) - 0x3ff;
-	n = 0;
-	if (i > 0x3fe00000) {  /* if |z| > 0.5, set n = [z+0.5] */
-		n = j + (0x00100000>>(k+1));
-		k = ((n&0x7fffffff)>>20) - 0x3ff;  /* new k for n */
-		t = 0.0;
-		SET_HIGH_WORD(t, n & ~(0x000fffff>>k));
-		n = ((n&0x000fffff)|0x00100000)>>(20-k);
-		if (j < 0)
-			n = -n;
-		p_h -= t;
-	}
-	t = p_l + p_h;
-	SET_LOW_WORD(t, 0);
-	u = t*lg2_h;
-	v = (p_l-(t-p_h))*lg2 + t*lg2_l;
-	z = u + v;
-	w = v - (z-u);
-	t = z*z;
-	t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
-	r = (z*t1)/(t1-2.0) - (w + z*w);
-	z = 1.0 - (r-z);
-	GET_HIGH_WORD(j, z);
-	j += n<<20;
-	if ((j>>20) <= 0)  /* subnormal output */
-		z = scalbn(z,n);
-	else
-		SET_HIGH_WORD(z, j);
-	return s*z;
+	double_t lo;
+	double_t hi = log_inline(ix, &lo);
+	double_t ehi, elo;
+#if __FP_FAST_FMA
+	ehi = y * hi;
+	elo = y * lo + __builtin_fma(y, hi, -ehi);
+#else
+	double_t yhi = asdouble(iy & -1ULL << 27);
+	double_t ylo = y - yhi;
+	double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
+	double_t llo = hi - lhi + lo;
+	ehi = yhi * lhi;
+	elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25.  */
+#endif
+	return exp_inline(ehi, elo, sign_bias);
 }
diff --git a/src/math/pow_data.c b/src/math/pow_data.c
new file mode 100644
index 00000000..81e760de
--- /dev/null
+++ b/src/math/pow_data.c
@@ -0,0 +1,180 @@
+/*
+ * Data for the log part of pow.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "pow_data.h"
+
+#define N (1 << POW_LOG_TABLE_BITS)
+
+const struct pow_log_data __pow_log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly = {
+// relative error: 0x1.11922ap-70
+// in -0x1.6bp-8 0x1.6bp-8
+// Coefficients are scaled to match the scaling during evaluation.
+-0x1p-1,
+0x1.555555555556p-2 * -2,
+-0x1.0000000000006p-2 * -2,
+0x1.999999959554ep-3 * 4,
+-0x1.555555529a47ap-3 * 4,
+0x1.2495b9b4845e9p-3 * -8,
+-0x1.0002b8b263fc3p-3 * -8,
+},
+/* Algorithm:
+
+	x = 2^k z
+	log(x) = k ln2 + log(c) + log(z/c)
+	log(z/c) = poly(z/c - 1)
+
+where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
+and z falls into the ith one, then table entries are computed as
+
+	tab[i].invc = 1/c
+	tab[i].logc = round(0x1p43*log(c))/0x1p43
+	tab[i].logctail = (double)(log(c) - logc)
+
+where c is chosen near the center of the subinterval such that 1/c has only a
+few precision bits so z/c - 1 is exactly representible as double:
+
+	1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
+
+Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
+the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
+error and the interval for z is selected such that near x == 1, where log(x)
+is tiny, large cancellation error is avoided in logc + poly(z/c - 1).  */
+.tab = {
+#define A(a, b, c) {a, 0, b, c},
+A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
+A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
+A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
+A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
+A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
+A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
+A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
+A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
+A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
+A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
+A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
+A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
+A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
+A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
+A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
+A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
+A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
+A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
+A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
+A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
+A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
+A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
+A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
+A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
+A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
+A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
+A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
+A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
+A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
+A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
+A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
+A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
+A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
+A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
+A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
+A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
+A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
+A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
+A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
+A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
+A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
+A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
+A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
+A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
+A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
+A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
+A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
+A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
+A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
+A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
+A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
+A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
+A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
+A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
+A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
+A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
+A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
+A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
+A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
+A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
+A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
+A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
+A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
+A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
+A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
+A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
+A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
+A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
+A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
+A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
+A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
+A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
+A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
+A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
+A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
+A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
+A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
+A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
+A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
+A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
+A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
+A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
+A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
+A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
+A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
+A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
+A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
+A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
+A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
+A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
+A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
+A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
+A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
+A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
+A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
+A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
+A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
+A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
+A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
+A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
+A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
+A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
+A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
+A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
+A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
+A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
+A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
+A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
+A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
+A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
+A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
+A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
+A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
+A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
+A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
+A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
+A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
+A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
+A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
+A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
+A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
+A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
+A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
+A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
+A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
+A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
+A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
+A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
+},
+};
diff --git a/src/math/pow_data.h b/src/math/pow_data.h
new file mode 100644
index 00000000..5d609ae8
--- /dev/null
+++ b/src/math/pow_data.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _POW_DATA_H
+#define _POW_DATA_H
+
+#include <features.h>
+
+#define POW_LOG_TABLE_BITS 7
+#define POW_LOG_POLY_ORDER 8
+extern hidden const struct pow_log_data {
+	double ln2hi;
+	double ln2lo;
+	double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+	/* Note: the pad field is unused, but allows slightly faster indexing.  */
+	struct {
+		double invc, pad, logc, logctail;
+	} tab[1 << POW_LOG_TABLE_BITS];
+} __pow_log_data;
+
+#endif
diff --git a/src/math/powerpc/fabs.c b/src/math/powerpc/fabs.c
index f6ec4433..9453a3aa 100644
--- a/src/math/powerpc/fabs.c
+++ b/src/math/powerpc/fabs.c
@@ -1,6 +1,6 @@
 #include <math.h>
 
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__) || defined(BROKEN_PPC_D_ASM)
 
 #include "../fabs.c"
 
diff --git a/src/math/powerpc/fabsf.c b/src/math/powerpc/fabsf.c
index d88b5911..2e9da588 100644
--- a/src/math/powerpc/fabsf.c
+++ b/src/math/powerpc/fabsf.c
@@ -1,6 +1,6 @@
 #include <math.h>
 
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
 
 #include "../fabsf.c"
 
diff --git a/src/math/powerpc/fma.c b/src/math/powerpc/fma.c
index fd268f5f..0eb2ba1e 100644
--- a/src/math/powerpc/fma.c
+++ b/src/math/powerpc/fma.c
@@ -1,6 +1,6 @@
 #include <math.h>
 
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__) || defined(BROKEN_PPC_D_ASM)
 
 #include "../fma.c"
 
diff --git a/src/math/powerpc/fmaf.c b/src/math/powerpc/fmaf.c
index a99a2a3b..dc1a749d 100644
--- a/src/math/powerpc/fmaf.c
+++ b/src/math/powerpc/fmaf.c
@@ -1,6 +1,6 @@
 #include <math.h>
 
-#ifdef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
 
 #include "../fmaf.c"
 
diff --git a/src/math/powf.c b/src/math/powf.c
index 427c8965..de8fab54 100644
--- a/src/math/powf.c
+++ b/src/math/powf.c
@@ -1,259 +1,185 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */
 /*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
+#include <math.h>
+#include <stdint.h>
 #include "libm.h"
+#include "exp2f_data.h"
+#include "powf_data.h"
 
-static const float
-bp[]   = {1.0, 1.5,},
-dp_h[] = { 0.0, 5.84960938e-01,}, /* 0x3f15c000 */
-dp_l[] = { 0.0, 1.56322085e-06,}, /* 0x35d1cfdc */
-two24  =  16777216.0,  /* 0x4b800000 */
-huge   =  1.0e30,
-tiny   =  1.0e-30,
-/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
-L1 =  6.0000002384e-01, /* 0x3f19999a */
-L2 =  4.2857143283e-01, /* 0x3edb6db7 */
-L3 =  3.3333334327e-01, /* 0x3eaaaaab */
-L4 =  2.7272811532e-01, /* 0x3e8ba305 */
-L5 =  2.3066075146e-01, /* 0x3e6c3255 */
-L6 =  2.0697501302e-01, /* 0x3e53f142 */
-P1 =  1.6666667163e-01, /* 0x3e2aaaab */
-P2 = -2.7777778450e-03, /* 0xbb360b61 */
-P3 =  6.6137559770e-05, /* 0x388ab355 */
-P4 = -1.6533901999e-06, /* 0xb5ddea0e */
-P5 =  4.1381369442e-08, /* 0x3331bb4c */
-lg2     =  6.9314718246e-01, /* 0x3f317218 */
-lg2_h   =  6.93145752e-01,   /* 0x3f317200 */
-lg2_l   =  1.42860654e-06,   /* 0x35bfbe8c */
-ovt     =  4.2995665694e-08, /* -(128-log2(ovfl+.5ulp)) */
-cp      =  9.6179670095e-01, /* 0x3f76384f =2/(3ln2) */
-cp_h    =  9.6191406250e-01, /* 0x3f764000 =12b cp */
-cp_l    = -1.1736857402e-04, /* 0xb8f623c6 =tail of cp_h */
-ivln2   =  1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
-ivln2_h =  1.4426879883e+00, /* 0x3fb8aa00 =16b 1/ln2*/
-ivln2_l =  7.0526075433e-06; /* 0x36eca570 =1/ln2 tail*/
+/*
+POWF_LOG2_POLY_ORDER = 5
+EXP2F_TABLE_BITS = 5
 
-float powf(float x, float y)
+ULP error: 0.82 (~ 0.5 + relerr*2^24)
+relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
+relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
+relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
+*/
+
+#define N (1 << POWF_LOG2_TABLE_BITS)
+#define T __powf_log2_data.tab
+#define A __powf_log2_data.poly
+#define OFF 0x3f330000
+
+/* Subnormal input is normalized so ix has negative biased exponent.
+   Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set.  */
+static inline double_t log2_inline(uint32_t ix)
 {
-	float z,ax,z_h,z_l,p_h,p_l;
-	float y1,t1,t2,r,s,sn,t,u,v,w;
-	int32_t i,j,k,yisint,n;
-	int32_t hx,hy,ix,iy,is;
+	double_t z, r, r2, r4, p, q, y, y0, invc, logc;
+	uint32_t iz, top, tmp;
+	int k, i;
 
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hy, y);
-	ix = hx & 0x7fffffff;
-	iy = hy & 0x7fffffff;
+	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
+	top = tmp & 0xff800000;
+	iz = ix - top;
+	k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = (double_t)asfloat(iz);
 
-	/* x**0 = 1, even if x is NaN */
-	if (iy == 0)
-		return 1.0f;
-	/* 1**y = 1, even if y is NaN */
-	if (hx == 0x3f800000)
-		return 1.0f;
-	/* NaN if either arg is NaN */
-	if (ix > 0x7f800000 || iy > 0x7f800000)
-		return x + y;
+	/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+	r = z * invc - 1;
+	y0 = logc + (double_t)k;
 
-	/* determine if y is an odd int when x < 0
-	 * yisint = 0       ... y is not an integer
-	 * yisint = 1       ... y is an odd int
-	 * yisint = 2       ... y is an even int
-	 */
-	yisint  = 0;
-	if (hx < 0) {
-		if (iy >= 0x4b800000)
-			yisint = 2; /* even integer y */
-		else if (iy >= 0x3f800000) {
-			k = (iy>>23) - 0x7f;         /* exponent */
-			j = iy>>(23-k);
-			if ((j<<(23-k)) == iy)
-				yisint = 2 - (j & 1);
-		}
-	}
+	/* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+	r2 = r * r;
+	y = A[0] * r + A[1];
+	p = A[2] * r + A[3];
+	r4 = r2 * r2;
+	q = A[4] * r + y0;
+	q = p * r2 + q;
+	y = y * r4 + q;
+	return y;
+}
 
-	/* special value of y */
-	if (iy == 0x7f800000) {  /* y is +-inf */
-		if (ix == 0x3f800000)      /* (-1)**+-inf is 1 */
-			return 1.0f;
-		else if (ix > 0x3f800000)  /* (|x|>1)**+-inf = inf,0 */
-			return hy >= 0 ? y : 0.0f;
-		else                       /* (|x|<1)**+-inf = 0,inf */
-			return hy >= 0 ? 0.0f: -y;
-	}
-	if (iy == 0x3f800000)    /* y is +-1 */
-		return hy >= 0 ? x : 1.0f/x;
-	if (hy == 0x40000000)    /* y is 2 */
-		return x*x;
-	if (hy == 0x3f000000) {  /* y is  0.5 */
-		if (hx >= 0)     /* x >= +0 */
-			return sqrtf(x);
-	}
+#undef N
+#undef T
+#define N (1 << EXP2F_TABLE_BITS)
+#define T __exp2f_data.tab
+#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
 
-	ax = fabsf(x);
-	/* special value of x */
-	if (ix == 0x7f800000 || ix == 0 || ix == 0x3f800000) { /* x is +-0,+-inf,+-1 */
-		z = ax;
-		if (hy < 0)  /* z = (1/|x|) */
-			z = 1.0f/z;
-		if (hx < 0) {
-			if (((ix-0x3f800000)|yisint) == 0) {
-				z = (z-z)/(z-z); /* (-1)**non-int is NaN */
-			} else if (yisint == 1)
-				z = -z;          /* (x<0)**odd = -(|x|**odd) */
-		}
-		return z;
-	}
+/* The output of log2 and thus the input of exp2 is either scaled by N
+   (in case of fast toint intrinsics) or not.  The unscaled xd must be
+   in [-1021,1023], sign_bias sets the sign of the result.  */
+static inline float exp2_inline(double_t xd, uint32_t sign_bias)
+{
+	uint64_t ki, ski, t;
+	double_t kd, z, r, r2, y, s;
 
-	sn = 1.0f; /* sign of result */
-	if (hx < 0) {
-		if (yisint == 0) /* (x<0)**(non-int) is NaN */
-			return (x-x)/(x-x);
-		if (yisint == 1) /* (x<0)**(odd int) */
-			sn = -1.0f;
-	}
+#if TOINT_INTRINSICS
+#define C __exp2f_data.poly_scaled
+	/* N*x = k + r with r in [-1/2, 1/2] */
+	kd = roundtoint(xd); /* k */
+	ki = converttoint(xd);
+#else
+#define C __exp2f_data.poly
+#define SHIFT __exp2f_data.shift_scaled
+	/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
+	kd = eval_as_double(xd + SHIFT);
+	ki = asuint64(kd);
+	kd -= SHIFT; /* k/N */
+#endif
+	r = xd - kd;
 
-	/* |y| is huge */
-	if (iy > 0x4d000000) { /* if |y| > 2**27 */
-		/* over/underflow if x is not close to one */
-		if (ix < 0x3f7ffff8)
-			return hy < 0 ? sn*huge*huge : sn*tiny*tiny;
-		if (ix > 0x3f800007)
-			return hy > 0 ? sn*huge*huge : sn*tiny*tiny;
-		/* now |1-x| is tiny <= 2**-20, suffice to compute
-		   log(x) by x-x^2/2+x^3/3-x^4/4 */
-		t = ax - 1;     /* t has 20 trailing zeros */
-		w = (t*t)*(0.5f - t*(0.333333333333f - t*0.25f));
-		u = ivln2_h*t;  /* ivln2_h has 16 sig. bits */
-		v = t*ivln2_l - w*ivln2;
-		t1 = u + v;
-		GET_FLOAT_WORD(is, t1);
-		SET_FLOAT_WORD(t1, is & 0xfffff000);
-		t2 = v - (t1-u);
-	} else {
-		float s2,s_h,s_l,t_h,t_l;
-		n = 0;
-		/* take care subnormal number */
-		if (ix < 0x00800000) {
-			ax *= two24;
-			n -= 24;
-			GET_FLOAT_WORD(ix, ax);
-		}
-		n += ((ix)>>23) - 0x7f;
-		j = ix & 0x007fffff;
-		/* determine interval */
-		ix = j | 0x3f800000;     /* normalize ix */
-		if (j <= 0x1cc471)       /* |x|<sqrt(3/2) */
-			k = 0;
-		else if (j < 0x5db3d7)   /* |x|<sqrt(3)   */
-			k = 1;
-		else {
-			k = 0;
-			n += 1;
-			ix -= 0x00800000;
-		}
-		SET_FLOAT_WORD(ax, ix);
+	/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+	t = T[ki % N];
+	ski = ki + sign_bias;
+	t += ski << (52 - EXP2F_TABLE_BITS);
+	s = asdouble(t);
+	z = C[0] * r + C[1];
+	r2 = r * r;
+	y = C[2] * r + 1;
+	y = z * r2 + y;
+	y = y * s;
+	return eval_as_float(y);
+}
 
-		/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
-		u = ax - bp[k];   /* bp[0]=1.0, bp[1]=1.5 */
-		v = 1.0f/(ax+bp[k]);
-		s = u*v;
-		s_h = s;
-		GET_FLOAT_WORD(is, s_h);
-		SET_FLOAT_WORD(s_h, is & 0xfffff000);
-		/* t_h=ax+bp[k] High */
-		is = ((ix>>1) & 0xfffff000) | 0x20000000;
-		SET_FLOAT_WORD(t_h, is + 0x00400000 + (k<<21));
-		t_l = ax - (t_h - bp[k]);
-		s_l = v*((u - s_h*t_h) - s_h*t_l);
-		/* compute log(ax) */
-		s2 = s*s;
-		r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
-		r += s_l*(s_h+s);
-		s2 = s_h*s_h;
-		t_h = 3.0f + s2 + r;
-		GET_FLOAT_WORD(is, t_h);
-		SET_FLOAT_WORD(t_h, is & 0xfffff000);
-		t_l = r - ((t_h - 3.0f) - s2);
-		/* u+v = s*(1+...) */
-		u = s_h*t_h;
-		v = s_l*t_h + t_l*s;
-		/* 2/(3log2)*(s+...) */
-		p_h = u + v;
-		GET_FLOAT_WORD(is, p_h);
-		SET_FLOAT_WORD(p_h, is & 0xfffff000);
-		p_l = v - (p_h - u);
-		z_h = cp_h*p_h;  /* cp_h+cp_l = 2/(3*log2) */
-		z_l = cp_l*p_h + p_l*cp+dp_l[k];
-		/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
-		t = (float)n;
-		t1 = (((z_h + z_l) + dp_h[k]) + t);
-		GET_FLOAT_WORD(is, t1);
-		SET_FLOAT_WORD(t1, is & 0xfffff000);
-		t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
-	}
+/* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
+   the bit representation of a non-zero finite floating-point value.  */
+static inline int checkint(uint32_t iy)
+{
+	int e = iy >> 23 & 0xff;
+	if (e < 0x7f)
+		return 0;
+	if (e > 0x7f + 23)
+		return 2;
+	if (iy & ((1 << (0x7f + 23 - e)) - 1))
+		return 0;
+	if (iy & (1 << (0x7f + 23 - e)))
+		return 1;
+	return 2;
+}
+
+static inline int zeroinfnan(uint32_t ix)
+{
+	return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
+}
 
-	/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
-	GET_FLOAT_WORD(is, y);
-	SET_FLOAT_WORD(y1, is & 0xfffff000);
-	p_l = (y-y1)*t1 + y*t2;
-	p_h = y1*t1;
-	z = p_l + p_h;
-	GET_FLOAT_WORD(j, z);
-	if (j > 0x43000000)          /* if z > 128 */
-		return sn*huge*huge;  /* overflow */
-	else if (j == 0x43000000) {  /* if z == 128 */
-		if (p_l + ovt > z - p_h)
-			return sn*huge*huge;  /* overflow */
-	} else if ((j&0x7fffffff) > 0x43160000)  /* z < -150 */ // FIXME: check should be  (uint32_t)j > 0xc3160000
-		return sn*tiny*tiny;  /* underflow */
-	else if (j == 0xc3160000) {  /* z == -150 */
-		if (p_l <= z-p_h)
-			return sn*tiny*tiny;  /* underflow */
+float powf(float x, float y)
+{
+	uint32_t sign_bias = 0;
+	uint32_t ix, iy;
+
+	ix = asuint(x);
+	iy = asuint(y);
+	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
+			  zeroinfnan(iy))) {
+		/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).  */
+		if (predict_false(zeroinfnan(iy))) {
+			if (2 * iy == 0)
+				return issignalingf_inline(x) ? x + y : 1.0f;
+			if (ix == 0x3f800000)
+				return issignalingf_inline(y) ? x + y : 1.0f;
+			if (2 * ix > 2u * 0x7f800000 ||
+			    2 * iy > 2u * 0x7f800000)
+				return x + y;
+			if (2 * ix == 2 * 0x3f800000)
+				return 1.0f;
+			if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+				return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
+			return y * y;
+		}
+		if (predict_false(zeroinfnan(ix))) {
+			float_t x2 = x * x;
+			if (ix & 0x80000000 && checkint(iy) == 1)
+				x2 = -x2;
+			/* Without the barrier some versions of clang hoist the 1/x2 and
+			   thus division by zero exception can be signaled spuriously.  */
+			return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
+		}
+		/* x and y are non-zero finite.  */
+		if (ix & 0x80000000) {
+			/* Finite x < 0.  */
+			int yint = checkint(iy);
+			if (yint == 0)
+				return __math_invalidf(x);
+			if (yint == 1)
+				sign_bias = SIGN_BIAS;
+			ix &= 0x7fffffff;
+		}
+		if (ix < 0x00800000) {
+			/* Normalize subnormal x so exponent becomes negative.  */
+			ix = asuint(x * 0x1p23f);
+			ix &= 0x7fffffff;
+			ix -= 23 << 23;
+		}
 	}
-	/*
-	 * compute 2**(p_h+p_l)
-	 */
-	i = j & 0x7fffffff;
-	k = (i>>23) - 0x7f;
-	n = 0;
-	if (i > 0x3f000000) {   /* if |z| > 0.5, set n = [z+0.5] */
-		n = j + (0x00800000>>(k+1));
-		k = ((n&0x7fffffff)>>23) - 0x7f;  /* new k for n */
-		SET_FLOAT_WORD(t, n & ~(0x007fffff>>k));
-		n = ((n&0x007fffff)|0x00800000)>>(23-k);
-		if (j < 0)
-			n = -n;
-		p_h -= t;
+	double_t logx = log2_inline(ix);
+	double_t ylogx = y * logx; /* cannot overflow, y is single prec.  */
+	if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
+			  asuint64(126.0 * POWF_SCALE) >> 47)) {
+		/* |y*log(x)| >= 126.  */
+		if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
+			return __math_oflowf(sign_bias);
+		if (ylogx <= -150.0 * POWF_SCALE)
+			return __math_uflowf(sign_bias);
 	}
-	t = p_l + p_h;
-	GET_FLOAT_WORD(is, t);
-	SET_FLOAT_WORD(t, is & 0xffff8000);
-	u = t*lg2_h;
-	v = (p_l-(t-p_h))*lg2 + t*lg2_l;
-	z = u + v;
-	w = v - (z - u);
-	t = z*z;
-	t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
-	r = (z*t1)/(t1-2.0f) - (w+z*w);
-	z = 1.0f - (r - z);
-	GET_FLOAT_WORD(j, z);
-	j += n<<23;
-	if ((j>>23) <= 0)  /* subnormal output */
-		z = scalbnf(z, n);
-	else
-		SET_FLOAT_WORD(z, j);
-	return sn*z;
+	return exp2_inline(ylogx, sign_bias);
 }
diff --git a/src/math/powf_data.c b/src/math/powf_data.c
new file mode 100644
index 00000000..13e1d9a0
--- /dev/null
+++ b/src/math/powf_data.c
@@ -0,0 +1,34 @@
+/*
+ * Data definition for powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "powf_data.h"
+
+const struct powf_log2_data __powf_log2_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
+  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
+  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
+  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
+  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
+  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
+  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
+  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
+  { 0x1p+0, 0x0p+0 * POWF_SCALE },
+  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
+  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
+  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
+  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
+  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
+  },
+  .poly = {
+  0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
+  0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
+  0x1.71547652ab82bp0 * POWF_SCALE,
+  }
+};
diff --git a/src/math/powf_data.h b/src/math/powf_data.h
new file mode 100644
index 00000000..5b136e28
--- /dev/null
+++ b/src/math/powf_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _POWF_DATA_H
+#define _POWF_DATA_H
+
+#include "libm.h"
+#include "exp2f_data.h"
+
+#define POWF_LOG2_TABLE_BITS 4
+#define POWF_LOG2_POLY_ORDER 5
+#if TOINT_INTRINSICS
+#define POWF_SCALE_BITS EXP2F_TABLE_BITS
+#else
+#define POWF_SCALE_BITS 0
+#endif
+#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
+extern hidden const struct powf_log2_data {
+	struct {
+		double invc, logc;
+	} tab[1 << POWF_LOG2_TABLE_BITS];
+	double poly[POWF_LOG2_POLY_ORDER];
+} __powf_log2_data;
+
+#endif
diff --git a/src/math/powl.c b/src/math/powl.c
index 5b6da07b..9eb22162 100644
--- a/src/math/powl.c
+++ b/src/math/powl.c
@@ -57,14 +57,6 @@
  *    IEEE     0,8700       60000      6.5e-18      1.0e-18
  * 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
  *
- *
- * ERROR MESSAGES:
- *
- *   message         condition      value returned
- * pow overflow     x**y > MAXNUM      INFINITY
- * pow underflow   x**y < 1/MAXNUM       0.0
- * pow domain      x<0 and y noninteger  0.0
- *
  */
 
 #include "libm.h"
@@ -212,25 +204,33 @@ long double powl(long double x, long double y)
 	}
 	if (x == 1.0)
 		return 1.0; /* 1**y = 1, even if y is nan */
-	if (x == -1.0 && !isfinite(y))
-		return 1.0; /* -1**inf = 1 */
 	if (y == 0.0)
 		return 1.0; /* x**0 = 1, even if x is nan */
 	if (y == 1.0)
 		return x;
-	if (y >= LDBL_MAX) {
-		if (x > 1.0 || x < -1.0)
-			return INFINITY;
-		if (x != 0.0)
-			return 0.0;
-	}
-	if (y <= -LDBL_MAX) {
-		if (x > 1.0 || x < -1.0)
+	/* if y*log2(x) < log2(LDBL_TRUE_MIN)-1 then x^y uflows to 0
+	   if y*log2(x) > -log2(LDBL_TRUE_MIN)+1 > LDBL_MAX_EXP then x^y oflows
+	   if |x|!=1 then |log2(x)| > |log(x)| > LDBL_EPSILON/2 so
+	   x^y oflows/uflows if |y|*LDBL_EPSILON/2 > -log2(LDBL_TRUE_MIN)+1 */
+	if (fabsl(y) > 2*(-LDBL_MIN_EXP+LDBL_MANT_DIG+1)/LDBL_EPSILON) {
+		/* y is not an odd int */
+		if (x == -1.0)
+			return 1.0;
+		if (y == INFINITY) {
+			if (x > 1.0 || x < -1.0)
+				return INFINITY;
 			return 0.0;
-		if (x != 0.0 || y == -INFINITY)
+		}
+		if (y == -INFINITY) {
+			if (x > 1.0 || x < -1.0)
+				return 0.0;
 			return INFINITY;
+		}
+		if ((x > 1.0 || x < -1.0) == (y > 0))
+			return huge * huge;
+		return twom10000 * twom10000;
 	}
-	if (x >= LDBL_MAX) {
+	if (x == INFINITY) {
 		if (y > 0.0)
 			return INFINITY;
 		return 0.0;
@@ -253,7 +253,7 @@ long double powl(long double x, long double y)
 			yoddint = 1;
 	}
 
-	if (x <= -LDBL_MAX) {
+	if (x == -INFINITY) {
 		if (y > 0.0) {
 			if (yoddint)
 				return -INFINITY;
diff --git a/src/math/riscv32/copysign.c b/src/math/riscv32/copysign.c
new file mode 100644
index 00000000..c7854178
--- /dev/null
+++ b/src/math/riscv32/copysign.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double copysign(double x, double y)
+{
+	__asm__ ("fsgnj.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../copysign.c"
+
+#endif
diff --git a/src/math/riscv32/copysignf.c b/src/math/riscv32/copysignf.c
new file mode 100644
index 00000000..a125611a
--- /dev/null
+++ b/src/math/riscv32/copysignf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float copysignf(float x, float y)
+{
+	__asm__ ("fsgnj.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../copysignf.c"
+
+#endif
diff --git a/src/math/riscv32/fabs.c b/src/math/riscv32/fabs.c
new file mode 100644
index 00000000..5290b6f0
--- /dev/null
+++ b/src/math/riscv32/fabs.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fabs(double x)
+{
+	__asm__ ("fabs.d %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../fabs.c"
+
+#endif
diff --git a/src/math/riscv32/fabsf.c b/src/math/riscv32/fabsf.c
new file mode 100644
index 00000000..f5032e35
--- /dev/null
+++ b/src/math/riscv32/fabsf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fabsf(float x)
+{
+	__asm__ ("fabs.s %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../fabsf.c"
+
+#endif
diff --git a/src/math/riscv32/fma.c b/src/math/riscv32/fma.c
new file mode 100644
index 00000000..99b05713
--- /dev/null
+++ b/src/math/riscv32/fma.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("fmadd.d %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z));
+	return x;
+}
+
+#else
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/riscv32/fmaf.c b/src/math/riscv32/fmaf.c
new file mode 100644
index 00000000..f9dc47ed
--- /dev/null
+++ b/src/math/riscv32/fmaf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("fmadd.s %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z));
+	return x;
+}
+
+#else
+
+#include "../fmaf.c"
+
+#endif
diff --git a/src/math/riscv32/fmax.c b/src/math/riscv32/fmax.c
new file mode 100644
index 00000000..023709cd
--- /dev/null
+++ b/src/math/riscv32/fmax.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fmax(double x, double y)
+{
+	__asm__ ("fmax.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fmax.c"
+
+#endif
diff --git a/src/math/riscv32/fmaxf.c b/src/math/riscv32/fmaxf.c
new file mode 100644
index 00000000..863d2bd1
--- /dev/null
+++ b/src/math/riscv32/fmaxf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fmaxf(float x, float y)
+{
+	__asm__ ("fmax.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fmaxf.c"
+
+#endif
diff --git a/src/math/riscv32/fmin.c b/src/math/riscv32/fmin.c
new file mode 100644
index 00000000..a4e3b067
--- /dev/null
+++ b/src/math/riscv32/fmin.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fmin(double x, double y)
+{
+	__asm__ ("fmin.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fmin.c"
+
+#endif
diff --git a/src/math/riscv32/fminf.c b/src/math/riscv32/fminf.c
new file mode 100644
index 00000000..32156e80
--- /dev/null
+++ b/src/math/riscv32/fminf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fminf(float x, float y)
+{
+	__asm__ ("fmin.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fminf.c"
+
+#endif
diff --git a/src/math/riscv32/sqrt.c b/src/math/riscv32/sqrt.c
new file mode 100644
index 00000000..867a504c
--- /dev/null
+++ b/src/math/riscv32/sqrt.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double sqrt(double x)
+{
+	__asm__ ("fsqrt.d %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../sqrt.c"
+
+#endif
diff --git a/src/math/riscv32/sqrtf.c b/src/math/riscv32/sqrtf.c
new file mode 100644
index 00000000..610c2cf8
--- /dev/null
+++ b/src/math/riscv32/sqrtf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float sqrtf(float x)
+{
+	__asm__ ("fsqrt.s %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../sqrtf.c"
+
+#endif
diff --git a/src/math/riscv64/copysign.c b/src/math/riscv64/copysign.c
new file mode 100644
index 00000000..c7854178
--- /dev/null
+++ b/src/math/riscv64/copysign.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double copysign(double x, double y)
+{
+	__asm__ ("fsgnj.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../copysign.c"
+
+#endif
diff --git a/src/math/riscv64/copysignf.c b/src/math/riscv64/copysignf.c
new file mode 100644
index 00000000..a125611a
--- /dev/null
+++ b/src/math/riscv64/copysignf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float copysignf(float x, float y)
+{
+	__asm__ ("fsgnj.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../copysignf.c"
+
+#endif
diff --git a/src/math/riscv64/fabs.c b/src/math/riscv64/fabs.c
new file mode 100644
index 00000000..5290b6f0
--- /dev/null
+++ b/src/math/riscv64/fabs.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fabs(double x)
+{
+	__asm__ ("fabs.d %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../fabs.c"
+
+#endif
diff --git a/src/math/riscv64/fabsf.c b/src/math/riscv64/fabsf.c
new file mode 100644
index 00000000..f5032e35
--- /dev/null
+++ b/src/math/riscv64/fabsf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fabsf(float x)
+{
+	__asm__ ("fabs.s %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../fabsf.c"
+
+#endif
diff --git a/src/math/riscv64/fma.c b/src/math/riscv64/fma.c
new file mode 100644
index 00000000..99b05713
--- /dev/null
+++ b/src/math/riscv64/fma.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("fmadd.d %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z));
+	return x;
+}
+
+#else
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/riscv64/fmaf.c b/src/math/riscv64/fmaf.c
new file mode 100644
index 00000000..f9dc47ed
--- /dev/null
+++ b/src/math/riscv64/fmaf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("fmadd.s %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z));
+	return x;
+}
+
+#else
+
+#include "../fmaf.c"
+
+#endif
diff --git a/src/math/riscv64/fmax.c b/src/math/riscv64/fmax.c
new file mode 100644
index 00000000..023709cd
--- /dev/null
+++ b/src/math/riscv64/fmax.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fmax(double x, double y)
+{
+	__asm__ ("fmax.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fmax.c"
+
+#endif
diff --git a/src/math/riscv64/fmaxf.c b/src/math/riscv64/fmaxf.c
new file mode 100644
index 00000000..863d2bd1
--- /dev/null
+++ b/src/math/riscv64/fmaxf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fmaxf(float x, float y)
+{
+	__asm__ ("fmax.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fmaxf.c"
+
+#endif
diff --git a/src/math/riscv64/fmin.c b/src/math/riscv64/fmin.c
new file mode 100644
index 00000000..a4e3b067
--- /dev/null
+++ b/src/math/riscv64/fmin.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double fmin(double x, double y)
+{
+	__asm__ ("fmin.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fmin.c"
+
+#endif
diff --git a/src/math/riscv64/fminf.c b/src/math/riscv64/fminf.c
new file mode 100644
index 00000000..32156e80
--- /dev/null
+++ b/src/math/riscv64/fminf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float fminf(float x, float y)
+{
+	__asm__ ("fmin.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
+	return x;
+}
+
+#else
+
+#include "../fminf.c"
+
+#endif
diff --git a/src/math/riscv64/sqrt.c b/src/math/riscv64/sqrt.c
new file mode 100644
index 00000000..867a504c
--- /dev/null
+++ b/src/math/riscv64/sqrt.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 64
+
+double sqrt(double x)
+{
+	__asm__ ("fsqrt.d %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../sqrt.c"
+
+#endif
diff --git a/src/math/riscv64/sqrtf.c b/src/math/riscv64/sqrtf.c
new file mode 100644
index 00000000..610c2cf8
--- /dev/null
+++ b/src/math/riscv64/sqrtf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __riscv_flen >= 32
+
+float sqrtf(float x)
+{
+	__asm__ ("fsqrt.s %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
+
+#else
+
+#include "../sqrtf.c"
+
+#endif
diff --git a/src/math/sinh.c b/src/math/sinh.c
index 00022c4e..a01951ae 100644
--- a/src/math/sinh.c
+++ b/src/math/sinh.c
@@ -34,6 +34,6 @@ double sinh(double x)
 
 	/* |x| > log(DBL_MAX) or nan */
 	/* note: the result is stored to handle overflow */
-	t = 2*h*__expo2(absx);
+	t = __expo2(absx, 2*h);
 	return t;
 }
diff --git a/src/math/sinhf.c b/src/math/sinhf.c
index 6ad19ea2..b9caa793 100644
--- a/src/math/sinhf.c
+++ b/src/math/sinhf.c
@@ -26,6 +26,6 @@ float sinhf(float x)
 	}
 
 	/* |x| > logf(FLT_MAX) or nan */
-	t = 2*h*__expo2f(absx);
+	t = __expo2f(absx, 2*h);
 	return t;
 }
diff --git a/src/math/sqrt.c b/src/math/sqrt.c
index b2775673..5ba26559 100644
--- a/src/math/sqrt.c
+++ b/src/math/sqrt.c
@@ -1,185 +1,158 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* sqrt(x)
- * Return correctly rounded sqrt.
- *           ------------------------------------------
- *           |  Use the hardware sqrt if you have one |
- *           ------------------------------------------
- * Method:
- *   Bit by bit method using integer arithmetic. (Slow, but portable)
- *   1. Normalization
- *      Scale x to y in [1,4) with even powers of 2:
- *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
- *              sqrt(x) = 2^k * sqrt(y)
- *   2. Bit by bit computation
- *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
- *           i                                                   0
- *                                     i+1         2
- *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
- *           i      i            i                 i
- *
- *      To compute q    from q , one checks whether
- *                  i+1       i
- *
- *                            -(i+1) 2
- *                      (q + 2      ) <= y.                     (2)
- *                        i
- *                                                            -(i+1)
- *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
- *                             i+1   i             i+1   i
- *
- *      With some algebric manipulation, it is not difficult to see
- *      that (2) is equivalent to
- *                             -(i+1)
- *                      s  +  2       <= y                      (3)
- *                       i                i
- *
- *      The advantage of (3) is that s  and y  can be computed by
- *                                    i      i
- *      the following recurrence formula:
- *          if (3) is false
- *
- *          s     =  s  ,       y    = y   ;                    (4)
- *           i+1      i          i+1    i
- *
- *          otherwise,
- *                         -i                     -(i+1)
- *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
- *           i+1      i          i+1    i     i
- *
- *      One may easily use induction to prove (4) and (5).
- *      Note. Since the left hand side of (3) contain only i+2 bits,
- *            it does not necessary to do a full (53-bit) comparison
- *            in (3).
- *   3. Final rounding
- *      After generating the 53 bits result, we compute one more bit.
- *      Together with the remainder, we can decide whether the
- *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
- *      (it will never equal to 1/2ulp).
- *      The rounding mode can be detected by checking whether
- *      huge + tiny is equal to huge, and whether huge - tiny is
- *      equal to huge for some floating point number "huge" and "tiny".
- *
- * Special cases:
- *      sqrt(+-0) = +-0         ... exact
- *      sqrt(inf) = inf
- *      sqrt(-ve) = NaN         ... with invalid signal
- *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
- */
-
+#include <stdint.h>
+#include <math.h>
 #include "libm.h"
+#include "sqrt_data.h"
 
-static const double tiny = 1.0e-300;
+#define FENV_SUPPORT 1
 
-double sqrt(double x)
+/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
+static inline uint32_t mul32(uint32_t a, uint32_t b)
 {
-	double z;
-	int32_t sign = (int)0x80000000;
-	int32_t ix0,s0,q,m,t,i;
-	uint32_t r,t1,s1,ix1,q1;
+	return (uint64_t)a*b >> 32;
+}
 
-	EXTRACT_WORDS(ix0, ix1, x);
+/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
+static inline uint64_t mul64(uint64_t a, uint64_t b)
+{
+	uint64_t ahi = a>>32;
+	uint64_t alo = a&0xffffffff;
+	uint64_t bhi = b>>32;
+	uint64_t blo = b&0xffffffff;
+	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
+}
 
-	/* take care of Inf and NaN */
-	if ((ix0&0x7ff00000) == 0x7ff00000) {
-		return x*x + x;  /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-	}
-	/* take care of zero */
-	if (ix0 <= 0) {
-		if (((ix0&~sign)|ix1) == 0)
-			return x;  /* sqrt(+-0) = +-0 */
-		if (ix0 < 0)
-			return (x-x)/(x-x);  /* sqrt(-ve) = sNaN */
-	}
-	/* normalize x */
-	m = ix0>>20;
-	if (m == 0) {  /* subnormal x */
-		while (ix0 == 0) {
-			m -= 21;
-			ix0 |= (ix1>>11);
-			ix1 <<= 21;
-		}
-		for (i=0; (ix0&0x00100000) == 0; i++)
-			ix0<<=1;
-		m -= i - 1;
-		ix0 |= ix1>>(32-i);
-		ix1 <<= i;
-	}
-	m -= 1023;    /* unbias exponent */
-	ix0 = (ix0&0x000fffff)|0x00100000;
-	if (m & 1) {  /* odd m, double x to make it even */
-		ix0 += ix0 + ((ix1&sign)>>31);
-		ix1 += ix1;
-	}
-	m >>= 1;      /* m = [m/2] */
-
-	/* generate sqrt(x) bit by bit */
-	ix0 += ix0 + ((ix1&sign)>>31);
-	ix1 += ix1;
-	q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
-	r = 0x00200000;        /* r = moving bit from right to left */
-
-	while (r != 0) {
-		t = s0 + r;
-		if (t <= ix0) {
-			s0   = t + r;
-			ix0 -= t;
-			q   += r;
-		}
-		ix0 += ix0 + ((ix1&sign)>>31);
-		ix1 += ix1;
-		r >>= 1;
-	}
+double sqrt(double x)
+{
+	uint64_t ix, top, m;
 
-	r = sign;
-	while (r != 0) {
-		t1 = s1 + r;
-		t  = s0;
-		if (t < ix0 || (t == ix0 && t1 <= ix1)) {
-			s1 = t1 + r;
-			if ((t1&sign) == sign && (s1&sign) == 0)
-				s0++;
-			ix0 -= t;
-			if (ix1 < t1)
-				ix0--;
-			ix1 -= t1;
-			q1 += r;
-		}
-		ix0 += ix0 + ((ix1&sign)>>31);
-		ix1 += ix1;
-		r >>= 1;
+	/* special case handling.  */
+	ix = asuint64(x);
+	top = ix >> 52;
+	if (predict_false(top - 0x001 >= 0x7ff - 0x001)) {
+		/* x < 0x1p-1022 or inf or nan.  */
+		if (ix * 2 == 0)
+			return x;
+		if (ix == 0x7ff0000000000000)
+			return x;
+		if (ix > 0x7ff0000000000000)
+			return __math_invalid(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint64(x * 0x1p52);
+		top = ix >> 52;
+		top -= 52;
 	}
 
-	/* use floating add to find out rounding direction */
-	if ((ix0|ix1) != 0) {
-		z = 1.0 - tiny; /* raise inexact flag */
-		if (z >= 1.0) {
-			z = 1.0 + tiny;
-			if (q1 == (uint32_t)0xffffffff) {
-				q1 = 0;
-				q++;
-			} else if (z > 1.0) {
-				if (q1 == (uint32_t)0xfffffffe)
-					q++;
-				q1 += 2;
-			} else
-				q1 += q1 & 1;
-		}
+	/* argument reduction:
+	   x = 4^e m; with integer e, and m in [1, 4)
+	   m: fixed point representation [2.62]
+	   2^e is the exponent part of the result.  */
+	int even = top & 1;
+	m = (ix << 11) | 0x8000000000000000;
+	if (even) m >>= 1;
+	top = (top + 0x3ff) >> 1;
+
+	/* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
+
+	   initial estimate:
+	   7bit table lookup (1bit exponent and 6bit significand).
+
+	   iterative approximation:
+	   using 2 goldschmidt iterations with 32bit int arithmetics
+	   and a final iteration with 64bit int arithmetics.
+
+	   details:
+
+	   the relative error (e = r0 sqrt(m)-1) of a linear estimate
+	   (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
+	   a table lookup is faster and needs one less iteration
+	   6 bit lookup table (128b) gives |e| < 0x1.f9p-8
+	   7 bit lookup table (256b) gives |e| < 0x1.fdp-9
+	   for single and double prec 6bit is enough but for quad
+	   prec 7bit is needed (or modified iterations). to avoid
+	   one more iteration >=13bit table would be needed (16k).
+
+	   a newton-raphson iteration for r is
+	     w = r*r
+	     u = 3 - m*w
+	     r = r*u/2
+	   can use a goldschmidt iteration for s at the end or
+	     s = m*r
+
+	   first goldschmidt iteration is
+	     s = m*r
+	     u = 3 - s*r
+	     r = r*u/2
+	     s = s*u/2
+	   next goldschmidt iteration is
+	     u = 3 - s*r
+	     r = r*u/2
+	     s = s*u/2
+	   and at the end r is not computed only s.
+
+	   they use the same amount of operations and converge at the
+	   same quadratic rate, i.e. if
+	     r1 sqrt(m) - 1 = e, then
+	     r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
+	   the advantage of goldschmidt is that the mul for s and r
+	   are independent (computed in parallel), however it is not
+	   "self synchronizing": it only uses the input m in the
+	   first iteration so rounding errors accumulate. at the end
+	   or when switching to larger precision arithmetics rounding
+	   errors dominate so the first iteration should be used.
+
+	   the fixed point representations are
+	     m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
+	   and after switching to 64 bit
+	     m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62  */
+
+	static const uint64_t three = 0xc0000000;
+	uint64_t r, s, d, u, i;
+
+	i = (ix >> 46) % 128;
+	r = (uint32_t)__rsqrt_tab[i] << 16;
+	/* |r sqrt(m) - 1| < 0x1.fdp-9 */
+	s = mul32(m>>32, r);
+	/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
+	d = mul32(s, r);
+	u = three - d;
+	r = mul32(r, u) << 1;
+	/* |r sqrt(m) - 1| < 0x1.7bp-16 */
+	s = mul32(s, u) << 1;
+	/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
+	d = mul32(s, r);
+	u = three - d;
+	r = mul32(r, u) << 1;
+	/* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */
+	r = r << 32;
+	s = mul64(m, r);
+	d = mul64(s, r);
+	u = (three<<32) - d;
+	s = mul64(s, u);  /* repr: 3.61 */
+	/* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */
+	s = (s - 2) >> 9; /* repr: 12.52 */
+	/* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */
+
+	/* s < sqrt(m) < s + 0x1.09p-52,
+	   compute nearest rounded result:
+	   the nearest result to 52 bits is either s or s+0x1p-52,
+	   we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.  */
+	uint64_t d0, d1, d2;
+	double y, t;
+	d0 = (m << 42) - s*s;
+	d1 = s - d0;
+	d2 = d1 + s + 1;
+	s += d1 >> 63;
+	s &= 0x000fffffffffffff;
+	s |= top << 52;
+	y = asdouble(s);
+	if (FENV_SUPPORT) {
+		/* handle rounding modes and inexact exception:
+		   only (s+1)^2 == 2^42 m case is exact otherwise
+		   add a tiny value to cause the fenv effects.  */
+		uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
+		tiny |= (d1^d2) & 0x8000000000000000;
+		t = asdouble(tiny);
+		y = eval_as_double(y + t);
 	}
-	ix0 = (q>>1) + 0x3fe00000;
-	ix1 = q1>>1;
-	if (q&1)
-		ix1 |= sign;
-	ix0 += m << 20;
-	INSERT_WORDS(z, ix0, ix1);
-	return z;
+	return y;
 }
diff --git a/src/math/sqrt_data.c b/src/math/sqrt_data.c
new file mode 100644
index 00000000..61bc22f4
--- /dev/null
+++ b/src/math/sqrt_data.c
@@ -0,0 +1,19 @@
+#include "sqrt_data.h"
+const uint16_t __rsqrt_tab[128] = {
+0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
+0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
+0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
+0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430,
+0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59,
+0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925,
+0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479,
+0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040,
+0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234,
+0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2,
+0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1,
+0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192,
+0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f,
+0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4,
+0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59,
+0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560,
+};
diff --git a/src/math/sqrt_data.h b/src/math/sqrt_data.h
new file mode 100644
index 00000000..260c7f9c
--- /dev/null
+++ b/src/math/sqrt_data.h
@@ -0,0 +1,13 @@
+#ifndef _SQRT_DATA_H
+#define _SQRT_DATA_H
+
+#include <features.h>
+#include <stdint.h>
+
+/* if x in [1,2): i = (int)(64*x);
+   if x in [2,4): i = (int)(32*x-64);
+   __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
+   |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
+extern hidden const uint16_t __rsqrt_tab[128];
+
+#endif
diff --git a/src/math/sqrtf.c b/src/math/sqrtf.c
index 28cb4ad3..740d81cb 100644
--- a/src/math/sqrtf.c
+++ b/src/math/sqrtf.c
@@ -1,84 +1,83 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
+#include <stdint.h>
+#include <math.h>
 #include "libm.h"
+#include "sqrt_data.h"
 
-static const float tiny = 1.0e-30;
+#define FENV_SUPPORT 1
 
-float sqrtf(float x)
+static inline uint32_t mul32(uint32_t a, uint32_t b)
 {
-	float z;
-	int32_t sign = (int)0x80000000;
-	int32_t ix,s,q,m,t,i;
-	uint32_t r;
+	return (uint64_t)a*b >> 32;
+}
 
-	GET_FLOAT_WORD(ix, x);
+/* see sqrt.c for more detailed comments.  */
 
-	/* take care of Inf and NaN */
-	if ((ix&0x7f800000) == 0x7f800000)
-		return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+float sqrtf(float x)
+{
+	uint32_t ix, m, m1, m0, even, ey;
 
-	/* take care of zero */
-	if (ix <= 0) {
-		if ((ix&~sign) == 0)
-			return x;  /* sqrt(+-0) = +-0 */
-		if (ix < 0)
-			return (x-x)/(x-x);  /* sqrt(-ve) = sNaN */
-	}
-	/* normalize x */
-	m = ix>>23;
-	if (m == 0) {  /* subnormal x */
-		for (i = 0; (ix&0x00800000) == 0; i++)
-			ix<<=1;
-		m -= i - 1;
+	ix = asuint(x);
+	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+		/* x < 0x1p-126 or inf or nan.  */
+		if (ix * 2 == 0)
+			return x;
+		if (ix == 0x7f800000)
+			return x;
+		if (ix > 0x7f800000)
+			return __math_invalidf(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint(x * 0x1p23f);
+		ix -= 23 << 23;
 	}
-	m -= 127;  /* unbias exponent */
-	ix = (ix&0x007fffff)|0x00800000;
-	if (m&1)  /* odd m, double x to make it even */
-		ix += ix;
-	m >>= 1;  /* m = [m/2] */
 
-	/* generate sqrt(x) bit by bit */
-	ix += ix;
-	q = s = 0;       /* q = sqrt(x) */
-	r = 0x01000000;  /* r = moving bit from right to left */
+	/* x = 4^e m; with int e and m in [1, 4).  */
+	even = ix & 0x00800000;
+	m1 = (ix << 8) | 0x80000000;
+	m0 = (ix << 7) & 0x7fffffff;
+	m = even ? m0 : m1;
 
-	while (r != 0) {
-		t = s + r;
-		if (t <= ix) {
-			s = t+r;
-			ix -= t;
-			q += r;
-		}
-		ix += ix;
-		r >>= 1;
-	}
+	/* 2^e is the exponent part of the return value.  */
+	ey = ix >> 1;
+	ey += 0x3f800000 >> 1;
+	ey &= 0x7f800000;
+
+	/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations.  */
+	static const uint32_t three = 0xc0000000;
+	uint32_t r, s, d, u, i;
+	i = (ix >> 17) % 128;
+	r = (uint32_t)__rsqrt_tab[i] << 16;
+	/* |r*sqrt(m) - 1| < 0x1p-8 */
+	s = mul32(m, r);
+	/* |s/sqrt(m) - 1| < 0x1p-8 */
+	d = mul32(s, r);
+	u = three - d;
+	r = mul32(r, u) << 1;
+	/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
+	s = mul32(s, u) << 1;
+	/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
+	d = mul32(s, r);
+	u = three - d;
+	s = mul32(s, u);
+	/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
+	s = (s - 1)>>6;
+	/* s < sqrt(m) < s + 0x1.08p-23 */
 
-	/* use floating add to find out rounding direction */
-	if (ix != 0) {
-		z = 1.0f - tiny; /* raise inexact flag */
-		if (z >= 1.0f) {
-			z = 1.0f + tiny;
-			if (z > 1.0f)
-				q += 2;
-			else
-				q += q & 1;
-		}
+	/* compute nearest rounded result.  */
+	uint32_t d0, d1, d2;
+	float y, t;
+	d0 = (m << 16) - s*s;
+	d1 = s - d0;
+	d2 = d1 + s + 1;
+	s += d1 >> 31;
+	s &= 0x007fffff;
+	s |= ey;
+	y = asfloat(s);
+	if (FENV_SUPPORT) {
+		/* handle rounding and inexact exception. */
+		uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
+		tiny |= (d1^d2) & 0x80000000;
+		t = asfloat(tiny);
+		y = eval_as_float(y + t);
 	}
-	ix = (q>>1) + 0x3f000000;
-	ix += m << 23;
-	SET_FLOAT_WORD(z, ix);
-	return z;
+	return y;
 }
diff --git a/src/math/sqrtl.c b/src/math/sqrtl.c
index 83a8f80c..a231b3f2 100644
--- a/src/math/sqrtl.c
+++ b/src/math/sqrtl.c
@@ -1,7 +1,259 @@
+#include <stdint.h>
 #include <math.h>
+#include <float.h>
+#include "libm.h"
 
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double sqrtl(long double x)
 {
-	/* FIXME: implement in C, this is for LDBL_MANT_DIG == 64 only */
 	return sqrt(x);
 }
+#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
+#include "sqrt_data.h"
+
+#define FENV_SUPPORT 1
+
+typedef struct {
+	uint64_t hi;
+	uint64_t lo;
+} u128;
+
+/* top: 16 bit sign+exponent, x: significand.  */
+static inline long double mkldbl(uint64_t top, u128 x)
+{
+	union ldshape u;
+#if LDBL_MANT_DIG == 113
+	u.i2.hi = x.hi;
+	u.i2.lo = x.lo;
+	u.i2.hi &= 0x0000ffffffffffff;
+	u.i2.hi |= top << 48;
+#elif LDBL_MANT_DIG == 64
+	u.i.se = top;
+	u.i.m = x.lo;
+	/* force the top bit on non-zero (and non-subnormal) results.  */
+	if (top & 0x7fff)
+		u.i.m |= 0x8000000000000000;
+#endif
+	return u.f;
+}
+
+/* return: top 16 bit is sign+exp and following bits are the significand.  */
+static inline u128 asu128(long double x)
+{
+	union ldshape u = {.f=x};
+	u128 r;
+#if LDBL_MANT_DIG == 113
+	r.hi = u.i2.hi;
+	r.lo = u.i2.lo;
+#elif LDBL_MANT_DIG == 64
+	r.lo = u.i.m<<49;
+	/* ignore the top bit: pseudo numbers are not handled. */
+	r.hi = u.i.m>>15;
+	r.hi &= 0x0000ffffffffffff;
+	r.hi |= (uint64_t)u.i.se << 48;
+#endif
+	return r;
+}
+
+/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
+static inline uint32_t mul32(uint32_t a, uint32_t b)
+{
+	return (uint64_t)a*b >> 32;
+}
+
+/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
+static inline uint64_t mul64(uint64_t a, uint64_t b)
+{
+	uint64_t ahi = a>>32;
+	uint64_t alo = a&0xffffffff;
+	uint64_t bhi = b>>32;
+	uint64_t blo = b&0xffffffff;
+	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
+}
+
+static inline u128 add64(u128 a, uint64_t b)
+{
+	u128 r;
+	r.lo = a.lo + b;
+	r.hi = a.hi;
+	if (r.lo < a.lo)
+		r.hi++;
+	return r;
+}
+
+static inline u128 add128(u128 a, u128 b)
+{
+	u128 r;
+	r.lo = a.lo + b.lo;
+	r.hi = a.hi + b.hi;
+	if (r.lo < a.lo)
+		r.hi++;
+	return r;
+}
+
+static inline u128 sub64(u128 a, uint64_t b)
+{
+	u128 r;
+	r.lo = a.lo - b;
+	r.hi = a.hi;
+	if (a.lo < b)
+		r.hi--;
+	return r;
+}
+
+static inline u128 sub128(u128 a, u128 b)
+{
+	u128 r;
+	r.lo = a.lo - b.lo;
+	r.hi = a.hi - b.hi;
+	if (a.lo < b.lo)
+		r.hi--;
+	return r;
+}
+
+/* a<<n, 0 <= n <= 127 */
+static inline u128 lsh(u128 a, int n)
+{
+	if (n == 0)
+		return a;
+	if (n >= 64) {
+		a.hi = a.lo<<(n-64);
+		a.lo = 0;
+	} else {
+		a.hi = (a.hi<<n) | (a.lo>>(64-n));
+		a.lo = a.lo<<n;
+	}
+	return a;
+}
+
+/* a>>n, 0 <= n <= 127 */
+static inline u128 rsh(u128 a, int n)
+{
+	if (n == 0)
+		return a;
+	if (n >= 64) {
+		a.lo = a.hi>>(n-64);
+		a.hi = 0;
+	} else {
+		a.lo = (a.lo>>n) | (a.hi<<(64-n));
+		a.hi = a.hi>>n;
+	}
+	return a;
+}
+
+/* returns a*b exactly.  */
+static inline u128 mul64_128(uint64_t a, uint64_t b)
+{
+	u128 r;
+	uint64_t ahi = a>>32;
+	uint64_t alo = a&0xffffffff;
+	uint64_t bhi = b>>32;
+	uint64_t blo = b&0xffffffff;
+	uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
+	uint64_t lo2 = (alo*blo)&0xffffffff;
+	r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
+	r.lo = (lo1<<32) + lo2;
+	return r;
+}
+
+/* returns a*b*2^-128 - e, with error 0 <= e < 7.  */
+static inline u128 mul128(u128 a, u128 b)
+{
+	u128 hi = mul64_128(a.hi, b.hi);
+	uint64_t m1 = mul64(a.hi, b.lo);
+	uint64_t m2 = mul64(a.lo, b.hi);
+	return add64(add64(hi, m1), m2);
+}
+
+/* returns a*b % 2^128.  */
+static inline u128 mul128_tail(u128 a, u128 b)
+{
+	u128 lo = mul64_128(a.lo, b.lo);
+	lo.hi += a.hi*b.lo + a.lo*b.hi;
+	return lo;
+}
+
+
+/* see sqrt.c for detailed comments.  */
+
+long double sqrtl(long double x)
+{
+	u128 ix, ml;
+	uint64_t top;
+
+	ix = asu128(x);
+	top = ix.hi >> 48;
+	if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
+		/* x < 0x1p-16382 or inf or nan.  */
+		if (2*ix.hi == 0 && ix.lo == 0)
+			return x;
+		if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
+			return x;
+		if (top >= 0x7fff)
+			return __math_invalidl(x);
+		/* x is subnormal, normalize it.  */
+		ix = asu128(x * 0x1p112);
+		top = ix.hi >> 48;
+		top -= 112;
+	}
+
+	/* x = 4^e m; with int e and m in [1, 4) */
+	int even = top & 1;
+	ml = lsh(ix, 15);
+	ml.hi |= 0x8000000000000000;
+	if (even) ml = rsh(ml, 1);
+	top = (top + 0x3fff) >> 1;
+
+	/* r ~ 1/sqrt(m) */
+	const uint64_t three = 0xc0000000;
+	uint64_t r, s, d, u, i;
+	i = (ix.hi >> 42) % 128;
+	r = (uint32_t)__rsqrt_tab[i] << 16;
+	/* |r sqrt(m) - 1| < 0x1p-8 */
+	s = mul32(ml.hi>>32, r);
+	d = mul32(s, r);
+	u = three - d;
+	r = mul32(u, r) << 1;
+	/* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
+	r = r<<32;
+	s = mul64(ml.hi, r);
+	d = mul64(s, r);
+	u = (three<<32) - d;
+	r = mul64(u, r) << 1;
+	/* |r sqrt(m) - 1| < 0x1.a5p-31 */
+	s = mul64(u, s) << 1;
+	d = mul64(s, r);
+	u = (three<<32) - d;
+	r = mul64(u, r) << 1;
+	/* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
+
+	const u128 threel = {.hi=three<<32, .lo=0};
+	u128 rl, sl, dl, ul;
+	rl.hi = r;
+	rl.lo = 0;
+	sl = mul128(ml, rl);
+	dl = mul128(sl, rl);
+	ul = sub128(threel, dl);
+	sl = mul128(ul, sl); /* repr: 3.125 */
+	/* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
+	sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
+	/* s < sqrt(m) < s + 1 ULP + tiny */
+
+	long double y;
+	u128 d2, d1, d0;
+	d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
+	d1 = sub128(sl, d0);
+	d2 = add128(add64(sl, 1), d1);
+	sl = add64(sl, d1.hi >> 63);
+	y = mkldbl(top, sl);
+	if (FENV_SUPPORT) {
+		/* handle rounding modes and inexact exception.  */
+		top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
+		top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
+		y += mkldbl(top, (u128){0});
+	}
+	return y;
+}
+#else
+#error unsupported long double format
+#endif
diff --git a/src/math/x32/lrintl.s b/src/math/x32/lrintl.s
index ee97d1cf..d4355c32 100644
--- a/src/math/x32/lrintl.s
+++ b/src/math/x32/lrintl.s
@@ -2,6 +2,6 @@
 .type lrintl,@function
 lrintl:
 	fldt 8(%esp)
-	fistpll 8(%esp)
-	mov 8(%esp),%rax
+	fistpl 8(%esp)
+	movl 8(%esp),%eax
 	ret
diff --git a/src/math/x86_64/fabs.c b/src/math/x86_64/fabs.c
new file mode 100644
index 00000000..16562477
--- /dev/null
+++ b/src/math/x86_64/fabs.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+double fabs(double x)
+{
+	double t;
+	__asm__ ("pcmpeqd %0, %0" : "=x"(t));          // t = ~0
+	__asm__ ("psrlq   $1, %0" : "+x"(t));          // t >>= 1
+	__asm__ ("andps   %1, %0" : "+x"(x) : "x"(t)); // x &= t
+	return x;
+}
diff --git a/src/math/x86_64/fabs.s b/src/math/x86_64/fabs.s
deleted file mode 100644
index 5715005e..00000000
--- a/src/math/x86_64/fabs.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.global fabs
-.type fabs,@function
-fabs:
-	xor %eax,%eax
-	dec %rax
-	shr %rax
-	movq %rax,%xmm1
-	andpd %xmm1,%xmm0
-	ret
diff --git a/src/math/x86_64/fabsf.c b/src/math/x86_64/fabsf.c
new file mode 100644
index 00000000..36ea7481
--- /dev/null
+++ b/src/math/x86_64/fabsf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+float fabsf(float x)
+{
+	float t;
+	__asm__ ("pcmpeqd %0, %0" : "=x"(t));          // t = ~0
+	__asm__ ("psrld   $1, %0" : "+x"(t));          // t >>= 1
+	__asm__ ("andps   %1, %0" : "+x"(x) : "x"(t)); // x &= t
+	return x;
+}
diff --git a/src/math/x86_64/fabsf.s b/src/math/x86_64/fabsf.s
deleted file mode 100644
index 501a1f17..00000000
--- a/src/math/x86_64/fabsf.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global fabsf
-.type fabsf,@function
-fabsf:
-	mov $0x7fffffff,%eax
-	movq %rax,%xmm1
-	andps %xmm1,%xmm0
-	ret
diff --git a/src/math/x86_64/fabsl.c b/src/math/x86_64/fabsl.c
new file mode 100644
index 00000000..cc1c9ed9
--- /dev/null
+++ b/src/math/x86_64/fabsl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double fabsl(long double x)
+{
+	__asm__ ("fabs" : "+t"(x));
+	return x;
+}
diff --git a/src/math/x86_64/fabsl.s b/src/math/x86_64/fabsl.s
deleted file mode 100644
index 4e7ab525..00000000
--- a/src/math/x86_64/fabsl.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global fabsl
-.type fabsl,@function
-fabsl:
-	fldt 8(%rsp)
-	fabs
-	ret
diff --git a/src/math/x86_64/fmodl.c b/src/math/x86_64/fmodl.c
new file mode 100644
index 00000000..3daeab06
--- /dev/null
+++ b/src/math/x86_64/fmodl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+long double fmodl(long double x, long double y)
+{
+	unsigned short fpsr;
+	do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s
deleted file mode 100644
index ea07b402..00000000
--- a/src/math/x86_64/fmodl.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global fmodl
-.type fmodl,@function
-fmodl:
-	fldt 24(%rsp)
-	fldt 8(%rsp)
-1:	fprem
-	fnstsw %ax
-	testb $4,%ah
-	jnz 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/x86_64/llrint.c b/src/math/x86_64/llrint.c
new file mode 100644
index 00000000..dd38a722
--- /dev/null
+++ b/src/math/x86_64/llrint.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llrint(double x)
+{
+	long long r;
+	__asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x));
+	return r;
+}
diff --git a/src/math/x86_64/llrint.s b/src/math/x86_64/llrint.s
deleted file mode 100644
index bf476498..00000000
--- a/src/math/x86_64/llrint.s
+++ /dev/null
@@ -1,5 +0,0 @@
-.global llrint
-.type llrint,@function
-llrint:
-	cvtsd2si %xmm0,%rax
-	ret
diff --git a/src/math/x86_64/llrintf.c b/src/math/x86_64/llrintf.c
new file mode 100644
index 00000000..fc8625e8
--- /dev/null
+++ b/src/math/x86_64/llrintf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llrintf(float x)
+{
+	long long r;
+	__asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x));
+	return r;
+}
diff --git a/src/math/x86_64/llrintf.s b/src/math/x86_64/llrintf.s
deleted file mode 100644
index d7204ac0..00000000
--- a/src/math/x86_64/llrintf.s
+++ /dev/null
@@ -1,5 +0,0 @@
-.global llrintf
-.type llrintf,@function
-llrintf:
-	cvtss2si %xmm0,%rax
-	ret
diff --git a/src/math/x86_64/llrintl.c b/src/math/x86_64/llrintl.c
new file mode 100644
index 00000000..c439ef28
--- /dev/null
+++ b/src/math/x86_64/llrintl.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llrintl(long double x)
+{
+	long long r;
+	__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/x86_64/llrintl.s b/src/math/x86_64/llrintl.s
deleted file mode 100644
index 1ec0817d..00000000
--- a/src/math/x86_64/llrintl.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global llrintl
-.type llrintl,@function
-llrintl:
-	fldt 8(%rsp)
-	fistpll 8(%rsp)
-	mov 8(%rsp),%rax
-	ret
diff --git a/src/math/x86_64/lrint.c b/src/math/x86_64/lrint.c
new file mode 100644
index 00000000..a742fec6
--- /dev/null
+++ b/src/math/x86_64/lrint.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lrint(double x)
+{
+	long r;
+	__asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x));
+	return r;
+}
diff --git a/src/math/x86_64/lrint.s b/src/math/x86_64/lrint.s
deleted file mode 100644
index 15fc2454..00000000
--- a/src/math/x86_64/lrint.s
+++ /dev/null
@@ -1,5 +0,0 @@
-.global lrint
-.type lrint,@function
-lrint:
-	cvtsd2si %xmm0,%rax
-	ret
diff --git a/src/math/x86_64/lrintf.c b/src/math/x86_64/lrintf.c
new file mode 100644
index 00000000..2ba5639d
--- /dev/null
+++ b/src/math/x86_64/lrintf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lrintf(float x)
+{
+	long r;
+	__asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x));
+	return r;
+}
diff --git a/src/math/x86_64/lrintf.s b/src/math/x86_64/lrintf.s
deleted file mode 100644
index 488423d2..00000000
--- a/src/math/x86_64/lrintf.s
+++ /dev/null
@@ -1,5 +0,0 @@
-.global lrintf
-.type lrintf,@function
-lrintf:
-	cvtss2si %xmm0,%rax
-	ret
diff --git a/src/math/x86_64/lrintl.c b/src/math/x86_64/lrintl.c
new file mode 100644
index 00000000..068e2e4d
--- /dev/null
+++ b/src/math/x86_64/lrintl.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lrintl(long double x)
+{
+	long r;
+	__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
+	return r;
+}
diff --git a/src/math/x86_64/lrintl.s b/src/math/x86_64/lrintl.s
deleted file mode 100644
index d587b12b..00000000
--- a/src/math/x86_64/lrintl.s
+++ /dev/null
@@ -1,7 +0,0 @@
-.global lrintl
-.type lrintl,@function
-lrintl:
-	fldt 8(%rsp)
-	fistpll 8(%rsp)
-	mov 8(%rsp),%rax
-	ret
diff --git a/src/math/x86_64/remainderl.c b/src/math/x86_64/remainderl.c
new file mode 100644
index 00000000..8cf75071
--- /dev/null
+++ b/src/math/x86_64/remainderl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+long double remainderl(long double x, long double y)
+{
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
deleted file mode 100644
index cb3857b4..00000000
--- a/src/math/x86_64/remainderl.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global remainderl
-.type remainderl,@function
-remainderl:
-	fldt 24(%rsp)
-	fldt 8(%rsp)
-1:	fprem1
-	fnstsw %ax
-	testb $4,%ah
-	jnz 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/x86_64/remquol.c b/src/math/x86_64/remquol.c
new file mode 100644
index 00000000..60eef089
--- /dev/null
+++ b/src/math/x86_64/remquol.c
@@ -0,0 +1,32 @@
+#include <math.h>
+
+long double remquol(long double x, long double y, int *quo)
+{
+	signed char *cx = (void *)&x, *cy = (void *)&y;
+	/* By ensuring that addresses of x and y cannot be discarded,
+	 * this empty asm guides GCC into representing extraction of
+	 * their sign bits as memory loads rather than making x and y
+	 * not-address-taken internally and using bitfield operations,
+	 * which in the end wouldn't work out, as extraction from FPU
+	 * registers needs to go through memory anyway. This way GCC
+	 * should manage to use incoming stack slots without spills. */
+	__asm__ ("" :: "X"(cx), "X"(cy));
+
+	long double t = x;
+	unsigned fpsr;
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	/* C0, C1, C3 flags in x87 status word carry low bits of quotient:
+	 * 15 14 13 12 11 10  9  8
+	 *  . C3  .  .  . C2 C1 C0
+	 *  . b1  .  .  .  0 b0 b2 */
+	unsigned char i = fpsr >> 8;
+	i = i>>4 | i<<4;
+	/* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via
+	 * in-register table lookup. */
+	unsigned qbits = 0x7575313164642020 >> (i & 60);
+	qbits &= 7;
+
+	*quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits;
+	return t;
+}
diff --git a/src/math/x86_64/rintl.c b/src/math/x86_64/rintl.c
new file mode 100644
index 00000000..e1a92077
--- /dev/null
+++ b/src/math/x86_64/rintl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double rintl(long double x)
+{
+	__asm__ ("frndint" : "+t"(x));
+	return x;
+}
diff --git a/src/math/x86_64/rintl.s b/src/math/x86_64/rintl.s
deleted file mode 100644
index 64e663cd..00000000
--- a/src/math/x86_64/rintl.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.global rintl
-.type rintl,@function
-rintl:
-	fldt 8(%rsp)
-	frndint
-	ret
diff --git a/src/math/x86_64/sqrt.c b/src/math/x86_64/sqrt.c
new file mode 100644
index 00000000..657e09e3
--- /dev/null
+++ b/src/math/x86_64/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x)
+{
+	__asm__ ("sqrtsd %1, %0" : "=x"(x) : "x"(x));
+	return x;
+}
diff --git a/src/math/x86_64/sqrt.s b/src/math/x86_64/sqrt.s
deleted file mode 100644
index d3c609f9..00000000
--- a/src/math/x86_64/sqrt.s
+++ /dev/null
@@ -1,4 +0,0 @@
-.global sqrt
-.type sqrt,@function
-sqrt:	sqrtsd %xmm0, %xmm0
-	ret
diff --git a/src/math/x86_64/sqrtf.c b/src/math/x86_64/sqrtf.c
new file mode 100644
index 00000000..720baec6
--- /dev/null
+++ b/src/math/x86_64/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x)
+{
+	__asm__ ("sqrtss %1, %0" : "=x"(x) : "x"(x));
+	return x;
+}
diff --git a/src/math/x86_64/sqrtf.s b/src/math/x86_64/sqrtf.s
deleted file mode 100644
index eec48c60..00000000
--- a/src/math/x86_64/sqrtf.s
+++ /dev/null
@@ -1,4 +0,0 @@
-.global sqrtf
-.type sqrtf,@function
-sqrtf:  sqrtss %xmm0, %xmm0
-	ret
diff --git a/src/math/x86_64/sqrtl.c b/src/math/x86_64/sqrtl.c
new file mode 100644
index 00000000..864cfcc4
--- /dev/null
+++ b/src/math/x86_64/sqrtl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double sqrtl(long double x)
+{
+	__asm__ ("fsqrt" : "+t"(x));
+	return x;
+}
diff --git a/src/math/x86_64/sqrtl.s b/src/math/x86_64/sqrtl.s
deleted file mode 100644
index 23cd687d..00000000
--- a/src/math/x86_64/sqrtl.s
+++ /dev/null
@@ -1,5 +0,0 @@
-.global sqrtl
-.type sqrtl,@function
-sqrtl:	fldt 8(%rsp)
-	fsqrt
-	ret
diff --git a/src/misc/getentropy.c b/src/misc/getentropy.c
index d2f282ce..651ea95f 100644
--- a/src/misc/getentropy.c
+++ b/src/misc/getentropy.c
@@ -6,7 +6,7 @@
 
 int getentropy(void *buffer, size_t len)
 {
-	int cs, ret;
+	int cs, ret = 0;
 	char *pos = buffer;
 
 	if (len > 256) {
diff --git a/src/misc/getopt.c b/src/misc/getopt.c
index 864d52cd..b02b81c3 100644
--- a/src/misc/getopt.c
+++ b/src/misc/getopt.c
@@ -1,3 +1,4 @@
+#define _BSD_SOURCE
 #include <unistd.h>
 #include <wchar.h>
 #include <string.h>
@@ -86,7 +87,8 @@ int getopt(int argc, char * const argv[], const char *optstring)
 	if (optstring[i] == ':') {
 		optarg = 0;
 		if (optstring[i+1] != ':' || optpos) {
-			optarg = argv[optind++] + optpos;
+			optarg = argv[optind++];
+			if (optpos) optarg += optpos;
 			optpos = 0;
 		}
 		if (optind > argc) {
diff --git a/src/misc/getrlimit.c b/src/misc/getrlimit.c
index 2ab2f0f4..a5558d81 100644
--- a/src/misc/getrlimit.c
+++ b/src/misc/getrlimit.c
@@ -6,12 +6,13 @@
 
 int getrlimit(int resource, struct rlimit *rlim)
 {
-	unsigned long k_rlim[2];
 	int ret = syscall(SYS_prlimit64, 0, resource, 0, rlim);
 	if (!ret) {
 		FIX(rlim->rlim_cur);
 		FIX(rlim->rlim_max);
 	}
+#ifdef SYS_getrlimit
+	unsigned long k_rlim[2];
 	if (!ret || errno != ENOSYS)
 		return ret;
 	if (syscall(SYS_getrlimit, resource, k_rlim) < 0)
@@ -21,6 +22,7 @@ int getrlimit(int resource, struct rlimit *rlim)
 	FIX(rlim->rlim_cur);
 	FIX(rlim->rlim_max);
 	return 0;
+#else
+	return ret;
+#endif
 }
-
-weak_alias(getrlimit, getrlimit64);
diff --git a/src/misc/getrusage.c b/src/misc/getrusage.c
index 0aaf0ac7..8e03e2e3 100644
--- a/src/misc/getrusage.c
+++ b/src/misc/getrusage.c
@@ -1,7 +1,35 @@
 #include <sys/resource.h>
+#include <string.h>
+#include <errno.h>
 #include "syscall.h"
 
 int getrusage(int who, struct rusage *ru)
 {
-	return syscall(SYS_getrusage, who, ru);
+	int r;
+#ifdef SYS_getrusage_time64
+	long long kru64[18];
+	r = __syscall(SYS_getrusage_time64, who, kru64);
+	if (!r) {
+		ru->ru_utime = (struct timeval)
+			{ .tv_sec = kru64[0], .tv_usec = kru64[1] };
+		ru->ru_stime = (struct timeval)
+			{ .tv_sec = kru64[2], .tv_usec = kru64[3] };
+		char *slots = (char *)&ru->ru_maxrss;
+		for (int i=0; i<14; i++)
+			*(long *)(slots + i*sizeof(long)) = kru64[4+i];
+	}
+	if (SYS_getrusage_time64 == SYS_getrusage || r != -ENOSYS)
+		return __syscall_ret(r);
+#endif
+	char *dest = (char *)&ru->ru_maxrss - 4*sizeof(long);
+	r = __syscall(SYS_getrusage, who, dest);
+	if (!r && sizeof(time_t) > sizeof(long)) {
+		long kru[4];
+		memcpy(kru, dest, 4*sizeof(long));
+		ru->ru_utime = (struct timeval)
+			{ .tv_sec = kru[0], .tv_usec = kru[1] };
+		ru->ru_stime = (struct timeval)
+			{ .tv_sec = kru[2], .tv_usec = kru[3] };
+	}
+	return __syscall_ret(r);
 }
diff --git a/src/misc/initgroups.c b/src/misc/initgroups.c
index 922a9581..101f5c7b 100644
--- a/src/misc/initgroups.c
+++ b/src/misc/initgroups.c
@@ -1,11 +1,29 @@
 #define _GNU_SOURCE
 #include <grp.h>
 #include <limits.h>
+#include <stdlib.h>
 
 int initgroups(const char *user, gid_t gid)
 {
-	gid_t groups[NGROUPS_MAX];
-	int count = NGROUPS_MAX;
-	if (getgrouplist(user, gid, groups, &count) < 0) return -1;
-	return setgroups(count, groups);
+	gid_t buf[32], *groups = buf;
+	int count = sizeof buf / sizeof *buf, prev_count = count;
+	while (getgrouplist(user, gid, groups, &count) < 0) {
+		if (groups != buf) free(groups);
+
+		/* Return if failure isn't buffer size */
+		if (count <= prev_count)
+			return -1;
+
+		/* Always increase by at least 50% to limit to
+		 * logarithmically many retries on TOCTOU races. */
+		if (count < prev_count + (prev_count>>1))
+			count = prev_count + (prev_count>>1);
+
+		groups = calloc(count, sizeof *groups);
+		if (!groups) return -1;
+		prev_count = count;
+	}
+	int ret = setgroups(count, groups);
+	if (groups != buf) free(groups);
+	return ret;
 }
diff --git a/src/misc/ioctl.c b/src/misc/ioctl.c
index 5a41f0e8..35804f02 100644
--- a/src/misc/ioctl.c
+++ b/src/misc/ioctl.c
@@ -1,7 +1,130 @@
 #include <sys/ioctl.h>
 #include <stdarg.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <endian.h>
 #include "syscall.h"
 
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+
+#define W 1
+#define R 2
+#define WR 3
+
+struct ioctl_compat_map {
+	int new_req, old_req;
+	unsigned char old_size, dir, force_align, noffs;
+	unsigned char offsets[8];
+};
+
+#define NINTH(a,b,c,d,e,f,g,h,i,...) i
+#define COUNT(...) NINTH(__VA_ARGS__,8,7,6,5,4,3,2,1,0)
+#define OFFS(...) COUNT(__VA_ARGS__), { __VA_ARGS__ }
+
+/* yields a type for a struct with original size n, with a misaligned
+ * timeval/timespec expanded from 32- to 64-bit. for use with ioctl
+ * number producing macros; only size of result is meaningful. */
+#define new_misaligned(n) struct { int i; time_t t; char c[(n)-4]; }
+
+struct v4l2_event {
+	uint32_t a;
+	uint64_t b[8];
+	uint32_t c[2], ts[2], d[9];
+};
+
+static const struct ioctl_compat_map compat_map[] = {
+	{ SIOCGSTAMP, SIOCGSTAMP_OLD, 8, R, 0, OFFS(0, 4) },
+	{ SIOCGSTAMPNS, SIOCGSTAMPNS_OLD, 8, R, 0, OFFS(0, 4) },
+
+	/* SNDRV_TIMER_IOCTL_STATUS */
+	{ _IOR('T', 0x14, char[96]), _IOR('T', 0x14, 88), 88, R, 0, OFFS(0,4) },
+
+	/* SNDRV_PCM_IOCTL_STATUS[_EXT] */
+	{ _IOR('A', 0x20, char[128]), _IOR('A', 0x20, char[108]), 108, R, 1, OFFS(4,8,12,16,52,56,60,64) },
+	{ _IOWR('A', 0x24, char[128]), _IOWR('A', 0x24, char[108]), 108, WR, 1, OFFS(4,8,12,16,52,56,60,64) },
+
+	/* SNDRV_RAWMIDI_IOCTL_STATUS */
+	{ _IOWR('W', 0x20, char[48]), _IOWR('W', 0x20, char[36]), 36, WR, 1, OFFS(4,8) },
+
+	/* SNDRV_PCM_IOCTL_SYNC_PTR - with 3 subtables */
+	{ _IOWR('A', 0x23, char[136]), _IOWR('A', 0x23, char[132]), 0, WR, 1, 0 },
+	{ 0, 0, 4, WR, 1, 0 }, /* snd_pcm_sync_ptr (flags only) */
+	{ 0, 0, 32, WR, 1, OFFS(8,12,16,24,28) }, /* snd_pcm_mmap_status */
+	{ 0, 0, 4, WR, 1, 0 }, /* snd_pcm_mmap_control (each member) */
+
+	/* VIDIOC_QUERYBUF, VIDIOC_QBUF, VIDIOC_DQBUF, VIDIOC_PREPARE_BUF */
+	{ _IOWR('V',  9, new_misaligned(68)), _IOWR('V',  9, char[68]), 68, WR, 1, OFFS(20, 24) },
+	{ _IOWR('V', 15, new_misaligned(68)), _IOWR('V', 15, char[68]), 68, WR, 1, OFFS(20, 24) },
+	{ _IOWR('V', 17, new_misaligned(68)), _IOWR('V', 17, char[68]), 68, WR, 1, OFFS(20, 24) },
+	{ _IOWR('V', 93, new_misaligned(68)), _IOWR('V', 93, char[68]), 68, WR, 1, OFFS(20, 24) },
+
+	/* VIDIOC_DQEVENT */
+	{ _IOR('V', 89, new_misaligned(120)), _IOR('V', 89, struct v4l2_event), sizeof(struct v4l2_event),
+	  R, 0, OFFS(offsetof(struct v4l2_event, ts[0]), offsetof(struct v4l2_event, ts[1])) },
+
+	/* VIDIOC_OMAP3ISP_STAT_REQ */
+	{ _IOWR('V', 192+6, char[32]), _IOWR('V', 192+6, char[24]), 22, WR, 0, OFFS(0,4) },
+
+	/* PPPIOCGIDLE */
+	{ _IOR('t', 63, char[16]), _IOR('t', 63, char[8]), 8, R, 0, OFFS(0,4) },
+
+	/* PPGETTIME, PPSETTIME */
+	{ _IOR('p', 0x95, char[16]), _IOR('p', 0x95, char[8]), 8, R, 0, OFFS(0,4) },
+	{ _IOW('p', 0x96, char[16]), _IOW('p', 0x96, char[8]), 8, W, 0, OFFS(0,4) },
+
+	/* LPSETTIMEOUT */
+	{ _IOW(0x6, 0xf, char[16]), 0x060f, 8, W, 0, OFFS(0,4) },
+};
+
+static void convert_ioctl_struct(const struct ioctl_compat_map *map, char *old, char *new, int dir)
+{
+	int new_offset = 0;
+	int old_offset = 0;
+	int old_size = map->old_size;
+	if (!(dir & map->dir)) return;
+	if (!map->old_size) {
+		/* offsets hard-coded for SNDRV_PCM_IOCTL_SYNC_PTR;
+		 * if another exception appears this needs changing. */
+		convert_ioctl_struct(map+1, old, new, dir);
+		convert_ioctl_struct(map+2, old+4, new+8, dir);
+		/* snd_pcm_mmap_control, special-cased due to kernel
+		 * type definition having been botched. */
+		int adj = BYTE_ORDER==BIG_ENDIAN ? 4 : 0;
+		convert_ioctl_struct(map+3, old+68, new+72+adj, dir);
+		convert_ioctl_struct(map+3, old+72, new+76+3*adj, dir);
+		return;
+	}
+	for (int i=0; i < map->noffs; i++) {
+		int ts_offset = map->offsets[i];
+		int len = ts_offset-old_offset;
+		if (dir==W) memcpy(old+old_offset, new+new_offset, len);
+		else memcpy(new+new_offset, old+old_offset, len);
+		new_offset += len;
+		old_offset += len;
+		long long new_ts;
+		long old_ts;
+		int align = map->force_align ? sizeof(time_t) : alignof(time_t);
+		new_offset += (align-1) & -new_offset;
+		if (dir==W) {
+			memcpy(&new_ts, new+new_offset, sizeof new_ts);
+			old_ts = new_ts;
+			memcpy(old+old_offset, &old_ts, sizeof old_ts);
+		} else {
+			memcpy(&old_ts, old+old_offset, sizeof old_ts);
+			new_ts = old_ts;
+			memcpy(new+new_offset, &new_ts, sizeof new_ts);
+		}
+		new_offset += sizeof new_ts;
+		old_offset += sizeof old_ts;
+	}
+	if (dir==W) memcpy(old+old_offset, new+new_offset, old_size-old_offset);
+	else memcpy(new+new_offset, old+old_offset, old_size-old_offset);
+}
+
 int ioctl(int fd, int req, ...)
 {
 	void *arg;
@@ -9,5 +132,20 @@ int ioctl(int fd, int req, ...)
 	va_start(ap, req);
 	arg = va_arg(ap, void *);
 	va_end(ap);
-	return syscall(SYS_ioctl, fd, req, arg);
+	int r = __syscall(SYS_ioctl, fd, req, arg);
+	if (SIOCGSTAMP != SIOCGSTAMP_OLD && req && r==-ENOTTY) {
+		for (int i=0; i<sizeof compat_map/sizeof *compat_map; i++) {
+			if (compat_map[i].new_req != req) continue;
+			union {
+				long long align;
+				char buf[256];
+			} u;
+			convert_ioctl_struct(&compat_map[i], u.buf, arg, W);
+			r = __syscall(SYS_ioctl, fd, compat_map[i].old_req, u.buf);
+			if (r<0) break;
+			convert_ioctl_struct(&compat_map[i], u.buf, arg, R);
+			break;
+		}
+	}
+	return __syscall_ret(r);
 }
diff --git a/src/misc/lockf.c b/src/misc/lockf.c
index 16a80bec..0162442b 100644
--- a/src/misc/lockf.c
+++ b/src/misc/lockf.c
@@ -28,5 +28,3 @@ int lockf(int fd, int op, off_t size)
 	errno = EINVAL;
 	return -1;
 }
-
-weak_alias(lockf, lockf64);
diff --git a/src/misc/mntent.c b/src/misc/mntent.c
index eabb8200..76f9c162 100644
--- a/src/misc/mntent.c
+++ b/src/misc/mntent.c
@@ -2,6 +2,7 @@
 #include <string.h>
 #include <mntent.h>
 #include <errno.h>
+#include <limits.h>
 
 static char *internal_buf;
 static size_t internal_bufsize;
@@ -19,9 +20,46 @@ int endmntent(FILE *f)
 	return 1;
 }
 
+static char *unescape_ent(char *beg)
+{
+	char *dest = beg;
+	const char *src = beg;
+	while (*src) {
+		const char *val;
+		unsigned char cval = 0;
+		if (*src != '\\') {
+			*dest++ = *src++;
+			continue;
+		}
+		if (src[1] == '\\') {
+			++src;
+			*dest++ = *src++;
+			continue;
+		}
+		val = src + 1;
+		for (int i = 0; i < 3; ++i) {
+			if (*val >= '0' && *val <= '7') {
+				cval <<= 3;
+				cval += *val++ - '0';
+			} else {
+				break;
+			}
+		}
+		if (cval) {
+			*dest++ = cval;
+			src = val;
+		} else {
+			*dest++ = *src++;
+		}
+	}
+	*dest = 0;
+	return beg;
+}
+
 struct mntent *getmntent_r(FILE *f, struct mntent *mnt, char *linebuf, int buflen)
 {
-	int cnt, n[8], use_internal = (linebuf == SENTINEL);
+	int n[8], use_internal = (linebuf == SENTINEL);
+	size_t len, i;
 
 	mnt->mnt_freq = 0;
 	mnt->mnt_passno = 0;
@@ -39,20 +77,24 @@ struct mntent *getmntent_r(FILE *f, struct mntent *mnt, char *linebuf, int bufle
 			errno = ERANGE;
 			return 0;
 		}
-		cnt = sscanf(linebuf, " %n%*s%n %n%*s%n %n%*s%n %n%*s%n %d %d",
+
+		len = strlen(linebuf);
+		if (len > INT_MAX) continue;
+		for (i = 0; i < sizeof n / sizeof *n; i++) n[i] = len;
+		sscanf(linebuf, " %n%*[^ \t\n]%n %n%*[^ \t\n]%n %n%*[^ \t\n]%n %n%*[^ \t\n]%n %d %d",
 			n, n+1, n+2, n+3, n+4, n+5, n+6, n+7,
 			&mnt->mnt_freq, &mnt->mnt_passno);
-	} while (cnt < 2 || linebuf[n[0]] == '#');
+	} while (linebuf[n[0]] == '#' || n[1]==len);
 
 	linebuf[n[1]] = 0;
 	linebuf[n[3]] = 0;
 	linebuf[n[5]] = 0;
 	linebuf[n[7]] = 0;
 
-	mnt->mnt_fsname = linebuf+n[0];
-	mnt->mnt_dir = linebuf+n[2];
-	mnt->mnt_type = linebuf+n[4];
-	mnt->mnt_opts = linebuf+n[6];
+	mnt->mnt_fsname = unescape_ent(linebuf+n[0]);
+	mnt->mnt_dir = unescape_ent(linebuf+n[2]);
+	mnt->mnt_type = unescape_ent(linebuf+n[4]);
+	mnt->mnt_opts = unescape_ent(linebuf+n[6]);
 
 	return mnt;
 }
@@ -73,5 +115,13 @@ int addmntent(FILE *f, const struct mntent *mnt)
 
 char *hasmntopt(const struct mntent *mnt, const char *opt)
 {
-	return strstr(mnt->mnt_opts, opt);
+	size_t l = strlen(opt);
+	char *p = mnt->mnt_opts;
+	for (;;) {
+		if (!strncmp(p, opt, l) && (!p[l] || p[l]==',' || p[l]=='='))
+			return p;
+		p = strchr(p, ',');
+		if (!p) return 0;
+		p++;
+	}
 }
diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index 0a464100..71bc62ee 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -1,5 +1,6 @@
 #include <ftw.h>
 #include <dirent.h>
+#include <fcntl.h>
 #include <sys/stat.h>
 #include <errno.h>
 #include <unistd.h>
@@ -26,16 +27,19 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 	struct history new;
 	int type;
 	int r;
+	int dfd;
+	int err;
 	struct FTW lev;
 
+	st.st_dev = st.st_ino = 0;
+
 	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
 		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
 			type = FTW_SLN;
 		else if (errno != EACCES) return -1;
 		else type = FTW_NS;
 	} else if (S_ISDIR(st.st_mode)) {
-		if (access(path, R_OK) < 0) type = FTW_DNR;
-		else if (flags & FTW_DEPTH) type = FTW_DP;
+		if (flags & FTW_DEPTH) type = FTW_DP;
 		else type = FTW_D;
 	} else if (S_ISLNK(st.st_mode)) {
 		if (flags & FTW_PHYS) type = FTW_SL;
@@ -44,7 +48,7 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 		type = FTW_F;
 	}
 
-	if ((flags & FTW_MOUNT) && h && st.st_dev != h->dev)
+	if ((flags & FTW_MOUNT) && h && type != FTW_NS && st.st_dev != h->dev)
 		return 0;
 	
 	new.chain = h;
@@ -63,6 +67,13 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 		lev.base = k;
 	}
 
+	if (type == FTW_D || type == FTW_DP) {
+		dfd = open(path, O_RDONLY);
+		err = errno;
+		if (dfd < 0 && err == EACCES) type = FTW_DNR;
+		if (!fd_limit) close(dfd);
+	}
+
 	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
 		return r;
 
@@ -71,7 +82,11 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 			return 0;
 
 	if ((type == FTW_D || type == FTW_DP) && fd_limit) {
-		DIR *d = opendir(path);
+		if (dfd < 0) {
+			errno = err;
+			return -1;
+		}
+		DIR *d = fdopendir(dfd);
 		if (d) {
 			struct dirent *de;
 			while ((de = readdir(d))) {
@@ -92,7 +107,8 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 				}
 			}
 			closedir(d);
-		} else if (errno != EACCES) {
+		} else {
+			close(dfd);
 			return -1;
 		}
 	}
@@ -124,5 +140,3 @@ int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, str
 	pthread_setcancelstate(cs, 0);
 	return r;
 }
-
-weak_alias(nftw, nftw64);
diff --git a/src/misc/pty.c b/src/misc/pty.c
index b9cb5eaa..a0577147 100644
--- a/src/misc/pty.c
+++ b/src/misc/pty.c
@@ -7,7 +7,9 @@
 
 int posix_openpt(int flags)
 {
-	return open("/dev/ptmx", flags);
+	int r = open("/dev/ptmx", flags);
+	if (r < 0 && errno == ENOSPC) errno = EAGAIN;
+	return r;
 }
 
 int grantpt(int fd)
diff --git a/src/misc/realpath.c b/src/misc/realpath.c
index d2708e59..db8b74dc 100644
--- a/src/misc/realpath.c
+++ b/src/misc/realpath.c
@@ -1,43 +1,156 @@
 #include <stdlib.h>
 #include <limits.h>
-#include <sys/stat.h>
-#include <fcntl.h>
 #include <errno.h>
 #include <unistd.h>
 #include <string.h>
-#include "syscall.h"
+
+static size_t slash_len(const char *s)
+{
+	const char *s0 = s;
+	while (*s == '/') s++;
+	return s-s0;
+}
 
 char *realpath(const char *restrict filename, char *restrict resolved)
 {
-	int fd;
-	ssize_t r;
-	struct stat st1, st2;
-	char buf[15+3*sizeof(int)];
-	char tmp[PATH_MAX];
+	char stack[PATH_MAX+1];
+	char output[PATH_MAX];
+	size_t p, q, l, l0, cnt=0, nup=0;
+	int check_dir=0;
 
 	if (!filename) {
 		errno = EINVAL;
 		return 0;
 	}
+	l = strnlen(filename, sizeof stack);
+	if (!l) {
+		errno = ENOENT;
+		return 0;
+	}
+	if (l >= PATH_MAX) goto toolong;
+	p = sizeof stack - l - 1;
+	q = 0;
+	memcpy(stack+p, filename, l+1);
+
+	/* Main loop. Each iteration pops the next part from stack of
+	 * remaining path components and consumes any slashes that follow.
+	 * If not a link, it's moved to output; if a link, contents are
+	 * pushed to the stack. */
+restart:
+	for (; ; p+=slash_len(stack+p)) {
+		/* If stack starts with /, the whole component is / or //
+		 * and the output state must be reset. */
+		if (stack[p] == '/') {
+			check_dir=0;
+			nup=0;
+			q=0;
+			output[q++] = '/';
+			p++;
+			/* Initial // is special. */
+			if (stack[p] == '/' && stack[p+1] != '/')
+				output[q++] = '/';
+			continue;
+		}
+
+		char *z = __strchrnul(stack+p, '/');
+		l0 = l = z-(stack+p);
 
-	fd = sys_open(filename, O_PATH|O_NONBLOCK|O_CLOEXEC);
-	if (fd < 0) return 0;
-	__procfdname(buf, fd);
+		if (!l && !check_dir) break;
 
-	r = readlink(buf, tmp, sizeof tmp - 1);
-	if (r < 0) goto err;
-	tmp[r] = 0;
+		/* Skip any . component but preserve check_dir status. */
+		if (l==1 && stack[p]=='.') {
+			p += l;
+			continue;
+		}
 
-	fstat(fd, &st1);
-	r = stat(tmp, &st2);
-	if (r<0 || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) {
-		if (!r) errno = ELOOP;
-		goto err;
+		/* Copy next component onto output at least temporarily, to
+		 * call readlink, but wait to advance output position until
+		 * determining it's not a link. */
+		if (q && output[q-1] != '/') {
+			if (!p) goto toolong;
+			stack[--p] = '/';
+			l++;
+		}
+		if (q+l >= PATH_MAX) goto toolong;
+		memcpy(output+q, stack+p, l);
+		output[q+l] = 0;
+		p += l;
+
+		int up = 0;
+		if (l0==2 && stack[p-2]=='.' && stack[p-1]=='.') {
+			up = 1;
+			/* Any non-.. path components we could cancel start
+			 * after nup repetitions of the 3-byte string "../";
+			 * if there are none, accumulate .. components to
+			 * later apply to cwd, if needed. */
+			if (q <= 3*nup) {
+				nup++;
+				q += l;
+				continue;
+			}
+			/* When previous components are already known to be
+			 * directories, processing .. can skip readlink. */
+			if (!check_dir) goto skip_readlink;
+		}
+		ssize_t k = readlink(output, stack, p);
+		if (k==p) goto toolong;
+		if (!k) {
+			errno = ENOENT;
+			return 0;
+		}
+		if (k<0) {
+			if (errno != EINVAL) return 0;
+skip_readlink:
+			check_dir = 0;
+			if (up) {
+				while(q && output[q-1]!='/') q--;
+				if (q>1 && (q>2 || output[0]!='/')) q--;
+				continue;
+			}
+			if (l0) q += l;
+			check_dir = stack[p];
+			continue;
+		}
+		if (++cnt == SYMLOOP_MAX) {
+			errno = ELOOP;
+			return 0;
+		}
+
+		/* If link contents end in /, strip any slashes already on
+		 * stack to avoid /->// or //->/// or spurious toolong. */
+		if (stack[k-1]=='/') while (stack[p]=='/') p++;
+		p -= k;
+		memmove(stack+p, stack, k);
+
+		/* Skip the stack advancement in case we have a new
+		 * absolute base path. */
+		goto restart;
 	}
 
-	__syscall(SYS_close, fd);
-	return resolved ? strcpy(resolved, tmp) : strdup(tmp);
-err:
-	__syscall(SYS_close, fd);
+ 	output[q] = 0;
+
+	if (output[0] != '/') {
+		if (!getcwd(stack, sizeof stack)) return 0;
+		l = strlen(stack);
+		/* Cancel any initial .. components. */
+		p = 0;
+		while (nup--) {
+			while(l>1 && stack[l-1]!='/') l--;
+			if (l>1) l--;
+			p += 2;
+			if (p<q) p++;
+		}
+		if (q-p && stack[l-1]!='/') stack[l++] = '/';
+		if (l + (q-p) + 1 >= PATH_MAX) goto toolong;
+		memmove(output + l, output + p, q - p + 1);
+		memcpy(output, stack, l);
+		q = l + q-p;
+	}
+
+	if (resolved) return memcpy(resolved, output, q+1);
+	else return strdup(output);
+
+toolong:
+	errno = ENAMETOOLONG;
 	return 0;
 }
diff --git a/src/misc/setrlimit.c b/src/misc/setrlimit.c
index 7a66ab29..edb413fa 100644
--- a/src/misc/setrlimit.c
+++ b/src/misc/setrlimit.c
@@ -6,45 +6,46 @@
 #define MIN(a, b) ((a)<(b) ? (a) : (b))
 #define FIX(x) do{ if ((x)>=SYSCALL_RLIM_INFINITY) (x)=RLIM_INFINITY; }while(0)
 
-static int __setrlimit(int resource, const struct rlimit *rlim)
-{
-	unsigned long k_rlim[2];
-	struct rlimit tmp;
-	if (SYSCALL_RLIM_INFINITY != RLIM_INFINITY) {
-		tmp = *rlim;
-		FIX(tmp.rlim_cur);
-		FIX(tmp.rlim_max);
-		rlim = &tmp;
-	}
-	int ret = __syscall(SYS_prlimit64, 0, resource, rlim, 0);
-	if (ret != -ENOSYS) return ret;
-	k_rlim[0] = MIN(rlim->rlim_cur, MIN(-1UL, SYSCALL_RLIM_INFINITY));
-	k_rlim[1] = MIN(rlim->rlim_max, MIN(-1UL, SYSCALL_RLIM_INFINITY));
-	return __syscall(SYS_setrlimit, resource, k_rlim);
-}
-
 struct ctx {
-	const struct rlimit *rlim;
+	unsigned long lim[2];
 	int res;
 	int err;
 };
 
+#ifdef SYS_setrlimit
 static void do_setrlimit(void *p)
 {
 	struct ctx *c = p;
 	if (c->err>0) return;
-	c->err = -__setrlimit(c->res, c->rlim);
+	c->err = -__syscall(SYS_setrlimit, c->res, c->lim);
 }
+#endif
 
 int setrlimit(int resource, const struct rlimit *rlim)
 {
-	struct ctx c = { .res = resource, .rlim = rlim, .err = -1 };
+	struct rlimit tmp;
+	if (SYSCALL_RLIM_INFINITY != RLIM_INFINITY) {
+		tmp = *rlim;
+		FIX(tmp.rlim_cur);
+		FIX(tmp.rlim_max);
+		rlim = &tmp;
+	}
+	int ret = __syscall(SYS_prlimit64, 0, resource, rlim, 0);
+#ifdef SYS_setrlimit
+	if (ret != -ENOSYS) return __syscall_ret(ret);
+
+	struct ctx c = {
+		.lim[0] = MIN(rlim->rlim_cur, MIN(-1UL, SYSCALL_RLIM_INFINITY)),
+		.lim[1] = MIN(rlim->rlim_max, MIN(-1UL, SYSCALL_RLIM_INFINITY)),
+		.res = resource, .err = -1
+	};
 	__synccall(do_setrlimit, &c);
 	if (c.err) {
 		if (c.err>0) errno = c.err;
 		return -1;
 	}
 	return 0;
+#else
+	return __syscall_ret(ret);
+#endif
 }
-
-weak_alias(setrlimit, setrlimit64);
diff --git a/src/misc/syslog.c b/src/misc/syslog.c
index 13d4b0a6..710202f9 100644
--- a/src/misc/syslog.c
+++ b/src/misc/syslog.c
@@ -10,6 +10,8 @@
 #include <errno.h>
 #include <fcntl.h>
 #include "lock.h"
+#include "fork_impl.h"
+#include "locale_impl.h"
 
 static volatile int lock[1];
 static char log_ident[32];
@@ -17,6 +19,7 @@ static int log_opt;
 static int log_facility = LOG_USER;
 static int log_mask = 0xff;
 static int log_fd = -1;
+volatile int *const __syslog_lockptr = lock;
 
 int setlogmask(int maskpri)
 {
@@ -97,7 +100,7 @@ static void _vsyslog(int priority, const char *message, va_list ap)
 
 	now = time(NULL);
 	gmtime_r(&now, &tm);
-	strftime(timebuf, sizeof timebuf, "%b %e %T", &tm);
+	strftime_l(timebuf, sizeof timebuf, "%b %e %T", &tm, C_LOCALE);
 
 	pid = (log_opt & LOG_PID) ? getpid() : 0;
 	l = snprintf(buf, sizeof buf, "<%d>%s %n%s%s%.0d%s: ",
diff --git a/src/mman/mlock.c b/src/mman/mlock.c
index e683a44a..71af582f 100644
--- a/src/mman/mlock.c
+++ b/src/mman/mlock.c
@@ -3,5 +3,9 @@
 
 int mlock(const void *addr, size_t len)
 {
+#ifdef SYS_mlock
 	return syscall(SYS_mlock, addr, len);
+#else
+	return syscall(SYS_mlock2, addr, len, 0);
+#endif
 }
diff --git a/src/mman/mmap.c b/src/mman/mmap.c
index eff88d82..43e5e029 100644
--- a/src/mman/mmap.c
+++ b/src/mman/mmap.c
@@ -37,5 +37,3 @@ void *__mmap(void *start, size_t len, int prot, int flags, int fd, off_t off)
 }
 
 weak_alias(__mmap, mmap);
-
-weak_alias(mmap, mmap64);
diff --git a/src/mq/mq_notify.c b/src/mq/mq_notify.c
index 221591c7..0e1e6c7a 100644
--- a/src/mq/mq_notify.c
+++ b/src/mq/mq_notify.c
@@ -4,11 +4,14 @@
 #include <sys/socket.h>
 #include <signal.h>
 #include <unistd.h>
+#include <semaphore.h>
 #include "syscall.h"
 
 struct args {
-	pthread_barrier_t barrier;
+	sem_t sem;
 	int sock;
+	mqd_t mqd;
+	int err;
 	const struct sigevent *sev;
 };
 
@@ -20,8 +23,19 @@ static void *start(void *p)
 	int s = args->sock;
 	void (*func)(union sigval) = args->sev->sigev_notify_function;
 	union sigval val = args->sev->sigev_value;
+	struct sigevent sev2;
+	static const char zeros[32];
+	int err;
+
+	sev2.sigev_notify = SIGEV_THREAD;
+	sev2.sigev_signo = s;
+	sev2.sigev_value.sival_ptr = (void *)&zeros;
+
+	args->err = err = -__syscall(SYS_mq_notify, args->mqd, &sev2);
+	sem_post(&args->sem);
+	if (err) return 0;
 
-	pthread_barrier_wait(&args->barrier);
+	pthread_detach(pthread_self());
 	n = recv(s, buf, sizeof(buf), MSG_NOSIGNAL|MSG_WAITALL);
 	close(s);
 	if (n==sizeof buf && buf[sizeof buf - 1] == 1)
@@ -35,8 +49,8 @@ int mq_notify(mqd_t mqd, const struct sigevent *sev)
 	pthread_attr_t attr;
 	pthread_t td;
 	int s;
-	struct sigevent sev2;
-	static const char zeros[32];
+	int cs;
+	sigset_t allmask, origmask;
 
 	if (!sev || sev->sigev_notify != SIGEV_THREAD)
 		return syscall(SYS_mq_notify, mqd, sev);
@@ -44,30 +58,35 @@ int mq_notify(mqd_t mqd, const struct sigevent *sev)
 	s = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, 0);
 	if (s < 0) return -1;
 	args.sock = s;
+	args.mqd = mqd;
 
 	if (sev->sigev_notify_attributes) attr = *sev->sigev_notify_attributes;
 	else pthread_attr_init(&attr);
-	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-	pthread_barrier_init(&args.barrier, 0, 2);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+	sem_init(&args.sem, 0, 0);
 
+	sigfillset(&allmask);
+	pthread_sigmask(SIG_BLOCK, &allmask, &origmask);
 	if (pthread_create(&td, &attr, start, &args)) {
 		__syscall(SYS_close, s);
+		pthread_sigmask(SIG_SETMASK, &origmask, 0);
 		errno = EAGAIN;
 		return -1;
 	}
+	pthread_sigmask(SIG_SETMASK, &origmask, 0);
 
-	pthread_barrier_wait(&args.barrier);
-	pthread_barrier_destroy(&args.barrier);
-
-	sev2.sigev_notify = SIGEV_THREAD;
-	sev2.sigev_signo = s;
-	sev2.sigev_value.sival_ptr = (void *)&zeros;
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+	sem_wait(&args.sem);
+	sem_destroy(&args.sem);
 
-	if (syscall(SYS_mq_notify, mqd, &sev2) < 0) {
-		pthread_cancel(td);
+	if (args.err) {
 		__syscall(SYS_close, s);
+		pthread_join(td, 0);
+		pthread_setcancelstate(cs, 0);
+		errno = args.err;
 		return -1;
 	}
 
+	pthread_setcancelstate(cs, 0);
 	return 0;
 }
diff --git a/src/mq/mq_timedreceive.c b/src/mq/mq_timedreceive.c
index 2cef6a86..f41b6642 100644
--- a/src/mq/mq_timedreceive.c
+++ b/src/mq/mq_timedreceive.c
@@ -1,7 +1,24 @@
 #include <mqueue.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
 ssize_t mq_timedreceive(mqd_t mqd, char *restrict msg, size_t len, unsigned *restrict prio, const struct timespec *restrict at)
 {
+#ifdef SYS_mq_timedreceive_time64
+	time_t s = at ? at->tv_sec : 0;
+	long ns = at ? at->tv_nsec : 0;
+	long r = -ENOSYS;
+	if (SYS_mq_timedreceive == SYS_mq_timedreceive_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_mq_timedreceive_time64, mqd, msg, len, prio,
+			at ? ((long long []){at->tv_sec, at->tv_nsec}) : 0);
+	if (SYS_mq_timedreceive == SYS_mq_timedreceive_time64 || r != -ENOSYS)
+		return __syscall_ret(r);
+	return syscall_cp(SYS_mq_timedreceive, mqd, msg, len, prio,
+		at ? ((long[]){CLAMP(s), ns}) : 0);
+#else
 	return syscall_cp(SYS_mq_timedreceive, mqd, msg, len, prio, at);
+#endif
 }
diff --git a/src/mq/mq_timedsend.c b/src/mq/mq_timedsend.c
index 1c00aa0b..56cfcbb8 100644
--- a/src/mq/mq_timedsend.c
+++ b/src/mq/mq_timedsend.c
@@ -1,7 +1,24 @@
 #include <mqueue.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
 int mq_timedsend(mqd_t mqd, const char *msg, size_t len, unsigned prio, const struct timespec *at)
 {
+#ifdef SYS_mq_timedsend_time64
+	time_t s = at ? at->tv_sec : 0;
+	long ns = at ? at->tv_nsec : 0;
+	long r = -ENOSYS;
+	if (SYS_mq_timedsend == SYS_mq_timedsend_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_mq_timedsend_time64, mqd, msg, len, prio,
+			at ? ((long long []){at->tv_sec, at->tv_nsec}) : 0);
+	if (SYS_mq_timedsend == SYS_mq_timedsend_time64 || r != -ENOSYS)
+		return __syscall_ret(r);
+	return syscall_cp(SYS_mq_timedsend, mqd, msg, len, prio,
+		at ? ((long[]){CLAMP(s), ns}) : 0);
+#else
 	return syscall_cp(SYS_mq_timedsend, mqd, msg, len, prio, at);
+#endif
 }
diff --git a/src/mq/x32/mq_open.c b/src/mq/x32/mq_open.c
new file mode 100644
index 00000000..23481959
--- /dev/null
+++ b/src/mq/x32/mq_open.c
@@ -0,0 +1,22 @@
+#include <mqueue.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include "syscall.h"
+
+mqd_t mq_open(const char *name, int flags, ...)
+{
+	mode_t mode = 0;
+	struct mq_attr *attr = 0;
+	long long attrbuf[8];
+	if (*name == '/') name++;
+	if (flags & O_CREAT) {
+		va_list ap;
+		va_start(ap, flags);
+		mode = va_arg(ap, mode_t);
+		attr = va_arg(ap, struct mq_attr *);
+		if (attr) for (int i=0; i<8; i++)
+			attrbuf[i] = *(long *)((char *)attr + i*sizeof(long));
+		va_end(ap);
+	}
+	return syscall(SYS_mq_open, name, flags, mode, attr?attrbuf:0);
+}
diff --git a/src/mq/x32/mq_setattr.c b/src/mq/x32/mq_setattr.c
new file mode 100644
index 00000000..0c631175
--- /dev/null
+++ b/src/mq/x32/mq_setattr.c
@@ -0,0 +1,14 @@
+#include <mqueue.h>
+#include "syscall.h"
+
+int mq_setattr(mqd_t mqd, const struct mq_attr *restrict new, struct mq_attr *restrict old)
+{
+	long long attr[8];
+	if (new) for (int i=0; i<8; i++)
+		attr[i] = *(long *)((char *)new + i*sizeof(long));
+	int ret = __syscall(SYS_mq_getsetattr, mqd, new?attr:0, old?attr:0);
+	if (ret < 0) return __syscall_ret(ret);
+	if (old) for (int i=0; i<8; i++)
+		*(long *)((char *)old + i*sizeof(long)) = attr[i];
+	return 0;
+}
diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c
index c94819e7..7824997e 100644
--- a/src/multibyte/mbrtowc.c
+++ b/src/multibyte/mbrtowc.c
@@ -8,7 +8,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate
 	static unsigned internal_state;
 	unsigned c;
 	const unsigned char *s = (const void *)src;
-	const unsigned N = n;
+	const size_t N = n;
 	wchar_t dummy;
 
 	if (!st) st = (void *)&internal_state;
diff --git a/src/multibyte/mbsnrtowcs.c b/src/multibyte/mbsnrtowcs.c
index 931192e2..47cbdc00 100644
--- a/src/multibyte/mbsnrtowcs.c
+++ b/src/multibyte/mbsnrtowcs.c
@@ -2,11 +2,13 @@
 
 size_t mbsnrtowcs(wchar_t *restrict wcs, const char **restrict src, size_t n, size_t wn, mbstate_t *restrict st)
 {
+	static unsigned internal_state;
 	size_t l, cnt=0, n2;
 	wchar_t *ws, wbuf[256];
 	const char *s = *src;
 	const char *tmp_s;
 
+	if (!st) st = (void *)&internal_state;
 	if (!wcs) ws = wbuf, wn = sizeof wbuf / sizeof *wbuf;
 	else ws = wcs;
 
@@ -41,8 +43,8 @@ size_t mbsnrtowcs(wchar_t *restrict wcs, const char **restrict src, size_t n, si
 				s = 0;
 				break;
 			}
-			/* have to roll back partial character */
-			*(unsigned *)st = 0;
+			s += n;
+			n -= n;
 			break;
 		}
 		s += l; n -= l;
diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c
index 0ee8b69c..9b2f2dfb 100644
--- a/src/multibyte/mbsrtowcs.c
+++ b/src/multibyte/mbsrtowcs.c
@@ -38,12 +38,15 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs
 	}
 
 	if (!ws) for (;;) {
+#ifdef __GNUC__
+		typedef uint32_t __attribute__((__may_alias__)) w32;
 		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
-			while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+			while (!(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) {
 				s += 4;
 				wn -= 4;
 			}
 		}
+#endif
 		if (*s-1u < 0x7f) {
 			s++;
 			wn--;
@@ -69,8 +72,10 @@ resume0:
 			*src = (const void *)s;
 			return wn0;
 		}
+#ifdef __GNUC__
+		typedef uint32_t __attribute__((__may_alias__)) w32;
 		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
-			while (wn>=5 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+			while (wn>=5 && !(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) {
 				*ws++ = *s++;
 				*ws++ = *s++;
 				*ws++ = *s++;
@@ -78,6 +83,7 @@ resume0:
 				wn -= 4;
 			}
 		}
+#endif
 		if (*s-1u < 0x7f) {
 			*ws++ = *s++;
 			wn--;
diff --git a/src/multibyte/wcsnrtombs.c b/src/multibyte/wcsnrtombs.c
index 676932b5..95e25e70 100644
--- a/src/multibyte/wcsnrtombs.c
+++ b/src/multibyte/wcsnrtombs.c
@@ -1,41 +1,33 @@
 #include <wchar.h>
+#include <limits.h>
+#include <string.h>
 
 size_t wcsnrtombs(char *restrict dst, const wchar_t **restrict wcs, size_t wn, size_t n, mbstate_t *restrict st)
 {
-	size_t l, cnt=0, n2;
-	char *s, buf[256];
 	const wchar_t *ws = *wcs;
-	const wchar_t *tmp_ws;
-
-	if (!dst) s = buf, n = sizeof buf;
-	else s = dst;
-
-	while ( ws && n && ( (n2=wn)>=n || n2>32 ) ) {
-		if (n2>=n) n2=n;
-		tmp_ws = ws;
-		l = wcsrtombs(s, &ws, n2, 0);
-		if (!(l+1)) {
-			cnt = l;
-			n = 0;
+	size_t cnt = 0;
+	if (!dst) n=0;
+	while (ws && wn) {
+		char tmp[MB_LEN_MAX];
+		size_t l = wcrtomb(n<MB_LEN_MAX ? tmp : dst, *ws, 0);
+		if (l==-1) {
+			cnt = -1;
 			break;
 		}
-		if (s != buf) {
-			s += l;
+		if (dst) {
+			if (n<MB_LEN_MAX) {
+				if (l>n) break;
+				memcpy(dst, tmp, l);
+			}
+			dst += l;
 			n -= l;
 		}
-		wn = ws ? wn - (ws - tmp_ws) : 0;
-		cnt += l;
-	}
-	if (ws) while (n && wn) {
-		l = wcrtomb(s, *ws, 0);
-		if ((l+1)<=1) {
-			if (!l) ws = 0;
-			else cnt = l;
+		if (!*ws) {
+			ws = 0;
 			break;
 		}
-		ws++; wn--;
-		/* safe - this loop runs fewer than sizeof(buf) times */
-		s+=l; n-=l;
+		ws++;
+		wn--;
 		cnt += l;
 	}
 	if (dst) *wcs = ws;
diff --git a/src/network/accept4.c b/src/network/accept4.c
index 59ab1726..765a38ed 100644
--- a/src/network/accept4.c
+++ b/src/network/accept4.c
@@ -9,6 +9,10 @@ int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int
 	if (!flg) return accept(fd, addr, len);
 	int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
 	if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret;
+	if (flg & ~(SOCK_CLOEXEC|SOCK_NONBLOCK)) {
+		errno = EINVAL;
+		return -1;
+	}
 	ret = accept(fd, addr, len);
 	if (ret<0) return ret;
 	if (flg & SOCK_CLOEXEC)
diff --git a/src/network/dn_skipname.c b/src/network/dn_skipname.c
index d54c2e5d..eba65bb8 100644
--- a/src/network/dn_skipname.c
+++ b/src/network/dn_skipname.c
@@ -2,11 +2,14 @@
 
 int dn_skipname(const unsigned char *s, const unsigned char *end)
 {
-	const unsigned char *p;
-	for (p=s; p<end; p++)
+	const unsigned char *p = s;
+	while (p < end)
 		if (!*p) return p-s+1;
 		else if (*p>=192)
 			if (p+1<end) return p-s+2;
 			else break;
+		else
+			if (end-p<*p+1) break;
+			else p += *p + 1;
 	return -1;
 }
diff --git a/src/network/dns_parse.c b/src/network/dns_parse.c
index e6ee19d9..09813112 100644
--- a/src/network/dns_parse.c
+++ b/src/network/dns_parse.c
@@ -1,7 +1,7 @@
 #include <string.h>
 #include "lookup.h"
 
-int __dns_parse(const unsigned char *r, int rlen, int (*callback)(void *, int, const void *, int, const void *), void *ctx)
+int __dns_parse(const unsigned char *r, int rlen, int (*callback)(void *, int, const void *, int, const void *, int), void *ctx)
 {
 	int qdcount, ancount;
 	const unsigned char *p;
@@ -12,21 +12,20 @@ int __dns_parse(const unsigned char *r, int rlen, int (*callback)(void *, int, c
 	p = r+12;
 	qdcount = r[4]*256 + r[5];
 	ancount = r[6]*256 + r[7];
-	if (qdcount+ancount > 64) return -1;
 	while (qdcount--) {
 		while (p-r < rlen && *p-1U < 127) p++;
-		if (*p>193 || (*p==193 && p[1]>254) || p>r+rlen-6)
+		if (p>r+rlen-6)
 			return -1;
 		p += 5 + !!*p;
 	}
 	while (ancount--) {
 		while (p-r < rlen && *p-1U < 127) p++;
-		if (*p>193 || (*p==193 && p[1]>254) || p>r+rlen-6)
+		if (p>r+rlen-12)
 			return -1;
 		p += 1 + !!*p;
 		len = p[8]*256 + p[9];
-		if (p+len > r+rlen) return -1;
-		if (callback(ctx, p[1], p+10, len, r) < 0) return -1;
+		if (len+10 > r+rlen-p) return -1;
+		if (callback(ctx, p[1], p+10, len, r, rlen) < 0) return -1;
 		p += 10 + len;
 	}
 	return 0;
diff --git a/src/network/gai_strerror.c b/src/network/gai_strerror.c
index 9596580e..56b71503 100644
--- a/src/network/gai_strerror.c
+++ b/src/network/gai_strerror.c
@@ -6,7 +6,7 @@ static const char msgs[] =
 	"Name does not resolve\0"
 	"Try again\0"
 	"Non-recoverable error\0"
-	"Unknown error\0"
+	"Name has no usable address\0"
 	"Unrecognized address family or invalid length\0"
 	"Unrecognized socket type\0"
 	"Unrecognized service\0"
diff --git a/src/network/getaddrinfo.c b/src/network/getaddrinfo.c
index 5ae8cbfb..64ad259a 100644
--- a/src/network/getaddrinfo.c
+++ b/src/network/getaddrinfo.c
@@ -16,6 +16,7 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 	char canon[256], *outcanon;
 	int nservs, naddrs, nais, canon_len, i, j, k;
 	int family = AF_UNSPEC, flags = 0, proto = 0, socktype = 0;
+	int no_family = 0;
 	struct aibuf *out;
 
 	if (!host && !serv) return EAI_NONAME;
@@ -66,9 +67,11 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 				pthread_setcancelstate(
 					PTHREAD_CANCEL_DISABLE, &cs);
 				int r = connect(s, ta[i], tl[i]);
+				int saved_errno = errno;
 				pthread_setcancelstate(cs, 0);
 				close(s);
 				if (!r) continue;
+				errno = saved_errno;
 			}
 			switch (errno) {
 			case EADDRNOTAVAIL:
@@ -80,7 +83,7 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 			default:
 				return EAI_SYSTEM;
 			}
-			if (family == tf[i]) return EAI_NONAME;
+			if (family == tf[i]) no_family = 1;
 			family = tf[1-i];
 		}
 	}
@@ -91,6 +94,8 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 	naddrs = __lookup_name(addrs, canon, host, family, flags);
 	if (naddrs < 0) return naddrs;
 
+	if (no_family) return EAI_NODATA;
+
 	nais = nservs * naddrs;
 	canon_len = strlen(canon);
 	out = calloc(1, nais * sizeof(*out) + canon_len + 1);
@@ -104,7 +109,7 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 	}
 
 	for (k=i=0; i<naddrs; i++) for (j=0; j<nservs; j++, k++) {
-		out[k].slot = i;
+		out[k].slot = k;
 		out[k].ai = (struct addrinfo){
 			.ai_family = addrs[i].family,
 			.ai_socktype = ports[j].socktype,
@@ -113,8 +118,8 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 				? sizeof(struct sockaddr_in)
 				: sizeof(struct sockaddr_in6),
 			.ai_addr = (void *)&out[k].sa,
-			.ai_canonname = outcanon,
-			.ai_next = &out[k+1].ai };
+			.ai_canonname = outcanon };
+		if (k) out[k-1].ai.ai_next = &out[k].ai;
 		switch (addrs[i].family) {
 		case AF_INET:
 			out[k].sa.sin.sin_family = AF_INET;
@@ -130,7 +135,6 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 		}
 	}
 	out[0].ref = nais;
-	out[nais-1].ai.ai_next = 0;
 	*res = &out->ai;
 	return 0;
 }
diff --git a/src/network/gethostbyaddr.c b/src/network/gethostbyaddr.c
index 598e2241..c3cacaac 100644
--- a/src/network/gethostbyaddr.c
+++ b/src/network/gethostbyaddr.c
@@ -20,5 +20,5 @@ struct hostent *gethostbyaddr(const void *a, socklen_t l, int af)
 		err = gethostbyaddr_r(a, l, af, h,
 			(void *)(h+1), size-sizeof *h, &res, &h_errno);
 	} while (err == ERANGE);
-	return err ? 0 : h;
+	return res;
 }
diff --git a/src/network/gethostbyaddr_r.c b/src/network/gethostbyaddr_r.c
index 0f1e61aa..ceaf3935 100644
--- a/src/network/gethostbyaddr_r.c
+++ b/src/network/gethostbyaddr_r.c
@@ -54,10 +54,11 @@ int gethostbyaddr_r(const void *a, socklen_t l, int af,
 	case EAI_OVERFLOW:
 		return ERANGE;
 	default:
-	case EAI_MEMORY:
-	case EAI_SYSTEM:
 	case EAI_FAIL:
 		*err = NO_RECOVERY;
+		return EBADMSG;
+	case EAI_SYSTEM:
+		*err = NO_RECOVERY;
 		return errno;
 	case 0:
 		break;
diff --git a/src/network/gethostbyname2.c b/src/network/gethostbyname2.c
index dc9d6621..bd0da7f8 100644
--- a/src/network/gethostbyname2.c
+++ b/src/network/gethostbyname2.c
@@ -21,5 +21,5 @@ struct hostent *gethostbyname2(const char *name, int af)
 		err = gethostbyname2_r(name, af, h,
 			(void *)(h+1), size-sizeof *h, &res, &h_errno);
 	} while (err == ERANGE);
-	return err ? 0 : h;
+	return res;
 }
diff --git a/src/network/gethostbyname2_r.c b/src/network/gethostbyname2_r.c
index fc894877..a5eb67fe 100644
--- a/src/network/gethostbyname2_r.c
+++ b/src/network/gethostbyname2_r.c
@@ -22,7 +22,10 @@ int gethostbyname2_r(const char *name, int af,
 	if (cnt<0) switch (cnt) {
 	case EAI_NONAME:
 		*err = HOST_NOT_FOUND;
-		return ENOENT;
+		return 0;
+	case EAI_NODATA:
+		*err = NO_DATA;
+		return 0;
 	case EAI_AGAIN:
 		*err = TRY_AGAIN;
 		return EAGAIN;
@@ -30,7 +33,6 @@ int gethostbyname2_r(const char *name, int af,
 	case EAI_FAIL:
 		*err = NO_RECOVERY;
 		return EBADMSG;
-	case EAI_MEMORY:
 	case EAI_SYSTEM:
 		*err = NO_RECOVERY;
 		return errno;
diff --git a/src/network/getifaddrs.c b/src/network/getifaddrs.c
index fed75bd8..74df4d6c 100644
--- a/src/network/getifaddrs.c
+++ b/src/network/getifaddrs.c
@@ -39,8 +39,8 @@ struct ifaddrs_storage {
 };
 
 struct ifaddrs_ctx {
-	struct ifaddrs_storage *first;
-	struct ifaddrs_storage *last;
+	struct ifaddrs *first;
+	struct ifaddrs *last;
 	struct ifaddrs_storage *hash[IFADDRS_HASH_SIZE];
 };
 
@@ -195,9 +195,9 @@ static int netlink_msg_to_ifaddr(void *pctx, struct nlmsghdr *h)
 	}
 
 	if (ifs->ifa.ifa_name) {
-		if (!ctx->first) ctx->first = ifs;
-		if (ctx->last) ctx->last->ifa.ifa_next = &ifs->ifa;
-		ctx->last = ifs;
+		if (!ctx->first) ctx->first = &ifs->ifa;
+		if (ctx->last) ctx->last->ifa_next = &ifs->ifa;
+		ctx->last = &ifs->ifa;
 	} else {
 		free(ifs);
 	}
@@ -210,7 +210,7 @@ int getifaddrs(struct ifaddrs **ifap)
 	int r;
 	memset(ctx, 0, sizeof *ctx);
 	r = __rtnetlink_enumerate(AF_UNSPEC, AF_UNSPEC, netlink_msg_to_ifaddr, ctx);
-	if (r == 0) *ifap = &ctx->first->ifa;
-	else freeifaddrs(&ctx->first->ifa);
+	if (r == 0) *ifap = ctx->first;
+	else freeifaddrs(ctx->first);
 	return r;
 }
diff --git a/src/network/getnameinfo.c b/src/network/getnameinfo.c
index f77e73ad..133c15b3 100644
--- a/src/network/getnameinfo.c
+++ b/src/network/getnameinfo.c
@@ -58,6 +58,7 @@ static void reverse_hosts(char *buf, const unsigned char *a, unsigned scopeid, i
 		if ((p=strchr(line, '#'))) *p++='\n', *p=0;
 
 		for (p=line; *p && !isspace(*p); p++);
+		if (!*p) continue;
 		*p++ = 0;
 		if (__lookup_ipliteral(&iplit, line, AF_UNSPEC)<=0)
 			continue;
@@ -108,10 +109,10 @@ static void reverse_services(char *buf, int port, int dgram)
 	__fclose_ca(f);
 }
 
-static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet)
+static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet, int plen)
 {
 	if (rr != RR_PTR) return 0;
-	if (__dn_expand(packet, (const unsigned char *)packet + 512,
+	if (__dn_expand(packet, (const unsigned char *)packet + plen,
 	    data, c, 256) <= 0)
 		*(char *)c = 0;
 	return 0;
@@ -158,10 +159,13 @@ int getnameinfo(const struct sockaddr *restrict sa, socklen_t sl,
 			unsigned char query[18+PTR_MAX], reply[512];
 			int qlen = __res_mkquery(0, ptr, 1, RR_PTR,
 				0, 0, 0, query, sizeof query);
+			query[3] = 0; /* don't need AD flag */
 			int rlen = __res_send(query, qlen, reply, sizeof reply);
 			buf[0] = 0;
-			if (rlen > 0)
+			if (rlen > 0) {
+				if (rlen > sizeof reply) rlen = sizeof reply;
 				__dns_parse(reply, rlen, dns_parse_callback, buf);
+			}
 		}
 		if (!*buf) {
 			if (flags & NI_NAMEREQD) return EAI_NONAME;
diff --git a/src/network/getservbyport_r.c b/src/network/getservbyport_r.c
index b7f21c6b..e4cc3079 100644
--- a/src/network/getservbyport_r.c
+++ b/src/network/getservbyport_r.c
@@ -26,7 +26,7 @@ int getservbyport_r(int port, const char *prots,
 	/* Align buffer */
 	i = (uintptr_t)buf & sizeof(char *)-1;
 	if (!i) i = sizeof(char *);
-	if (buflen < 3*sizeof(char *)-i)
+	if (buflen <= 3*sizeof(char *)-i)
 		return ERANGE;
 	buf += sizeof(char *)-i;
 	buflen -= sizeof(char *)-i;
@@ -46,6 +46,8 @@ int getservbyport_r(int port, const char *prots,
 	case EAI_MEMORY:
 	case EAI_SYSTEM:
 		return ENOMEM;
+	case EAI_OVERFLOW:
+		return ERANGE;
 	default:
 		return ENOENT;
 	case 0:
diff --git a/src/network/getsockopt.c b/src/network/getsockopt.c
index 28079d8c..d3640d9c 100644
--- a/src/network/getsockopt.c
+++ b/src/network/getsockopt.c
@@ -1,7 +1,41 @@
 #include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
 #include "syscall.h"
 
 int getsockopt(int fd, int level, int optname, void *restrict optval, socklen_t *restrict optlen)
 {
-	return socketcall(getsockopt, fd, level, optname, optval, optlen, 0);
+	long tv32[2];
+	struct timeval *tv;
+
+	int r = __socketcall(getsockopt, fd, level, optname, optval, optlen, 0);
+
+	if (r==-ENOPROTOOPT) switch (level) {
+	case SOL_SOCKET:
+		switch (optname) {
+		case SO_RCVTIMEO:
+		case SO_SNDTIMEO:
+			if (SO_RCVTIMEO == SO_RCVTIMEO_OLD) break;
+			if (*optlen < sizeof *tv) return __syscall_ret(-EINVAL);
+			if (optname==SO_RCVTIMEO) optname=SO_RCVTIMEO_OLD;
+			if (optname==SO_SNDTIMEO) optname=SO_SNDTIMEO_OLD;
+			r = __socketcall(getsockopt, fd, level, optname,
+				tv32, (socklen_t[]){sizeof tv32}, 0);
+			if (r<0) break;
+			tv = optval;
+			tv->tv_sec = tv32[0];
+			tv->tv_usec = tv32[1];
+			*optlen = sizeof *tv;
+			break;
+		case SO_TIMESTAMP:
+		case SO_TIMESTAMPNS:
+			if (SO_TIMESTAMP == SO_TIMESTAMP_OLD) break;
+			if (optname==SO_TIMESTAMP) optname=SO_TIMESTAMP_OLD;
+			if (optname==SO_TIMESTAMPNS) optname=SO_TIMESTAMPNS_OLD;
+			r = __socketcall(getsockopt, fd, level,
+				optname, optval, optlen, 0);
+			break;
+		}
+	}
+	return __syscall_ret(r);
 }
diff --git a/src/network/h_errno.c b/src/network/h_errno.c
index 4f700cea..638f7718 100644
--- a/src/network/h_errno.c
+++ b/src/network/h_errno.c
@@ -1,9 +1,11 @@
 #include <netdb.h>
+#include "pthread_impl.h"
 
 #undef h_errno
 int h_errno;
 
 int *__h_errno_location(void)
 {
-	return &h_errno;
+	if (!__pthread_self()->stack) return &h_errno;
+	return &__pthread_self()->h_errno_val;
 }
diff --git a/src/network/herror.c b/src/network/herror.c
index 65f25ff3..87f8cff4 100644
--- a/src/network/herror.c
+++ b/src/network/herror.c
@@ -4,5 +4,5 @@
 
 void herror(const char *msg)
 {
-	fprintf(stderr, "%s%s%s", msg?msg:"", msg?": ":"", hstrerror(h_errno));
+	fprintf(stderr, "%s%s%s\n", msg?msg:"", msg?": ":"", hstrerror(h_errno));
 }
diff --git a/src/network/inet_ntop.c b/src/network/inet_ntop.c
index 4bfef2c5..f442f47d 100644
--- a/src/network/inet_ntop.c
+++ b/src/network/inet_ntop.c
@@ -34,7 +34,12 @@ const char *inet_ntop(int af, const void *restrict a0, char *restrict s, socklen
 		for (i=best=0, max=2; buf[i]; i++) {
 			if (i && buf[i] != ':') continue;
 			j = strspn(buf+i, ":0");
-			if (j>max) best=i, max=j;
+			/* The leading sequence of zeros (best==0) is
+			 * disadvantaged compared to sequences elsewhere
+			 * as it doesn't have a leading colon. One extra
+			 * character is required for another sequence to
+			 * beat it fairly. */
+			if (j>max+(best==0)) best=i, max=j;
 		}
 		if (max>3) {
 			buf[best] = buf[best+1] = ':';
diff --git a/src/network/inet_pton.c b/src/network/inet_pton.c
index d36c3689..bcbdd9ef 100644
--- a/src/network/inet_pton.c
+++ b/src/network/inet_pton.c
@@ -54,6 +54,7 @@ int inet_pton(int af, const char *restrict s, void *restrict a0)
 			if (s[j]!='.' || (i<6 && brk<0)) return 0;
 			need_v4=1;
 			i++;
+			ip[i&7]=0;
 			break;
 		}
 		s += j+1;
diff --git a/src/network/lookup.h b/src/network/lookup.h
index ef662725..54b2f8b5 100644
--- a/src/network/lookup.h
+++ b/src/network/lookup.h
@@ -50,6 +50,6 @@ hidden int __lookup_ipliteral(struct address buf[static 1], const char *name, in
 hidden int __get_resolv_conf(struct resolvconf *, char *, size_t);
 hidden int __res_msend_rc(int, const unsigned char *const *, const int *, unsigned char *const *, int *, int, const struct resolvconf *);
 
-hidden int __dns_parse(const unsigned char *, int, int (*)(void *, int, const void *, int, const void *), void *);
+hidden int __dns_parse(const unsigned char *, int, int (*)(void *, int, const void *, int, const void *, int), void *);
 
 #endif
diff --git a/src/network/lookup_ipliteral.c b/src/network/lookup_ipliteral.c
index 2fddab73..1e766206 100644
--- a/src/network/lookup_ipliteral.c
+++ b/src/network/lookup_ipliteral.c
@@ -15,7 +15,7 @@ int __lookup_ipliteral(struct address buf[static 1], const char *name, int famil
 	struct in6_addr a6;
 	if (__inet_aton(name, &a4) > 0) {
 		if (family == AF_INET6) /* wrong family */
-			return EAI_NONAME;
+			return EAI_NODATA;
 		memcpy(&buf[0].addr, &a4, sizeof a4);
 		buf[0].family = AF_INET;
 		buf[0].scopeid = 0;
@@ -34,7 +34,7 @@ int __lookup_ipliteral(struct address buf[static 1], const char *name, int famil
 	if (inet_pton(AF_INET6, name, &a6) <= 0)
 		return 0;
 	if (family == AF_INET) /* wrong family */
-		return EAI_NONAME;
+		return EAI_NODATA;
 
 	memcpy(&buf[0].addr, &a6, sizeof a6);
 	buf[0].family = AF_INET6;
diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c
index c93263a9..35218185 100644
--- a/src/network/lookup_name.c
+++ b/src/network/lookup_name.c
@@ -50,7 +50,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
 {
 	char line[512];
 	size_t l = strlen(name);
-	int cnt = 0, badfam = 0;
+	int cnt = 0, badfam = 0, have_canon = 0;
 	unsigned char _buf[1032];
 	FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf);
 	if (!f) switch (errno) {
@@ -79,15 +79,20 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
 		case 0:
 			continue;
 		default:
-			badfam = EAI_NONAME;
-			continue;
+			badfam = EAI_NODATA;
+			break;
 		}
 
+		if (have_canon) continue;
+
 		/* Extract first name as canonical name */
 		for (; *p && isspace(*p); p++);
 		for (z=p; *z && !isspace(*z); z++);
 		*z = 0;
-		if (is_valid_hostname(p)) memcpy(canon, p, z-p+1);
+		if (is_valid_hostname(p)) {
+			have_canon = 1;
+			memcpy(canon, p, z-p+1);
+		}
 	}
 	__fclose_ca(f);
 	return cnt ? cnt : badfam;
@@ -97,45 +102,50 @@ struct dpc_ctx {
 	struct address *addrs;
 	char *canon;
 	int cnt;
+	int rrtype;
 };
 
 #define RR_A 1
 #define RR_CNAME 5
 #define RR_AAAA 28
 
-static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet)
+#define ABUF_SIZE 4800
+
+static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet, int plen)
 {
 	char tmp[256];
+	int family;
 	struct dpc_ctx *ctx = c;
-	if (ctx->cnt >= MAXADDRS) return -1;
+	if (rr == RR_CNAME) {
+		if (__dn_expand(packet, (const unsigned char *)packet + plen,
+		    data, tmp, sizeof tmp) > 0 && is_valid_hostname(tmp))
+			strcpy(ctx->canon, tmp);
+		return 0;
+	}
+	if (ctx->cnt >= MAXADDRS) return 0;
+	if (rr != ctx->rrtype) return 0;
 	switch (rr) {
 	case RR_A:
 		if (len != 4) return -1;
-		ctx->addrs[ctx->cnt].family = AF_INET;
-		ctx->addrs[ctx->cnt].scopeid = 0;
-		memcpy(ctx->addrs[ctx->cnt++].addr, data, 4);
+		family = AF_INET;
 		break;
 	case RR_AAAA:
 		if (len != 16) return -1;
-		ctx->addrs[ctx->cnt].family = AF_INET6;
-		ctx->addrs[ctx->cnt].scopeid = 0;
-		memcpy(ctx->addrs[ctx->cnt++].addr, data, 16);
-		break;
-	case RR_CNAME:
-		if (__dn_expand(packet, (const unsigned char *)packet + 512,
-		    data, tmp, sizeof tmp) > 0 && is_valid_hostname(tmp))
-			strcpy(ctx->canon, tmp);
+		family = AF_INET6;
 		break;
 	}
+	ctx->addrs[ctx->cnt].family = family;
+	ctx->addrs[ctx->cnt].scopeid = 0;
+	memcpy(ctx->addrs[ctx->cnt++].addr, data, len);
 	return 0;
 }
 
 static int name_from_dns(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family, const struct resolvconf *conf)
 {
-	unsigned char qbuf[2][280], abuf[2][512];
+	unsigned char qbuf[2][280], abuf[2][ABUF_SIZE];
 	const unsigned char *qp[2] = { qbuf[0], qbuf[1] };
 	unsigned char *ap[2] = { abuf[0], abuf[1] };
-	int qlens[2], alens[2];
+	int qlens[2], alens[2], qtypes[2];
 	int i, nq = 0;
 	struct dpc_ctx ctx = { .addrs = buf, .canon = canon };
 	static const struct { int af; int rr; } afrr[2] = {
@@ -148,7 +158,12 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static
 			qlens[nq] = __res_mkquery(0, name, 1, afrr[i].rr,
 				0, 0, 0, qbuf[nq], sizeof *qbuf);
 			if (qlens[nq] == -1)
-				return EAI_NONAME;
+				return 0;
+			qtypes[nq] = afrr[i].rr;
+			qbuf[nq][3] = 0; /* don't need AD flag */
+			/* Ensure query IDs are distinct. */
+			if (nq && qbuf[nq][0] == qbuf[0][0])
+				qbuf[nq][0]++;
 			nq++;
 		}
 	}
@@ -156,14 +171,20 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static
 	if (__res_msend_rc(nq, qp, qlens, ap, alens, sizeof *abuf, conf) < 0)
 		return EAI_SYSTEM;
 
-	for (i=0; i<nq; i++)
+	for (i=0; i<nq; i++) {
+		if (alens[i] < 4 || (abuf[i][3] & 15) == 2) return EAI_AGAIN;
+		if ((abuf[i][3] & 15) == 3) return 0;
+		if ((abuf[i][3] & 15) != 0) return EAI_FAIL;
+	}
+
+	for (i=nq-1; i>=0; i--) {
+		ctx.rrtype = qtypes[i];
+		if (alens[i] > sizeof(abuf[i])) alens[i] = sizeof abuf[i];
 		__dns_parse(abuf[i], alens[i], dns_parse_callback, &ctx);
+	}
 
 	if (ctx.cnt) return ctx.cnt;
-	if (alens[0] < 4 || (abuf[0][3] & 15) == 2) return EAI_AGAIN;
-	if ((abuf[0][3] & 15) == 0) return EAI_NONAME;
-	if ((abuf[0][3] & 15) == 3) return 0;
-	return EAI_FAIL;
+	return EAI_NODATA;
 }
 
 static int name_from_dns_search(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family)
diff --git a/src/network/netlink.h b/src/network/netlink.h
index 38acb178..873fabe2 100644
--- a/src/network/netlink.h
+++ b/src/network/netlink.h
@@ -86,7 +86,7 @@ struct ifaddrmsg {
 #define RTA_DATALEN(rta)	((rta)->rta_len-sizeof(struct rtattr))
 #define RTA_DATAEND(rta)	((char*)(rta)+(rta)->rta_len)
 #define RTA_NEXT(rta)		(struct rtattr*)((char*)(rta)+NETLINK_ALIGN((rta)->rta_len))
-#define RTA_OK(nlh,end)		((char*)(end)-(char*)(rta) >= sizeof(struct rtattr))
+#define RTA_OK(rta,end)		((char*)(end)-(char*)(rta) >= sizeof(struct rtattr))
 
 #define NLMSG_RTA(nlh,len)	((void*)((char*)(nlh)+sizeof(struct nlmsghdr)+NETLINK_ALIGN(len)))
 #define NLMSG_RTAOK(rta,nlh)	RTA_OK(rta,NLMSG_DATAEND(nlh))
diff --git a/src/network/recvmmsg.c b/src/network/recvmmsg.c
index 58b1b2f6..2978e2f6 100644
--- a/src/network/recvmmsg.c
+++ b/src/network/recvmmsg.c
@@ -1,8 +1,15 @@
 #define _GNU_SOURCE
 #include <sys/socket.h>
 #include <limits.h>
+#include <errno.h>
+#include <time.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+hidden void __convert_scm_timestamps(struct msghdr *, socklen_t);
+
 int recvmmsg(int fd, struct mmsghdr *msgvec, unsigned int vlen, unsigned int flags, struct timespec *timeout)
 {
 #if LONG_MAX > INT_MAX
@@ -11,5 +18,22 @@ int recvmmsg(int fd, struct mmsghdr *msgvec, unsigned int vlen, unsigned int fla
 	for (i = vlen; i; i--, mh++)
 		mh->msg_hdr.__pad1 = mh->msg_hdr.__pad2 = 0;
 #endif
+#ifdef SYS_recvmmsg_time64
+	time_t s = timeout ? timeout->tv_sec : 0;
+	long ns = timeout ? timeout->tv_nsec : 0;
+	int r = __syscall_cp(SYS_recvmmsg_time64, fd, msgvec, vlen, flags,
+			timeout ? ((long long[]){s, ns}) : 0);
+	if (SYS_recvmmsg == SYS_recvmmsg_time64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	if (vlen > IOV_MAX) vlen = IOV_MAX;
+	socklen_t csize[vlen];
+	for (int i=0; i<vlen; i++) csize[i] = msgvec[i].msg_hdr.msg_controllen;
+	r = __syscall_cp(SYS_recvmmsg, fd, msgvec, vlen, flags,
+		timeout ? ((long[]){CLAMP(s), ns}) : 0);
+	for (int i=0; i<r; i++)
+		__convert_scm_timestamps(&msgvec[i].msg_hdr, csize[i]);
+	return __syscall_ret(r);
+#else
 	return syscall_cp(SYS_recvmmsg, fd, msgvec, vlen, flags, timeout);
+#endif
 }
diff --git a/src/network/recvmsg.c b/src/network/recvmsg.c
index 4ca7da8b..03641625 100644
--- a/src/network/recvmsg.c
+++ b/src/network/recvmsg.c
@@ -1,10 +1,56 @@
 #include <sys/socket.h>
 #include <limits.h>
+#include <time.h>
+#include <sys/time.h>
+#include <string.h>
 #include "syscall.h"
 
+hidden void __convert_scm_timestamps(struct msghdr *, socklen_t);
+
+void __convert_scm_timestamps(struct msghdr *msg, socklen_t csize)
+{
+	if (SCM_TIMESTAMP == SCM_TIMESTAMP_OLD) return;
+	if (!msg->msg_control || !msg->msg_controllen) return;
+
+	struct cmsghdr *cmsg, *last=0;
+	long tmp;
+	long long tvts[2];
+	int type = 0;
+
+	for (cmsg=CMSG_FIRSTHDR(msg); cmsg; cmsg=CMSG_NXTHDR(msg, cmsg)) {
+		if (cmsg->cmsg_level==SOL_SOCKET) switch (cmsg->cmsg_type) {
+		case SCM_TIMESTAMP_OLD:
+			if (type) break;
+			type = SCM_TIMESTAMP;
+			goto common;
+		case SCM_TIMESTAMPNS_OLD:
+			type = SCM_TIMESTAMPNS;
+		common:
+			memcpy(&tmp, CMSG_DATA(cmsg), sizeof tmp);
+			tvts[0] = tmp;
+			memcpy(&tmp, CMSG_DATA(cmsg) + sizeof tmp, sizeof tmp);
+			tvts[1] = tmp;
+			break;
+		}
+		last = cmsg;
+	}
+	if (!last || !type) return;
+	if (CMSG_SPACE(sizeof tvts) > csize-msg->msg_controllen) {
+		msg->msg_flags |= MSG_CTRUNC;
+		return;
+	}
+	msg->msg_controllen += CMSG_SPACE(sizeof tvts);
+	cmsg = CMSG_NXTHDR(msg, last);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = type;
+	cmsg->cmsg_len = CMSG_LEN(sizeof tvts);
+	memcpy(CMSG_DATA(cmsg), &tvts, sizeof tvts);
+}
+
 ssize_t recvmsg(int fd, struct msghdr *msg, int flags)
 {
 	ssize_t r;
+	socklen_t orig_controllen = msg->msg_controllen;
 #if LONG_MAX > INT_MAX
 	struct msghdr h, *orig = msg;
 	if (msg) {
@@ -14,6 +60,7 @@ ssize_t recvmsg(int fd, struct msghdr *msg, int flags)
 	}
 #endif
 	r = socketcall_cp(recvmsg, fd, msg, flags, 0, 0, 0);
+	if (r >= 0) __convert_scm_timestamps(msg, orig_controllen);
 #if LONG_MAX > INT_MAX
 	if (orig) *orig = h;
 #endif
diff --git a/src/network/res_mkquery.c b/src/network/res_mkquery.c
index 6fa04a5c..614bf786 100644
--- a/src/network/res_mkquery.c
+++ b/src/network/res_mkquery.c
@@ -13,6 +13,7 @@ int __res_mkquery(int op, const char *dname, int class, int type,
 	int n;
 
 	if (l && dname[l-1]=='.') l--;
+	if (l && dname[l-1]=='.') return -1;
 	n = 17+l+!!l;
 	if (l>253 || buflen<n || op>15u || class>255u || type>255u)
 		return -1;
@@ -20,6 +21,7 @@ int __res_mkquery(int op, const char *dname, int class, int type,
 	/* Construct query template - ID will be filled later */
 	memset(q, 0, n);
 	q[2] = op*8 + 1;
+	q[3] = 32; /* AD */
 	q[5] = 1;
 	memcpy((char *)q+13, dname, l);
 	for (i=13; q[i]; i=j+1) {
diff --git a/src/network/res_msend.c b/src/network/res_msend.c
index 3e018009..fcb52513 100644
--- a/src/network/res_msend.c
+++ b/src/network/res_msend.c
@@ -1,5 +1,6 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include <netinet/tcp.h>
 #include <netdb.h>
 #include <arpa/inet.h>
 #include <stdint.h>
@@ -16,17 +17,65 @@
 
 static void cleanup(void *p)
 {
-	__syscall(SYS_close, (intptr_t)p);
+	struct pollfd *pfd = p;
+	for (int i=0; pfd[i].fd >= -1; i++)
+		if (pfd[i].fd >= 0) __syscall(SYS_close, pfd[i].fd);
 }
 
 static unsigned long mtime()
 {
 	struct timespec ts;
-	clock_gettime(CLOCK_REALTIME, &ts);
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0 && errno == ENOSYS)
+		clock_gettime(CLOCK_REALTIME, &ts);
 	return (unsigned long)ts.tv_sec * 1000
 		+ ts.tv_nsec / 1000000;
 }
 
+static int start_tcp(struct pollfd *pfd, int family, const void *sa, socklen_t sl, const unsigned char *q, int ql)
+{
+	struct msghdr mh = {
+		.msg_name = (void *)sa,
+		.msg_namelen = sl,
+		.msg_iovlen = 2,
+		.msg_iov = (struct iovec [2]){
+			{ .iov_base = (uint8_t[]){ ql>>8, ql }, .iov_len = 2 },
+			{ .iov_base = (void *)q, .iov_len = ql } }
+	};
+	int r;
+	int fd = socket(family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+	pfd->fd = fd;
+	pfd->events = POLLOUT;
+	if (!setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT,
+	    &(int){1}, sizeof(int))) {
+		r = sendmsg(fd, &mh, MSG_FASTOPEN|MSG_NOSIGNAL);
+		if (r == ql+2) pfd->events = POLLIN;
+		if (r >= 0) return r;
+		if (errno == EINPROGRESS) return 0;
+	}
+	r = connect(fd, sa, sl);
+	if (!r || errno == EINPROGRESS) return 0;
+	close(fd);
+	pfd->fd = -1;
+	return -1;
+}
+
+static void step_mh(struct msghdr *mh, size_t n)
+{
+	/* Adjust iovec in msghdr to skip first n bytes. */
+	while (mh->msg_iovlen && n >= mh->msg_iov->iov_len) {
+		n -= mh->msg_iov->iov_len;
+		mh->msg_iov++;
+		mh->msg_iovlen--;
+	}
+	if (!mh->msg_iovlen) return;
+	mh->msg_iov->iov_base = (char *)mh->msg_iov->iov_base + n;
+	mh->msg_iov->iov_len -= n;
+}
+
+/* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
+ * must be sufficiently small to be safe as VLA size. In practice it's
+ * either 1 or 2, anyway. */
+
 int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	const int *qlens, unsigned char *const *answers, int *alens, int asize,
 	const struct resolvconf *conf)
@@ -34,8 +83,8 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	int fd;
 	int timeout, attempts, retry_interval, servfail_retry;
 	union {
-		struct sockaddr_in sin;
 		struct sockaddr_in6 sin6;
+		struct sockaddr_in sin;
 	} sa = {0}, ns[MAXNS] = {{0}};
 	socklen_t sl = sizeof sa.sin;
 	int nns = 0;
@@ -44,7 +93,10 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	int next;
 	int i, j;
 	int cs;
-	struct pollfd pfd;
+	struct pollfd pfd[nqueries+2];
+	int qpos[nqueries], apos[nqueries];
+	unsigned char alen_buf[nqueries][2];
+	int r;
 	unsigned long t0, t1, t2;
 
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
@@ -68,29 +120,22 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	}
 
 	/* Get local address and open/bind a socket */
-	sa.sin.sin_family = family;
 	fd = socket(family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
 
 	/* Handle case where system lacks IPv6 support */
 	if (fd < 0 && family == AF_INET6 && errno == EAFNOSUPPORT) {
+		for (i=0; i<nns && conf->ns[nns].family == AF_INET6; i++);
+		if (i==nns) {
+			pthread_setcancelstate(cs, 0);
+			return -1;
+		}
 		fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
 		family = AF_INET;
+		sl = sizeof sa.sin;
 	}
-	if (fd < 0 || bind(fd, (void *)&sa, sl) < 0) {
-		if (fd >= 0) close(fd);
-		pthread_setcancelstate(cs, 0);
-		return -1;
-	}
-
-	/* Past this point, there are no errors. Each individual query will
-	 * yield either no reply (indicated by zero length) or an answer
-	 * packet which is up to the caller to interpret. */
-
-	pthread_cleanup_push(cleanup, (void *)(intptr_t)fd);
-	pthread_setcancelstate(cs, 0);
 
 	/* Convert any IPv4 addresses in a mixed environment to v4-mapped */
-	if (family == AF_INET6) {
+	if (fd >= 0 && family == AF_INET6) {
 		setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &(int){0}, sizeof 0);
 		for (i=0; i<nns; i++) {
 			if (ns[i].sin.sin_family != AF_INET) continue;
@@ -104,16 +149,38 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 		}
 	}
 
+	sa.sin.sin_family = family;
+	if (fd < 0 || bind(fd, (void *)&sa, sl) < 0) {
+		if (fd >= 0) close(fd);
+		pthread_setcancelstate(cs, 0);
+		return -1;
+	}
+
+	/* Past this point, there are no errors. Each individual query will
+	 * yield either no reply (indicated by zero length) or an answer
+	 * packet which is up to the caller to interpret. */
+
+	for (i=0; i<nqueries; i++) pfd[i].fd = -1;
+	pfd[nqueries].fd = fd;
+	pfd[nqueries].events = POLLIN;
+	pfd[nqueries+1].fd = -2;
+
+	pthread_cleanup_push(cleanup, pfd);
+	pthread_setcancelstate(cs, 0);
+
 	memset(alens, 0, sizeof *alens * nqueries);
 
-	pfd.fd = fd;
-	pfd.events = POLLIN;
 	retry_interval = timeout / attempts;
 	next = 0;
 	t0 = t2 = mtime();
 	t1 = t2 - retry_interval;
 
 	for (; t2-t0 < timeout; t2=mtime()) {
+		/* This is the loop exit condition: that all queries
+		 * have an accepted answer. */
+		for (i=0; i<nqueries && alens[i]>0; i++);
+		if (i==nqueries) break;
+
 		if (t2-t1 >= retry_interval) {
 			/* Query all configured namservers in parallel */
 			for (i=0; i<nqueries; i++)
@@ -127,10 +194,20 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 		}
 
 		/* Wait for a response, or until time to retry */
-		if (poll(&pfd, 1, t1+retry_interval-t2) <= 0) continue;
+		if (poll(pfd, nqueries+1, t1+retry_interval-t2) <= 0) continue;
 
-		while ((rlen = recvfrom(fd, answers[next], asize, 0,
-		  (void *)&sa, (socklen_t[1]){sl})) >= 0) {
+		while (next < nqueries) {
+			struct msghdr mh = {
+				.msg_name = (void *)&sa,
+				.msg_namelen = sl,
+				.msg_iovlen = 1,
+				.msg_iov = (struct iovec []){
+					{ .iov_base = (void *)answers[next],
+					  .iov_len = asize }
+				}
+			};
+			rlen = recvmsg(fd, &mh, 0);
+			if (rlen < 0) break;
 
 			/* Ignore non-identifiable packets */
 			if (rlen < 4) continue;
@@ -170,12 +247,72 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 			else
 				memcpy(answers[i], answers[next], rlen);
 
-			if (next == nqueries) goto out;
+			/* Ignore further UDP if all slots full or TCP-mode */
+			if (next == nqueries) pfd[nqueries].events = 0;
+
+			/* If answer is truncated (TC bit), fallback to TCP */
+			if ((answers[i][2] & 2) || (mh.msg_flags & MSG_TRUNC)) {
+				alens[i] = -1;
+				pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0);
+				r = start_tcp(pfd+i, family, ns+j, sl, queries[i], qlens[i]);
+				pthread_setcancelstate(cs, 0);
+				if (r >= 0) {
+					qpos[i] = r;
+					apos[i] = 0;
+				}
+				continue;
+			}
+		}
+
+		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLOUT) {
+			struct msghdr mh = {
+				.msg_iovlen = 2,
+				.msg_iov = (struct iovec [2]){
+					{ .iov_base = (uint8_t[]){ qlens[i]>>8, qlens[i] }, .iov_len = 2 },
+					{ .iov_base = (void *)queries[i], .iov_len = qlens[i] } }
+			};
+			step_mh(&mh, qpos[i]);
+			r = sendmsg(pfd[i].fd, &mh, MSG_NOSIGNAL);
+			if (r < 0) goto out;
+			qpos[i] += r;
+			if (qpos[i] == qlens[i]+2)
+				pfd[i].events = POLLIN;
+		}
+
+		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLIN) {
+			struct msghdr mh = {
+				.msg_iovlen = 2,
+				.msg_iov = (struct iovec [2]){
+					{ .iov_base = alen_buf[i], .iov_len = 2 },
+					{ .iov_base = answers[i], .iov_len = asize } }
+			};
+			step_mh(&mh, apos[i]);
+			r = recvmsg(pfd[i].fd, &mh, 0);
+			if (r <= 0) goto out;
+			apos[i] += r;
+			if (apos[i] < 2) continue;
+			int alen = alen_buf[i][0]*256 + alen_buf[i][1];
+			if (alen < 13) goto out;
+			if (apos[i] < alen+2 && apos[i] < asize+2)
+				continue;
+			int rcode = answers[i][3] & 15;
+			if (rcode != 0 && rcode != 3)
+				goto out;
+
+			/* Storing the length here commits the accepted answer.
+			 * Immediately close TCP socket so as not to consume
+			 * resources we no longer need. */
+			alens[i] = alen;
+			__syscall(SYS_close, pfd[i].fd);
+			pfd[i].fd = -1;
 		}
 	}
 out:
 	pthread_cleanup_pop(1);
 
+	/* Disregard any incomplete TCP results */
+	for (i=0; i<nqueries; i++) if (alens[i]<0) alens[i] = 0;
+
 	return 0;
 }
 
diff --git a/src/network/res_query.c b/src/network/res_query.c
index 2f4da2e2..506dc231 100644
--- a/src/network/res_query.c
+++ b/src/network/res_query.c
@@ -1,3 +1,4 @@
+#define _BSD_SOURCE
 #include <resolv.h>
 #include <netdb.h>
 
@@ -6,7 +7,20 @@ int res_query(const char *name, int class, int type, unsigned char *dest, int le
 	unsigned char q[280];
 	int ql = __res_mkquery(0, name, class, type, 0, 0, 0, q, sizeof q);
 	if (ql < 0) return ql;
-	return __res_send(q, ql, dest, len);
+	int r = __res_send(q, ql, dest, len);
+	if (r<12) {
+		h_errno = TRY_AGAIN;
+		return -1;
+	}
+	if ((dest[3] & 15) == 3) {
+		h_errno = HOST_NOT_FOUND;
+		return -1;
+	}
+	if ((dest[3] & 15) == 0 && !dest[6] && !dest[7]) {
+		h_errno = NO_DATA;
+		return -1;
+	}
+	return r;
 }
 
 weak_alias(res_query, res_search);
diff --git a/src/network/res_send.c b/src/network/res_send.c
index b9cea0bf..9593164d 100644
--- a/src/network/res_send.c
+++ b/src/network/res_send.c
@@ -1,9 +1,17 @@
 #include <resolv.h>
+#include <string.h>
 
 int __res_send(const unsigned char *msg, int msglen, unsigned char *answer, int anslen)
 {
-	int r = __res_msend(1, &msg, &msglen, &answer, &anslen, anslen);
-	return r<0 ? r : anslen;
+	int r;
+	if (anslen < 512) {
+		unsigned char buf[512];
+		r = __res_send(msg, msglen, buf, sizeof buf);
+		if (r >= 0) memcpy(answer, buf, r < anslen ? r : anslen);
+		return r;
+	}
+	r = __res_msend(1, &msg, &msglen, &answer, &anslen, anslen);
+	return r<0 || !anslen ? -1 : anslen;
 }
 
 weak_alias(__res_send, res_send);
diff --git a/src/network/sendmsg.c b/src/network/sendmsg.c
index 80cc5f41..acdfdf29 100644
--- a/src/network/sendmsg.c
+++ b/src/network/sendmsg.c
@@ -8,13 +8,16 @@ ssize_t sendmsg(int fd, const struct msghdr *msg, int flags)
 {
 #if LONG_MAX > INT_MAX
 	struct msghdr h;
-	struct cmsghdr chbuf[1024/sizeof(struct cmsghdr)+1], *c;
+	/* Kernels before 2.6.38 set SCM_MAX_FD to 255, allocate enough
+	 * space to support an SCM_RIGHTS ancillary message with 255 fds.
+	 * Kernels since 2.6.38 set SCM_MAX_FD to 253. */
+	struct cmsghdr chbuf[CMSG_SPACE(255*sizeof(int))/sizeof(struct cmsghdr)+1], *c;
 	if (msg) {
 		h = *msg;
 		h.__pad1 = h.__pad2 = 0;
 		msg = &h;
 		if (h.msg_controllen) {
-			if (h.msg_controllen > 1024) {
+			if (h.msg_controllen > sizeof chbuf) {
 				errno = ENOMEM;
 				return -1;
 			}
diff --git a/src/network/setsockopt.c b/src/network/setsockopt.c
index c960c9ca..612a1947 100644
--- a/src/network/setsockopt.c
+++ b/src/network/setsockopt.c
@@ -1,7 +1,46 @@
 #include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
 int setsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
 {
-	return socketcall(setsockopt, fd, level, optname, optval, optlen, 0);
+	const struct timeval *tv;
+	time_t s;
+	suseconds_t us;
+
+	int r = __socketcall(setsockopt, fd, level, optname, optval, optlen, 0);
+
+	if (r==-ENOPROTOOPT) switch (level) {
+	case SOL_SOCKET:
+		switch (optname) {
+		case SO_RCVTIMEO:
+		case SO_SNDTIMEO:
+			if (SO_RCVTIMEO == SO_RCVTIMEO_OLD) break;
+			if (optlen < sizeof *tv) return __syscall_ret(-EINVAL);
+			tv = optval;
+			s = tv->tv_sec;
+			us = tv->tv_usec;
+			if (!IS32BIT(s)) return __syscall_ret(-ENOTSUP);
+
+			if (optname==SO_RCVTIMEO) optname=SO_RCVTIMEO_OLD;
+			if (optname==SO_SNDTIMEO) optname=SO_SNDTIMEO_OLD;
+
+			r = __socketcall(setsockopt, fd, level, optname,
+				((long[]){s, CLAMP(us)}), 2*sizeof(long), 0);
+			break;
+		case SO_TIMESTAMP:
+		case SO_TIMESTAMPNS:
+			if (SO_TIMESTAMP == SO_TIMESTAMP_OLD) break;
+			if (optname==SO_TIMESTAMP) optname=SO_TIMESTAMP_OLD;
+			if (optname==SO_TIMESTAMPNS) optname=SO_TIMESTAMPNS_OLD;
+			r = __socketcall(setsockopt, fd, level,
+				optname, optval, optlen, 0);
+			break;
+		}
+	}
+	return __syscall_ret(r);
 }
diff --git a/src/network/socket.c b/src/network/socket.c
index a2e92d90..afa1a7f3 100644
--- a/src/network/socket.c
+++ b/src/network/socket.c
@@ -5,17 +5,17 @@
 
 int socket(int domain, int type, int protocol)
 {
-	int s = socketcall(socket, domain, type, protocol, 0, 0, 0);
-	if (s<0 && (errno==EINVAL || errno==EPROTONOSUPPORT)
+	int s = __socketcall(socket, domain, type, protocol, 0, 0, 0);
+	if ((s==-EINVAL || s==-EPROTONOSUPPORT)
 	    && (type&(SOCK_CLOEXEC|SOCK_NONBLOCK))) {
-		s = socketcall(socket, domain,
+		s = __socketcall(socket, domain,
 			type & ~(SOCK_CLOEXEC|SOCK_NONBLOCK),
 			protocol, 0, 0, 0);
-		if (s < 0) return s;
+		if (s < 0) return __syscall_ret(s);
 		if (type & SOCK_CLOEXEC)
 			__syscall(SYS_fcntl, s, F_SETFD, FD_CLOEXEC);
 		if (type & SOCK_NONBLOCK)
 			__syscall(SYS_fcntl, s, F_SETFL, O_NONBLOCK);
 	}
-	return s;
+	return __syscall_ret(s);
 }
diff --git a/src/passwd/getgrouplist.c b/src/passwd/getgrouplist.c
index 43e51824..301824ce 100644
--- a/src/passwd/getgrouplist.c
+++ b/src/passwd/getgrouplist.c
@@ -31,7 +31,8 @@ int getgrouplist(const char *user, gid_t gid, gid_t *groups, int *ngroups)
 	if (resp[INITGRFOUND]) {
 		nscdbuf = calloc(resp[INITGRNGRPS], sizeof(uint32_t));
 		if (!nscdbuf) goto cleanup;
-		if (!fread(nscdbuf, sizeof(*nscdbuf)*resp[INITGRNGRPS], 1, f)) {
+		size_t nbytes = sizeof(*nscdbuf)*resp[INITGRNGRPS];
+		if (nbytes && !fread(nscdbuf, nbytes, 1, f)) {
 			if (!ferror(f)) errno = EIO;
 			goto cleanup;
 		}
diff --git a/src/passwd/getspnam.c b/src/passwd/getspnam.c
index 041f8965..709b526d 100644
--- a/src/passwd/getspnam.c
+++ b/src/passwd/getspnam.c
@@ -8,10 +8,11 @@ struct spwd *getspnam(const char *name)
 	static char *line;
 	struct spwd *res;
 	int e;
+	int orig_errno = errno;
 
 	if (!line) line = malloc(LINE_LIM);
 	if (!line) return 0;
 	e = getspnam_r(name, &sp, line, LINE_LIM, &res);
-	if (e) errno = e;
+	errno = e ? e : orig_errno;
 	return res;
 }
diff --git a/src/passwd/getspnam_r.c b/src/passwd/getspnam_r.c
index 541206fa..541e8531 100644
--- a/src/passwd/getspnam_r.c
+++ b/src/passwd/getspnam_r.c
@@ -67,6 +67,7 @@ int getspnam_r(const char *name, struct spwd *sp, char *buf, size_t size, struct
 	size_t k, l = strlen(name);
 	int skip = 0;
 	int cs;
+	int orig_errno = errno;
 
 	*res = 0;
 
@@ -93,8 +94,14 @@ int getspnam_r(const char *name, struct spwd *sp, char *buf, size_t size, struct
 			return errno;
 		}
 	} else {
+		if (errno != ENOENT && errno != ENOTDIR)
+			return errno;
 		f = fopen("/etc/shadow", "rbe");
-		if (!f) return errno;
+		if (!f) {
+			if (errno != ENOENT && errno != ENOTDIR)
+				return errno;
+			return 0;
+		}
 	}
 
 	pthread_cleanup_push(cleanup, f);
@@ -113,6 +120,6 @@ int getspnam_r(const char *name, struct spwd *sp, char *buf, size_t size, struct
 		break;
 	}
 	pthread_cleanup_pop(1);
-	if (rv) errno = rv;
+	errno = rv ? rv : orig_errno;
 	return rv;
 }
diff --git a/src/passwd/nscd_query.c b/src/passwd/nscd_query.c
index d38e371b..dc3406b8 100644
--- a/src/passwd/nscd_query.c
+++ b/src/passwd/nscd_query.c
@@ -40,7 +40,15 @@ retry:
 	buf[0] = NSCDVERSION;
 
 	fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
-	if (fd < 0) return NULL;
+	if (fd < 0) {
+		if (errno == EAFNOSUPPORT) {
+			f = fopen("/dev/null", "re");
+			if (f)
+				errno = errno_save;
+			return f;
+		}
+		return 0;
+	}
 
 	if(!(f = fdopen(fd, "r"))) {
 		close(fd);
diff --git a/src/passwd/putgrent.c b/src/passwd/putgrent.c
index a0b320fc..2a8257dc 100644
--- a/src/passwd/putgrent.c
+++ b/src/passwd/putgrent.c
@@ -7,7 +7,7 @@ int putgrent(const struct group *gr, FILE *f)
 	int r;
 	size_t i;
 	flockfile(f);
-	if ((r = fprintf(f, "%s:%s:%d:", gr->gr_name, gr->gr_passwd, gr->gr_gid))<0) goto done;
+	if ((r = fprintf(f, "%s:%s:%u:", gr->gr_name, gr->gr_passwd, gr->gr_gid))<0) goto done;
 	if (gr->gr_mem) for (i=0; gr->gr_mem[i]; i++)
 		if ((r = fprintf(f, "%s%s", i?",":"", gr->gr_mem[i]))<0) goto done;
 	r = fputc('\n', f);
diff --git a/src/passwd/putpwent.c b/src/passwd/putpwent.c
index 3a02e573..312b7653 100644
--- a/src/passwd/putpwent.c
+++ b/src/passwd/putpwent.c
@@ -4,7 +4,7 @@
 
 int putpwent(const struct passwd *pw, FILE *f)
 {
-	return fprintf(f, "%s:%s:%d:%d:%s:%s:%s\n",
+	return fprintf(f, "%s:%s:%u:%u:%s:%s:%s\n",
 		pw->pw_name, pw->pw_passwd, pw->pw_uid, pw->pw_gid,
 		pw->pw_gecos, pw->pw_dir, pw->pw_shell)<0 ? -1 : 0;
 }
diff --git a/src/prng/random.c b/src/prng/random.c
index 633a17f6..d3780fa7 100644
--- a/src/prng/random.c
+++ b/src/prng/random.c
@@ -1,6 +1,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include "lock.h"
+#include "fork_impl.h"
 
 /*
 this code uses the same lagged fibonacci generator as the
@@ -23,6 +24,7 @@ static int i = 3;
 static int j = 0;
 static uint32_t *x = init+1;
 static volatile int lock[1];
+volatile int *const __random_lockptr = lock;
 
 static uint32_t lcg31(uint32_t x) {
 	return (1103515245*x + 12345) & 0x7fffffff;
diff --git a/src/process/_Fork.c b/src/process/_Fork.c
new file mode 100644
index 00000000..9c07792d
--- /dev/null
+++ b/src/process/_Fork.c
@@ -0,0 +1,43 @@
+#include <unistd.h>
+#include <signal.h>
+#include "syscall.h"
+#include "libc.h"
+#include "lock.h"
+#include "pthread_impl.h"
+#include "aio_impl.h"
+#include "fork_impl.h"
+
+static void dummy(int x) { }
+weak_alias(dummy, __aio_atfork);
+
+void __post_Fork(int ret)
+{
+	if (!ret) {
+		pthread_t self = __pthread_self();
+		self->tid = __syscall(SYS_set_tid_address, &__thread_list_lock);
+		self->robust_list.off = 0;
+		self->robust_list.pending = 0;
+		self->next = self->prev = self;
+		__thread_list_lock = 0;
+		libc.threads_minus_1 = 0;
+		if (libc.need_locks) libc.need_locks = -1;
+	}
+	UNLOCK(__abort_lock);
+	if (!ret) __aio_atfork(1);
+}
+
+pid_t _Fork(void)
+{
+	pid_t ret;
+	sigset_t set;
+	__block_all_sigs(&set);
+	LOCK(__abort_lock);
+#ifdef SYS_fork
+	ret = __syscall(SYS_fork);
+#else
+	ret = __syscall(SYS_clone, SIGCHLD, 0);
+#endif
+	__post_Fork(ret);
+	__restore_sigs(&set);
+	return __syscall_ret(ret);
+}
diff --git a/src/process/aarch64/vfork.s b/src/process/aarch64/vfork.s
new file mode 100644
index 00000000..429bec8c
--- /dev/null
+++ b/src/process/aarch64/vfork.s
@@ -0,0 +1,9 @@
+.global vfork
+.type vfork,%function
+vfork:
+	mov x8, 220    // SYS_clone
+	mov x0, 0x4111 // SIGCHLD | CLONE_VM | CLONE_VFORK
+	mov x1, 0
+	svc 0
+	.hidden __syscall_ret
+	b __syscall_ret
diff --git a/src/process/execvp.c b/src/process/execvp.c
index 1fdf036f..ef3b9dd5 100644
--- a/src/process/execvp.c
+++ b/src/process/execvp.c
@@ -28,8 +28,7 @@ int __execvpe(const char *file, char *const argv[], char *const envp[])
 
 	for(p=path; ; p=z) {
 		char b[l+k+1];
-		z = strchr(p, ':');
-		if (!z) z = p+strlen(p);
+		z = __strchrnul(p, ':');
 		if (z-p >= l) {
 			if (!*z++) break;
 			continue;
diff --git a/src/process/fdop.h b/src/process/fdop.h
index 00b87514..7cf733b2 100644
--- a/src/process/fdop.h
+++ b/src/process/fdop.h
@@ -1,6 +1,8 @@
 #define FDOP_CLOSE 1
 #define FDOP_DUP2 2
 #define FDOP_OPEN 3
+#define FDOP_CHDIR 4
+#define FDOP_FCHDIR 5
 
 struct fdop {
 	struct fdop *next, *prev;
@@ -8,3 +10,8 @@ struct fdop {
 	mode_t mode;
 	char path[];
 };
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free __libc_free
diff --git a/src/process/fork.c b/src/process/fork.c
index da074ae9..56f19313 100644
--- a/src/process/fork.c
+++ b/src/process/fork.c
@@ -1,35 +1,90 @@
 #include <unistd.h>
-#include <string.h>
-#include <signal.h>
-#include "syscall.h"
+#include <errno.h>
 #include "libc.h"
+#include "lock.h"
 #include "pthread_impl.h"
+#include "fork_impl.h"
 
-static void dummy(int x)
-{
-}
+static volatile int *const dummy_lockptr = 0;
+
+weak_alias(dummy_lockptr, __at_quick_exit_lockptr);
+weak_alias(dummy_lockptr, __atexit_lockptr);
+weak_alias(dummy_lockptr, __gettext_lockptr);
+weak_alias(dummy_lockptr, __locale_lockptr);
+weak_alias(dummy_lockptr, __random_lockptr);
+weak_alias(dummy_lockptr, __sem_open_lockptr);
+weak_alias(dummy_lockptr, __stdio_ofl_lockptr);
+weak_alias(dummy_lockptr, __syslog_lockptr);
+weak_alias(dummy_lockptr, __timezone_lockptr);
+weak_alias(dummy_lockptr, __bump_lockptr);
+
+weak_alias(dummy_lockptr, __vmlock_lockptr);
 
+static volatile int *const *const atfork_locks[] = {
+	&__at_quick_exit_lockptr,
+	&__atexit_lockptr,
+	&__gettext_lockptr,
+	&__locale_lockptr,
+	&__random_lockptr,
+	&__sem_open_lockptr,
+	&__stdio_ofl_lockptr,
+	&__syslog_lockptr,
+	&__timezone_lockptr,
+	&__bump_lockptr,
+};
+
+static void dummy(int x) { }
 weak_alias(dummy, __fork_handler);
+weak_alias(dummy, __malloc_atfork);
+weak_alias(dummy, __aio_atfork);
+weak_alias(dummy, __pthread_key_atfork);
+weak_alias(dummy, __ldso_atfork);
+
+static void dummy_0(void) { }
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
 
 pid_t fork(void)
 {
-	pid_t ret;
 	sigset_t set;
 	__fork_handler(-1);
-	__block_all_sigs(&set);
-#ifdef SYS_fork
-	ret = __syscall(SYS_fork);
-#else
-	ret = __syscall(SYS_clone, SIGCHLD, 0);
-#endif
-	if (!ret) {
-		pthread_t self = __pthread_self();
-		self->tid = __syscall(SYS_gettid);
-		self->robust_list.off = 0;
-		self->robust_list.pending = 0;
-		libc.threads_minus_1 = 0;
+	__block_app_sigs(&set);
+	int need_locks = libc.need_locks > 0;
+	if (need_locks) {
+		__ldso_atfork(-1);
+		__pthread_key_atfork(-1);
+		__aio_atfork(-1);
+		__inhibit_ptc();
+		for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++)
+			if (*atfork_locks[i]) LOCK(*atfork_locks[i]);
+		__malloc_atfork(-1);
+		__tl_lock();
+	}
+	pthread_t self=__pthread_self(), next=self->next;
+	pid_t ret = _Fork();
+	int errno_save = errno;
+	if (need_locks) {
+		if (!ret) {
+			for (pthread_t td=next; td!=self; td=td->next)
+				td->tid = -1;
+			if (__vmlock_lockptr) {
+				__vmlock_lockptr[0] = 0;
+				__vmlock_lockptr[1] = 0;
+			}
+		}
+		__tl_unlock();
+		__malloc_atfork(!ret);
+		for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++)
+			if (*atfork_locks[i])
+				if (ret) UNLOCK(*atfork_locks[i]);
+				else **atfork_locks[i] = 0;
+		__release_ptc();
+		if (ret) __aio_atfork(0);
+		__pthread_key_atfork(!ret);
+		__ldso_atfork(!ret);
 	}
 	__restore_sigs(&set);
 	__fork_handler(!ret);
-	return __syscall_ret(ret);
+	if (ret<0) errno = errno_save;
+	return ret;
 }
diff --git a/src/process/posix_spawn.c b/src/process/posix_spawn.c
index 5aaf829d..8294598b 100644
--- a/src/process/posix_spawn.c
+++ b/src/process/posix_spawn.c
@@ -4,8 +4,10 @@
 #include <unistd.h>
 #include <signal.h>
 #include <fcntl.h>
+#include <errno.h>
 #include <sys/wait.h>
 #include "syscall.h"
+#include "lock.h"
 #include "pthread_impl.h"
 #include "fdop.h"
 
@@ -101,6 +103,10 @@ static int child(void *args_vp)
 				break;
 			case FDOP_DUP2:
 				fd = op->srcfd;
+				if (fd == p) {
+					ret = -EBADF;
+					goto fail;
+				}
 				if (fd != op->fd) {
 					if ((ret=__sys_dup2(fd, op->fd))<0)
 						goto fail;
@@ -121,6 +127,14 @@ static int child(void *args_vp)
 					__syscall(SYS_close, fd);
 				}
 				break;
+			case FDOP_CHDIR:
+				ret = __syscall(SYS_chdir, op->path);
+				if (ret<0) goto fail;
+				break;
+			case FDOP_FCHDIR:
+				ret = __syscall(SYS_fchdir, op->fd);
+				if (ret<0) goto fail;
+				break;
 			}
 		}
 	}
@@ -143,7 +157,11 @@ static int child(void *args_vp)
 fail:
 	/* Since sizeof errno < PIPE_BUF, the write is atomic. */
 	ret = -ret;
-	if (ret) while (__syscall(SYS_write, p, &ret, sizeof ret) < 0);
+	if (ret) {
+		int r;
+		do r = __syscall(SYS_write, p, &ret, sizeof ret);
+		while (r<0 && r!=-EPIPE);
+	}
 	_exit(127);
 }
 
@@ -158,9 +176,6 @@ int posix_spawn(pid_t *restrict res, const char *restrict path,
 	int ec=0, cs;
 	struct args args;
 
-	if (pipe2(args.p, O_CLOEXEC))
-		return errno;
-
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 
 	args.path = path;
@@ -170,9 +185,20 @@ int posix_spawn(pid_t *restrict res, const char *restrict path,
 	args.envp = envp;
 	pthread_sigmask(SIG_BLOCK, SIGALL_SET, &args.oldmask);
 
+	/* The lock guards both against seeing a SIGABRT disposition change
+	 * by abort and against leaking the pipe fd to fork-without-exec. */
+	LOCK(__abort_lock);
+
+	if (pipe2(args.p, O_CLOEXEC)) {
+		UNLOCK(__abort_lock);
+		ec = errno;
+		goto fail;
+	}
+
 	pid = __clone(child, stack+sizeof stack,
 		CLONE_VM|CLONE_VFORK|SIGCHLD, &args);
 	close(args.p[1]);
+	UNLOCK(__abort_lock);
 
 	if (pid > 0) {
 		if (read(args.p[0], &ec, sizeof ec) != sizeof ec) ec = 0;
@@ -185,6 +211,7 @@ int posix_spawn(pid_t *restrict res, const char *restrict path,
 
 	if (!ec && res) *res = pid;
 
+fail:
 	pthread_sigmask(SIG_SETMASK, &args.oldmask, 0);
 	pthread_setcancelstate(cs, 0);
 
diff --git a/src/process/posix_spawn_file_actions_addchdir.c b/src/process/posix_spawn_file_actions_addchdir.c
new file mode 100644
index 00000000..7f2590ae
--- /dev/null
+++ b/src/process/posix_spawn_file_actions_addchdir.c
@@ -0,0 +1,18 @@
+#include <spawn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "fdop.h"
+
+int posix_spawn_file_actions_addchdir_np(posix_spawn_file_actions_t *restrict fa, const char *restrict path)
+{
+	struct fdop *op = malloc(sizeof *op + strlen(path) + 1);
+	if (!op) return ENOMEM;
+	op->cmd = FDOP_CHDIR;
+	op->fd = -1;
+	strcpy(op->path, path);
+	if ((op->next = fa->__actions)) op->next->prev = op;
+	op->prev = 0;
+	fa->__actions = op;
+	return 0;
+}
diff --git a/src/process/posix_spawn_file_actions_addclose.c b/src/process/posix_spawn_file_actions_addclose.c
index cdda5979..0c2ef8fa 100644
--- a/src/process/posix_spawn_file_actions_addclose.c
+++ b/src/process/posix_spawn_file_actions_addclose.c
@@ -5,6 +5,7 @@
 
 int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *fa, int fd)
 {
+	if (fd < 0) return EBADF;
 	struct fdop *op = malloc(sizeof *op);
 	if (!op) return ENOMEM;
 	op->cmd = FDOP_CLOSE;
diff --git a/src/process/posix_spawn_file_actions_adddup2.c b/src/process/posix_spawn_file_actions_adddup2.c
index 0367498f..addca4d4 100644
--- a/src/process/posix_spawn_file_actions_adddup2.c
+++ b/src/process/posix_spawn_file_actions_adddup2.c
@@ -5,6 +5,7 @@
 
 int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *fa, int srcfd, int fd)
 {
+	if (srcfd < 0 || fd < 0) return EBADF;
 	struct fdop *op = malloc(sizeof *op);
 	if (!op) return ENOMEM;
 	op->cmd = FDOP_DUP2;
diff --git a/src/process/posix_spawn_file_actions_addfchdir.c b/src/process/posix_spawn_file_actions_addfchdir.c
new file mode 100644
index 00000000..e89ede8c
--- /dev/null
+++ b/src/process/posix_spawn_file_actions_addfchdir.c
@@ -0,0 +1,18 @@
+#include <spawn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "fdop.h"
+
+int posix_spawn_file_actions_addfchdir_np(posix_spawn_file_actions_t *fa, int fd)
+{
+	if (fd < 0) return EBADF;
+	struct fdop *op = malloc(sizeof *op);
+	if (!op) return ENOMEM;
+	op->cmd = FDOP_FCHDIR;
+	op->fd = fd;
+	if ((op->next = fa->__actions)) op->next->prev = op;
+	op->prev = 0;
+	fa->__actions = op;
+	return 0;
+}
diff --git a/src/process/posix_spawn_file_actions_addopen.c b/src/process/posix_spawn_file_actions_addopen.c
index 368922c7..82bbcec9 100644
--- a/src/process/posix_spawn_file_actions_addopen.c
+++ b/src/process/posix_spawn_file_actions_addopen.c
@@ -6,6 +6,7 @@
 
 int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *restrict fa, int fd, const char *restrict path, int flags, mode_t mode)
 {
+	if (fd < 0) return EBADF;
 	struct fdop *op = malloc(sizeof *op + strlen(path) + 1);
 	if (!op) return ENOMEM;
 	op->cmd = FDOP_OPEN;
diff --git a/src/process/riscv64/vfork.s b/src/process/riscv64/vfork.s
new file mode 100644
index 00000000..c93dca23
--- /dev/null
+++ b/src/process/riscv64/vfork.s
@@ -0,0 +1,12 @@
+.global vfork
+.type vfork,@function
+vfork:
+	/* riscv does not have SYS_vfork, so we must use clone instead */
+	/* note: riscv's clone = clone(flags, sp, ptidptr, tls, ctidptr) */
+	li a7, 220
+	li a0, 0x100 | 0x4000 | 17 /* flags = CLONE_VM | CLONE_VFORK | SIGCHLD */
+	mv a1, sp
+	/* the other arguments are ignoreable */
+	ecall
+	.hidden __syscall_ret
+	j __syscall_ret
diff --git a/src/process/waitpid.c b/src/process/waitpid.c
index 1b65bf05..80231862 100644
--- a/src/process/waitpid.c
+++ b/src/process/waitpid.c
@@ -3,5 +3,5 @@
 
 pid_t waitpid(pid_t pid, int *status, int options)
 {
-	return syscall_cp(SYS_wait4, pid, status, options, 0);
+	return sys_wait4_cp(pid, status, options, 0);
 }
diff --git a/src/regex/glob.c b/src/regex/glob.c
index aa1c6a44..87bae084 100644
--- a/src/regex/glob.c
+++ b/src/regex/glob.c
@@ -8,6 +8,8 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <stddef.h>
+#include <unistd.h>
+#include <pwd.h>
 
 struct match
 {
@@ -90,16 +92,23 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*
 	if (!*pat) {
 		/* If we consumed any components above, or if GLOB_MARK is
 		 * requested and we don't yet know if the match is a dir,
-		 * we must call stat to confirm the file exists and/or
-		 * determine its type. */
+		 * we must confirm the file exists and/or determine its type.
+		 *
+		 * If marking dirs, symlink type is inconclusive; we need the
+		 * type for the symlink target, and therefore must try stat
+		 * first unless type is known not to be a symlink. Otherwise,
+		 * or if that fails, use lstat for determining existence to
+		 * avoid false negatives in the case of broken symlinks. */
 		struct stat st;
-		if ((flags & GLOB_MARK) && type==DT_LNK) type = 0;
-		if (!type && stat(buf, &st)) {
+		if ((flags & GLOB_MARK) && (!type||type==DT_LNK) && !stat(buf, &st)) {
+			if (S_ISDIR(st.st_mode)) type = DT_DIR;
+			else type = DT_REG;
+		}
+		if (!type && lstat(buf, &st)) {
 			if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR)))
 				return GLOB_ABORTED;
 			return 0;
 		}
-		if (!type && S_ISDIR(st.st_mode)) type = DT_DIR;
 		if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR))
 			return GLOB_NOSPACE;
 		return 0;
@@ -182,6 +191,39 @@ static int sort(const void *a, const void *b)
 	return strcmp(*(const char **)a, *(const char **)b);
 }
 
+static int expand_tilde(char **pat, char *buf, size_t *pos)
+{
+	char *p = *pat + 1;
+	size_t i = 0;
+
+	char delim, *name_end = __strchrnul(p, '/');
+	if ((delim = *name_end)) *name_end++ = 0;
+	*pat = name_end;
+
+	char *home = *p ? NULL : getenv("HOME");
+	if (!home) {
+		struct passwd pw, *res;
+		switch (*p ? getpwnam_r(p, &pw, buf, PATH_MAX, &res)
+			   : getpwuid_r(getuid(), &pw, buf, PATH_MAX, &res)) {
+		case ENOMEM:
+			return GLOB_NOSPACE;
+		case 0:
+			if (!res)
+		default:
+				return GLOB_NOMATCH;
+		}
+		home = pw.pw_dir;
+	}
+	while (i < PATH_MAX - 2 && *home)
+		buf[i++] = *home++;
+	if (*home)
+		return GLOB_NOMATCH;
+	if ((buf[i] = delim))
+		buf[++i] = 0;
+	*pos = i;
+	return 0;
+}
+
 int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
 {
 	struct match head = { .next = NULL }, *tail = &head;
@@ -202,7 +244,12 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i
 		char *p = strdup(pat);
 		if (!p) return GLOB_NOSPACE;
 		buf[0] = 0;
-		error = do_glob(buf, 0, 0, p, flags, errfunc, &tail);
+		size_t pos = 0;
+		char *s = p;
+		if ((flags & (GLOB_TILDE | GLOB_TILDE_CHECK)) && *p == '~')
+			error = expand_tilde(&s, buf, &pos);
+		if (!error)
+			error = do_glob(buf, pos, 0, s, flags, errfunc, &tail);
 		free(p);
 	}
 
@@ -218,7 +265,7 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i
 			if (append(&tail, pat, strlen(pat), 0))
 				return GLOB_NOSPACE;
 			cnt++;
-		} else
+		} else if (!error)
 			return GLOB_NOMATCH;
 	}
 
@@ -259,6 +306,3 @@ void globfree(glob_t *g)
 	g->gl_pathc = 0;
 	g->gl_pathv = NULL;
 }
-
-weak_alias(glob, glob64);
-weak_alias(globfree, globfree64);
diff --git a/src/sched/sched_rr_get_interval.c b/src/sched/sched_rr_get_interval.c
index 4b01028f..33a3d1ae 100644
--- a/src/sched/sched_rr_get_interval.c
+++ b/src/sched/sched_rr_get_interval.c
@@ -3,5 +3,19 @@
 
 int sched_rr_get_interval(pid_t pid, struct timespec *ts)
 {
+#ifdef SYS_sched_rr_get_interval_time64
+	/* On a 32-bit arch, use the old syscall if it exists. */
+	if (SYS_sched_rr_get_interval != SYS_sched_rr_get_interval_time64) {
+		long ts32[2];
+		int r = __syscall(SYS_sched_rr_get_interval, pid, ts32);
+		if (!r) {
+			ts->tv_sec = ts32[0];
+			ts->tv_nsec = ts32[1];
+		}
+		return __syscall_ret(r);
+	}
+#endif
+	/* If reaching this point, it's a 64-bit arch or time64-only
+	 * 32-bit arch and we can get result directly into timespec. */
 	return syscall(SYS_sched_rr_get_interval, pid, ts);
 }
diff --git a/src/search/hsearch.c b/src/search/hsearch.c
index b3ac8796..2634a67f 100644
--- a/src/search/hsearch.c
+++ b/src/search/hsearch.c
@@ -41,9 +41,9 @@ static int resize(size_t nel, struct hsearch_data *htab)
 {
 	size_t newsize;
 	size_t i, j;
+	size_t oldsize = htab->__tab->mask + 1;
 	ENTRY *e, *newe;
 	ENTRY *oldtab = htab->__tab->entries;
-	ENTRY *oldend = htab->__tab->entries + htab->__tab->mask + 1;
 
 	if (nel > MAXSIZE)
 		nel = MAXSIZE;
@@ -56,7 +56,7 @@ static int resize(size_t nel, struct hsearch_data *htab)
 	htab->__tab->mask = newsize - 1;
 	if (!oldtab)
 		return 1;
-	for (e = oldtab; e < oldend; e++)
+	for (e = oldtab; e < oldtab + oldsize; e++)
 		if (e->key) {
 			for (i=keyhash(e->key),j=1; ; i+=j++) {
 				newe = htab->__tab->entries + (i & htab->__tab->mask);
diff --git a/src/select/poll.c b/src/select/poll.c
index c84c8a99..7883dfab 100644
--- a/src/select/poll.c
+++ b/src/select/poll.c
@@ -8,8 +8,13 @@ int poll(struct pollfd *fds, nfds_t n, int timeout)
 #ifdef SYS_poll
 	return syscall_cp(SYS_poll, fds, n, timeout);
 #else
+#if SYS_ppoll_time64 == SYS_ppoll
+	typedef long long ppoll_ts_t[2];
+#else
+	typedef long ppoll_ts_t[2];
+#endif
 	return syscall_cp(SYS_ppoll, fds, n, timeout>=0 ?
-		&((struct timespec){ .tv_sec = timeout/1000,
-		.tv_nsec = timeout%1000*1000000 }) : 0, 0, _NSIG/8);
+		((ppoll_ts_t){ timeout/1000, timeout%1000*1000000 }) : 0,
+		0, _NSIG/8);
 #endif
 }
diff --git a/src/select/ppoll.c b/src/select/ppoll.c
new file mode 100644
index 00000000..9a0bf929
--- /dev/null
+++ b/src/select/ppoll.c
@@ -0,0 +1,26 @@
+#define _BSD_SOURCE
+#include <poll.h>
+#include <signal.h>
+#include <errno.h>
+#include "syscall.h"
+
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+int ppoll(struct pollfd *fds, nfds_t n, const struct timespec *to, const sigset_t *mask)
+{
+	time_t s = to ? to->tv_sec : 0;
+	long ns = to ? to->tv_nsec : 0;
+#ifdef SYS_ppoll_time64
+	int r = -ENOSYS;
+	if (SYS_ppoll == SYS_ppoll_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_ppoll_time64, fds, n,
+			to ? ((long long[]){s, ns}) : 0,
+			mask, _NSIG/8);
+	if (SYS_ppoll == SYS_ppoll_time64 || r != -ENOSYS)
+		return __syscall_ret(r);
+	s = CLAMP(s);
+#endif
+	return syscall_cp(SYS_ppoll, fds, n,
+		to ? ((long[]){s, ns}) : 0, mask, _NSIG/8);
+}
diff --git a/src/select/pselect.c b/src/select/pselect.c
index 762af37f..54cfb291 100644
--- a/src/select/pselect.c
+++ b/src/select/pselect.c
@@ -1,12 +1,26 @@
 #include <sys/select.h>
 #include <signal.h>
 #include <stdint.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
 int pselect(int n, fd_set *restrict rfds, fd_set *restrict wfds, fd_set *restrict efds, const struct timespec *restrict ts, const sigset_t *restrict mask)
 {
 	syscall_arg_t data[2] = { (uintptr_t)mask, _NSIG/8 };
-	struct timespec ts_tmp;
-	if (ts) ts_tmp = *ts;
-	return syscall_cp(SYS_pselect6, n, rfds, wfds, efds, ts ? &ts_tmp : 0, data);
+	time_t s = ts ? ts->tv_sec : 0;
+	long ns = ts ? ts->tv_nsec : 0;
+#ifdef SYS_pselect6_time64
+	int r = -ENOSYS;
+	if (SYS_pselect6 == SYS_pselect6_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_pselect6_time64, n, rfds, wfds, efds,
+			ts ? ((long long[]){s, ns}) : 0, data);
+	if (SYS_pselect6 == SYS_pselect6_time64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	s = CLAMP(s);
+#endif
+	return syscall_cp(SYS_pselect6, n, rfds, wfds, efds,
+		ts ? ((long[]){s, ns}) : 0, data);
 }
diff --git a/src/select/select.c b/src/select/select.c
index 02fd75c3..f1d72863 100644
--- a/src/select/select.c
+++ b/src/select/select.c
@@ -4,22 +4,42 @@
 #include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
 int select(int n, fd_set *restrict rfds, fd_set *restrict wfds, fd_set *restrict efds, struct timeval *restrict tv)
 {
+	time_t s = tv ? tv->tv_sec : 0;
+	suseconds_t us = tv ? tv->tv_usec : 0;
+	long ns;
+	const time_t max_time = (1ULL<<8*sizeof(time_t)-1)-1;
+
+	if (s<0 || us<0) return __syscall_ret(-EINVAL);
+	if (us/1000000 > max_time - s) {
+		s = max_time;
+		us = 999999;
+		ns = 999999999;
+	} else {
+		s += us/1000000;
+		us %= 1000000;
+		ns = us*1000;
+	}
+
+#ifdef SYS_pselect6_time64
+	int r = -ENOSYS;
+	if (SYS_pselect6 == SYS_pselect6_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_pselect6_time64, n, rfds, wfds, efds,
+			tv ? ((long long[]){s, ns}) : 0,
+			((syscall_arg_t[]){ 0, _NSIG/8 }));
+	if (SYS_pselect6 == SYS_pselect6_time64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	s = CLAMP(s);
+#endif
 #ifdef SYS_select
-	return syscall_cp(SYS_select, n, rfds, wfds, efds, tv);
+	return syscall_cp(SYS_select, n, rfds, wfds, efds,
+		tv ? ((long[]){s, us}) : 0);
 #else
-	syscall_arg_t data[2] = { 0, _NSIG/8 };
-	struct timespec ts;
-	if (tv) {
-		if (tv->tv_sec < 0 || tv->tv_usec < 0)
-			return __syscall_ret(-EINVAL);
-		time_t extra_secs = tv->tv_usec / 1000000;
-		ts.tv_nsec = tv->tv_usec % 1000000 * 1000;
-		const time_t max_time = (1ULL<<8*sizeof(time_t)-1)-1;
-		ts.tv_sec = extra_secs > max_time - tv->tv_sec ?
-			max_time : tv->tv_sec + extra_secs;
-	}
-	return syscall_cp(SYS_pselect6, n, rfds, wfds, efds, tv ? &ts : 0, data);
+	return syscall_cp(SYS_pselect6, n, rfds, wfds, efds,
+		tv ? ((long[]){s, ns}) : 0, ((syscall_arg_t[]){ 0, _NSIG/8 }));
 #endif
 }
diff --git a/src/setjmp/aarch64/longjmp.s b/src/setjmp/aarch64/longjmp.s
index 7c4655fa..0af9c50e 100644
--- a/src/setjmp/aarch64/longjmp.s
+++ b/src/setjmp/aarch64/longjmp.s
@@ -18,7 +18,6 @@ longjmp:
 	ldp d12, d13, [x0,#144]
 	ldp d14, d15, [x0,#160]
 
-	mov x0, x1
-	cbnz x1, 1f
-	mov x0, #1
-1:	br x30
+	cmp w1, 0
+	csinc w0, w1, wzr, ne
+	br x30
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.S
index 76cc2920..8df0b819 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.S
@@ -16,11 +16,14 @@ longjmp:
 	ldr r2,1f
 	ldr r1,[r1,r2]
 
+#if __ARM_ARCH < 8
 	tst r1,#0x260
 	beq 3f
+	// HWCAP_ARM_FPA
 	tst r1,#0x20
 	beq 2f
 	ldc p2, cr4, [ip], #48
+#endif
 2:	tst r1,#0x40
 	beq 2f
 	.fpu vfp
@@ -28,6 +31,8 @@ longjmp:
 	.fpu softvfp
 	.eabi_attribute 10, 0
 	.eabi_attribute 27, 0
+#if __ARM_ARCH < 8
+	// HWCAP_ARM_IWMMXT
 2:	tst r1,#0x200
 	beq 3f
 	ldcl p1, cr10, [ip], #8
@@ -36,6 +41,8 @@ longjmp:
 	ldcl p1, cr13, [ip], #8
 	ldcl p1, cr14, [ip], #8
 	ldcl p1, cr15, [ip], #8
+#endif
+2:
 3:	bx lr
 
 .hidden __hwcap
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.S
index 011315b7..45731d22 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.S
@@ -18,11 +18,14 @@ setjmp:
 	ldr r2,1f
 	ldr r1,[r1,r2]
 
+#if __ARM_ARCH < 8
 	tst r1,#0x260
 	beq 3f
+	// HWCAP_ARM_FPA
 	tst r1,#0x20
 	beq 2f
 	stc p2, cr4, [ip], #48
+#endif
 2:	tst r1,#0x40
 	beq 2f
 	.fpu vfp
@@ -30,6 +33,8 @@ setjmp:
 	.fpu softvfp
 	.eabi_attribute 10, 0
 	.eabi_attribute 27, 0
+#if __ARM_ARCH < 8
+	// HWCAP_ARM_IWMMXT
 2:	tst r1,#0x200
 	beq 3f
 	stcl p1, cr10, [ip], #8
@@ -38,6 +43,8 @@ setjmp:
 	stcl p1, cr13, [ip], #8
 	stcl p1, cr14, [ip], #8
 	stcl p1, cr15, [ip], #8
+#endif
+2:
 3:	bx lr
 
 .hidden __hwcap
diff --git a/src/setjmp/i386/longjmp.s b/src/setjmp/i386/longjmp.s
index 772d28dd..8188f06b 100644
--- a/src/setjmp/i386/longjmp.s
+++ b/src/setjmp/i386/longjmp.s
@@ -6,15 +6,11 @@ _longjmp:
 longjmp:
 	mov  4(%esp),%edx
 	mov  8(%esp),%eax
-	test    %eax,%eax
-	jnz 1f
-	inc     %eax
-1:
+	cmp       $1,%eax
+	adc       $0, %al
 	mov   (%edx),%ebx
 	mov  4(%edx),%esi
 	mov  8(%edx),%edi
 	mov 12(%edx),%ebp
-	mov 16(%edx),%ecx
-	mov     %ecx,%esp
-	mov 20(%edx),%ecx
-	jmp *%ecx
+	mov 16(%edx),%esp
+	jmp *20(%edx)
diff --git a/src/setjmp/loongarch64/longjmp.S b/src/setjmp/loongarch64/longjmp.S
new file mode 100644
index 00000000..896d2e26
--- /dev/null
+++ b/src/setjmp/loongarch64/longjmp.S
@@ -0,0 +1,32 @@
+.global _longjmp
+.global longjmp
+.type   _longjmp,@function
+.type   longjmp,@function
+_longjmp:
+longjmp:
+	ld.d    $ra, $a0, 0
+	ld.d    $sp, $a0, 8
+	ld.d    $r21,$a0, 16
+	ld.d    $fp, $a0, 24
+	ld.d    $s0, $a0, 32
+	ld.d    $s1, $a0, 40
+	ld.d    $s2, $a0, 48
+	ld.d    $s3, $a0, 56
+	ld.d    $s4, $a0, 64
+	ld.d    $s5, $a0, 72
+	ld.d    $s6, $a0, 80
+	ld.d    $s7, $a0, 88
+	ld.d    $s8, $a0, 96
+#ifndef __loongarch_soft_float
+	fld.d   $fs0, $a0, 104
+	fld.d   $fs1, $a0, 112
+	fld.d   $fs2, $a0, 120
+	fld.d   $fs3, $a0, 128
+	fld.d   $fs4, $a0, 136
+	fld.d   $fs5, $a0, 144
+	fld.d   $fs6, $a0, 152
+	fld.d   $fs7, $a0, 160
+#endif
+	sltui   $a0, $a1, 1
+	add.d   $a0, $a0, $a1
+	jr      $ra
diff --git a/src/setjmp/loongarch64/setjmp.S b/src/setjmp/loongarch64/setjmp.S
new file mode 100644
index 00000000..d158a3d2
--- /dev/null
+++ b/src/setjmp/loongarch64/setjmp.S
@@ -0,0 +1,34 @@
+.global __setjmp
+.global _setjmp
+.global setjmp
+.type __setjmp,@function
+.type _setjmp,@function
+.type setjmp,@function
+__setjmp:
+_setjmp:
+setjmp:
+	st.d    $ra, $a0, 0
+	st.d    $sp, $a0, 8
+	st.d    $r21,$a0, 16
+	st.d    $fp, $a0, 24
+	st.d    $s0, $a0, 32
+	st.d    $s1, $a0, 40
+	st.d    $s2, $a0, 48
+	st.d    $s3, $a0, 56
+	st.d    $s4, $a0, 64
+	st.d    $s5, $a0, 72
+	st.d    $s6, $a0, 80
+	st.d    $s7, $a0, 88
+	st.d    $s8, $a0, 96
+#ifndef __loongarch_soft_float
+	fst.d   $fs0, $a0, 104
+	fst.d   $fs1, $a0, 112
+	fst.d   $fs2, $a0, 120
+	fst.d   $fs3, $a0, 128
+	fst.d   $fs4, $a0, 136
+	fst.d   $fs5, $a0, 144
+	fst.d   $fs6, $a0, 152
+	fst.d   $fs7, $a0, 160
+#endif
+	move    $a0, $zero
+	jr      $ra
diff --git a/src/setjmp/mips/longjmp.S b/src/setjmp/mips/longjmp.S
index fdb6c95d..ecf40855 100644
--- a/src/setjmp/mips/longjmp.S
+++ b/src/setjmp/mips/longjmp.S
@@ -12,18 +12,12 @@ longjmp:
 	addu    $2, $2, 1
 1:
 #ifndef __mips_soft_float
-	lwc1    $20, 56($4)
-	lwc1    $21, 60($4)
-	lwc1    $22, 64($4)
-	lwc1    $23, 68($4)
-	lwc1    $24, 72($4)
-	lwc1    $25, 76($4)
-	lwc1    $26, 80($4)
-	lwc1    $27, 84($4)
-	lwc1    $28, 88($4)
-	lwc1    $29, 92($4)
-	lwc1    $30, 96($4)
-	lwc1    $31, 100($4)
+	l.d     $f20, 56($4)
+	l.d     $f22, 64($4)
+	l.d     $f24, 72($4)
+	l.d     $f26, 80($4)
+	l.d     $f28, 88($4)
+	l.d     $f30, 96($4)
 #endif
 	lw      $ra,  0($4)
 	lw      $sp,  4($4)
diff --git a/src/setjmp/mips/setjmp.S b/src/setjmp/mips/setjmp.S
index 501d5264..7ae8832d 100644
--- a/src/setjmp/mips/setjmp.S
+++ b/src/setjmp/mips/setjmp.S
@@ -22,18 +22,12 @@ setjmp:
 	sw      $30, 40($4)
 	sw      $28, 44($4)
 #ifndef __mips_soft_float
-	swc1    $20, 56($4)
-	swc1    $21, 60($4)
-	swc1    $22, 64($4)
-	swc1    $23, 68($4)
-	swc1    $24, 72($4)
-	swc1    $25, 76($4)
-	swc1    $26, 80($4)
-	swc1    $27, 84($4)
-	swc1    $28, 88($4)
-	swc1    $29, 92($4)
-	swc1    $30, 96($4)
-	swc1    $31, 100($4)
+	s.d     $f20, 56($4)
+	s.d     $f22, 64($4)
+	s.d     $f24, 72($4)
+	s.d     $f26, 80($4)
+	s.d     $f28, 88($4)
+	s.d     $f30, 96($4)
 #endif
 	jr      $ra
 	li      $2, 0
diff --git a/src/setjmp/powerpc/longjmp.S b/src/setjmp/powerpc/longjmp.S
index e598bd05..465e4cd7 100644
--- a/src/setjmp/powerpc/longjmp.S
+++ b/src/setjmp/powerpc/longjmp.S
@@ -37,7 +37,37 @@ longjmp:
 	lwz 29, 72(3)
 	lwz 30, 76(3)
 	lwz 31, 80(3)
-#ifndef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
+	mflr 0
+	bl 1f
+	.hidden __hwcap
+	.long __hwcap-.
+1:	mflr 6
+	lwz 5, 0(6)
+	lwzx 6, 6, 5
+	andis. 6, 6, 0x80
+	beq 1f
+	.long 0x11c35b01 /* evldd 14,88(3) */
+	.long 0x11e36301 /* ... */
+	.long 0x12036b01
+	.long 0x12237301
+	.long 0x12437b01
+	.long 0x12638301
+	.long 0x12838b01
+	.long 0x12a39301
+	.long 0x12c39b01
+	.long 0x12e3a301
+	.long 0x1303ab01
+	.long 0x1323b301
+	.long 0x1343bb01
+	.long 0x1363c301
+	.long 0x1383cb01
+	.long 0x13a3d301
+	.long 0x13c3db01
+	.long 0x13e3e301 /* evldd 31,224(3) */
+	.long 0x11a3eb01 /* evldd 13,232(3) */
+1:	mtlr 0
+#else
 	lfd 14,88(3)
 	lfd 15,96(3)
 	lfd 16,104(3)
diff --git a/src/setjmp/powerpc/setjmp.S b/src/setjmp/powerpc/setjmp.S
index cd91a207..f1fcce33 100644
--- a/src/setjmp/powerpc/setjmp.S
+++ b/src/setjmp/powerpc/setjmp.S
@@ -37,7 +37,37 @@ setjmp:
 	stw 29, 72(3)
 	stw 30, 76(3)
 	stw 31, 80(3)
-#ifndef _SOFT_FLOAT
+#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
+	mflr 0
+	bl 1f
+	.hidden __hwcap
+	.long __hwcap-.
+1:	mflr 4
+	lwz 5, 0(4)
+	lwzx 4, 4, 5
+	andis. 4, 4, 0x80
+	beq 1f
+	.long 0x11c35b21 /* evstdd 14,88(3) */
+	.long 0x11e36321 /* ... */
+	.long 0x12036b21
+	.long 0x12237321
+	.long 0x12437b21
+	.long 0x12638321
+	.long 0x12838b21
+	.long 0x12a39321
+	.long 0x12c39b21
+	.long 0x12e3a321
+	.long 0x1303ab21
+	.long 0x1323b321
+	.long 0x1343bb21
+	.long 0x1363c321
+	.long 0x1383cb21
+	.long 0x13a3d321
+	.long 0x13c3db21
+	.long 0x13e3e321 /* evstdd 31,224(3) */
+	.long 0x11a3eb21 /* evstdd 13,232(3) */
+1:	mtlr 0
+#else
 	stfd 14,88(3)
 	stfd 15,96(3)
 	stfd 16,104(3)
diff --git a/src/setjmp/riscv32/longjmp.S b/src/setjmp/riscv32/longjmp.S
new file mode 100644
index 00000000..f9cb3318
--- /dev/null
+++ b/src/setjmp/riscv32/longjmp.S
@@ -0,0 +1,42 @@
+.global __longjmp
+.global _longjmp
+.global longjmp
+.type __longjmp, %function
+.type _longjmp,  %function
+.type longjmp,   %function
+__longjmp:
+_longjmp:
+longjmp:
+	lw s0,    0(a0)
+	lw s1,    4(a0)
+	lw s2,    8(a0)
+	lw s3,    12(a0)
+	lw s4,    16(a0)
+	lw s5,    20(a0)
+	lw s6,    24(a0)
+	lw s7,    28(a0)
+	lw s8,    32(a0)
+	lw s9,    36(a0)
+	lw s10,   40(a0)
+	lw s11,   44(a0)
+	lw sp,    48(a0)
+	lw ra,    52(a0)
+
+#ifndef __riscv_float_abi_soft
+	fld fs0,  56(a0)
+	fld fs1,  64(a0)
+	fld fs2,  72(a0)
+	fld fs3,  80(a0)
+	fld fs4,  88(a0)
+	fld fs5,  96(a0)
+	fld fs6,  104(a0)
+	fld fs7,  112(a0)
+	fld fs8,  120(a0)
+	fld fs9,  128(a0)
+	fld fs10, 136(a0)
+	fld fs11, 144(a0)
+#endif
+
+	seqz a0, a1
+	add a0, a0, a1
+	ret
diff --git a/src/setjmp/riscv32/setjmp.S b/src/setjmp/riscv32/setjmp.S
new file mode 100644
index 00000000..8a75cf55
--- /dev/null
+++ b/src/setjmp/riscv32/setjmp.S
@@ -0,0 +1,41 @@
+.global __setjmp
+.global _setjmp
+.global setjmp
+.type __setjmp, %function
+.type _setjmp,  %function
+.type setjmp,   %function
+__setjmp:
+_setjmp:
+setjmp:
+	sw s0,    0(a0)
+	sw s1,    4(a0)
+	sw s2,    8(a0)
+	sw s3,    12(a0)
+	sw s4,    16(a0)
+	sw s5,    20(a0)
+	sw s6,    24(a0)
+	sw s7,    28(a0)
+	sw s8,    32(a0)
+	sw s9,    36(a0)
+	sw s10,   40(a0)
+	sw s11,   44(a0)
+	sw sp,    48(a0)
+	sw ra,    52(a0)
+
+#ifndef __riscv_float_abi_soft
+	fsd fs0,  56(a0)
+	fsd fs1,  64(a0)
+	fsd fs2,  72(a0)
+	fsd fs3,  80(a0)
+	fsd fs4,  88(a0)
+	fsd fs5,  96(a0)
+	fsd fs6,  104(a0)
+	fsd fs7,  112(a0)
+	fsd fs8,  120(a0)
+	fsd fs9,  128(a0)
+	fsd fs10, 136(a0)
+	fsd fs11, 144(a0)
+#endif
+
+	li a0, 0
+	ret
diff --git a/src/setjmp/riscv64/longjmp.S b/src/setjmp/riscv64/longjmp.S
new file mode 100644
index 00000000..41e2d210
--- /dev/null
+++ b/src/setjmp/riscv64/longjmp.S
@@ -0,0 +1,42 @@
+.global __longjmp
+.global _longjmp
+.global longjmp
+.type __longjmp, %function
+.type _longjmp,  %function
+.type longjmp,   %function
+__longjmp:
+_longjmp:
+longjmp:
+	ld s0,    0(a0)
+	ld s1,    8(a0)
+	ld s2,    16(a0)
+	ld s3,    24(a0)
+	ld s4,    32(a0)
+	ld s5,    40(a0)
+	ld s6,    48(a0)
+	ld s7,    56(a0)
+	ld s8,    64(a0)
+	ld s9,    72(a0)
+	ld s10,   80(a0)
+	ld s11,   88(a0)
+	ld sp,    96(a0)
+	ld ra,    104(a0)
+
+#ifndef __riscv_float_abi_soft
+	fld fs0,  112(a0)
+	fld fs1,  120(a0)
+	fld fs2,  128(a0)
+	fld fs3,  136(a0)
+	fld fs4,  144(a0)
+	fld fs5,  152(a0)
+	fld fs6,  160(a0)
+	fld fs7,  168(a0)
+	fld fs8,  176(a0)
+	fld fs9,  184(a0)
+	fld fs10, 192(a0)
+	fld fs11, 200(a0)
+#endif
+
+	seqz a0, a1
+	add a0, a0, a1
+	ret
diff --git a/src/setjmp/riscv64/setjmp.S b/src/setjmp/riscv64/setjmp.S
new file mode 100644
index 00000000..51249672
--- /dev/null
+++ b/src/setjmp/riscv64/setjmp.S
@@ -0,0 +1,41 @@
+.global __setjmp
+.global _setjmp
+.global setjmp
+.type __setjmp, %function
+.type _setjmp,  %function
+.type setjmp,   %function
+__setjmp:
+_setjmp:
+setjmp:
+	sd s0,    0(a0)
+	sd s1,    8(a0)
+	sd s2,    16(a0)
+	sd s3,    24(a0)
+	sd s4,    32(a0)
+	sd s5,    40(a0)
+	sd s6,    48(a0)
+	sd s7,    56(a0)
+	sd s8,    64(a0)
+	sd s9,    72(a0)
+	sd s10,   80(a0)
+	sd s11,   88(a0)
+	sd sp,    96(a0)
+	sd ra,    104(a0)
+
+#ifndef __riscv_float_abi_soft
+	fsd fs0,  112(a0)
+	fsd fs1,  120(a0)
+	fsd fs2,  128(a0)
+	fsd fs3,  136(a0)
+	fsd fs4,  144(a0)
+	fsd fs5,  152(a0)
+	fsd fs6,  160(a0)
+	fsd fs7,  168(a0)
+	fsd fs8,  176(a0)
+	fsd fs9,  184(a0)
+	fsd fs10, 192(a0)
+	fsd fs11, 200(a0)
+#endif
+
+	li a0, 0
+	ret
diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s
index e175a4b9..1b2661c3 100644
--- a/src/setjmp/x32/longjmp.s
+++ b/src/setjmp/x32/longjmp.s
@@ -5,18 +5,14 @@
 .type longjmp,@function
 _longjmp:
 longjmp:
-	mov %rsi,%rax           /* val will be longjmp return */
-	test %rax,%rax
-	jnz 1f
-	inc %rax                /* if val==0, val=1 per longjmp semantics */
-1:
+	xor %eax,%eax
+	cmp $1,%esi             /* CF = val ? 0 : 1 */
+	adc %esi,%eax           /* eax = val + !val */
 	mov (%rdi),%rbx         /* rdi is the jmp_buf, restore regs from it */
 	mov 8(%rdi),%rbp
 	mov 16(%rdi),%r12
 	mov 24(%rdi),%r13
 	mov 32(%rdi),%r14
 	mov 40(%rdi),%r15
-	mov 48(%rdi),%rdx       /* this ends up being the stack pointer */
-	mov %rdx,%rsp
-	mov 56(%rdi),%rdx       /* this is the instruction pointer */
-	jmp *%rdx               /* goto saved address without altering rsp */
+	mov 48(%rdi),%rsp
+	jmp *56(%rdi)           /* goto saved address without altering rsp */
diff --git a/src/setjmp/x32/setjmp.s b/src/setjmp/x32/setjmp.s
index 98f58b8d..d95e4853 100644
--- a/src/setjmp/x32/setjmp.s
+++ b/src/setjmp/x32/setjmp.s
@@ -18,5 +18,5 @@ setjmp:
 	mov %rdx,48(%rdi)
 	mov (%rsp),%rdx         /* save return addr ptr for new rip */
 	mov %rdx,56(%rdi)
-	xor %rax,%rax           /* always return 0 */
+	xor %eax,%eax           /* always return 0 */
 	ret
diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s
index e175a4b9..1b2661c3 100644
--- a/src/setjmp/x86_64/longjmp.s
+++ b/src/setjmp/x86_64/longjmp.s
@@ -5,18 +5,14 @@
 .type longjmp,@function
 _longjmp:
 longjmp:
-	mov %rsi,%rax           /* val will be longjmp return */
-	test %rax,%rax
-	jnz 1f
-	inc %rax                /* if val==0, val=1 per longjmp semantics */
-1:
+	xor %eax,%eax
+	cmp $1,%esi             /* CF = val ? 0 : 1 */
+	adc %esi,%eax           /* eax = val + !val */
 	mov (%rdi),%rbx         /* rdi is the jmp_buf, restore regs from it */
 	mov 8(%rdi),%rbp
 	mov 16(%rdi),%r12
 	mov 24(%rdi),%r13
 	mov 32(%rdi),%r14
 	mov 40(%rdi),%r15
-	mov 48(%rdi),%rdx       /* this ends up being the stack pointer */
-	mov %rdx,%rsp
-	mov 56(%rdi),%rdx       /* this is the instruction pointer */
-	jmp *%rdx               /* goto saved address without altering rsp */
+	mov 48(%rdi),%rsp
+	jmp *56(%rdi)           /* goto saved address without altering rsp */
diff --git a/src/setjmp/x86_64/setjmp.s b/src/setjmp/x86_64/setjmp.s
index 98f58b8d..d95e4853 100644
--- a/src/setjmp/x86_64/setjmp.s
+++ b/src/setjmp/x86_64/setjmp.s
@@ -18,5 +18,5 @@ setjmp:
 	mov %rdx,48(%rdi)
 	mov (%rsp),%rdx         /* save return addr ptr for new rip */
 	mov %rdx,56(%rdi)
-	xor %rax,%rax           /* always return 0 */
+	xor %eax,%eax           /* always return 0 */
 	ret
diff --git a/src/signal/arm/sigsetjmp.s b/src/signal/arm/sigsetjmp.s
index 318addba..69ebbf49 100644
--- a/src/signal/arm/sigsetjmp.s
+++ b/src/signal/arm/sigsetjmp.s
@@ -6,9 +6,10 @@
 sigsetjmp:
 __sigsetjmp:
 	tst r1,r1
-	beq setjmp
+	bne 1f
+	b setjmp
 
-	str lr,[r0,#256]
+1:	str lr,[r0,#256]
 	str r4,[r0,#260+8]
 	mov r4,r0
 
diff --git a/src/signal/block.c b/src/signal/block.c
index d7f61001..cc8698f0 100644
--- a/src/signal/block.c
+++ b/src/signal/block.c
@@ -3,9 +3,9 @@
 #include <signal.h>
 
 static const unsigned long all_mask[] = {
-#if ULONG_MAX == 0xffffffff && _NSIG == 129
+#if ULONG_MAX == 0xffffffff && _NSIG > 65
 	-1UL, -1UL, -1UL, -1UL
-#elif ULONG_MAX == 0xffffffff
+#elif ULONG_MAX == 0xffffffff || _NSIG > 65
 	-1UL, -1UL
 #else
 	-1UL
diff --git a/src/signal/getitimer.c b/src/signal/getitimer.c
index 8a8046a7..36d1eb9d 100644
--- a/src/signal/getitimer.c
+++ b/src/signal/getitimer.c
@@ -3,5 +3,16 @@
 
 int getitimer(int which, struct itimerval *old)
 {
+	if (sizeof(time_t) > sizeof(long)) {
+		long old32[4];
+		int r = __syscall(SYS_getitimer, which, old32);
+		if (!r) {
+			old->it_interval.tv_sec = old32[0];
+			old->it_interval.tv_usec = old32[1];
+			old->it_value.tv_sec = old32[2];
+			old->it_value.tv_usec = old32[3];
+		}
+		return __syscall_ret(r);
+	}
 	return syscall(SYS_getitimer, which, old);
 }
diff --git a/src/signal/mips/restore.s b/src/signal/loongarch64/restore.s
index b6dadce0..d90a8ebb 100644
--- a/src/signal/mips/restore.s
+++ b/src/signal/loongarch64/restore.s
@@ -1,15 +1,10 @@
-.set noreorder
-
 .global __restore_rt
-.hidden __restore_rt
-.type   __restore_rt,@function
-__restore_rt:
-	li $2, 4193
-	syscall
-
 .global __restore
+.hidden __restore_rt
 .hidden __restore
+.type   __restore_rt,@function
 .type   __restore,@function
+__restore_rt:
 __restore:
-	li $2, 4119
-	syscall
+	li.w    $a7, 139
+	syscall 0
diff --git a/src/signal/loongarch64/sigsetjmp.s b/src/signal/loongarch64/sigsetjmp.s
new file mode 100644
index 00000000..9c0e3ae2
--- /dev/null
+++ b/src/signal/loongarch64/sigsetjmp.s
@@ -0,0 +1,25 @@
+.global sigsetjmp
+.global __sigsetjmp
+.type sigsetjmp,@function
+.type __sigsetjmp,@function
+sigsetjmp:
+__sigsetjmp:
+	beq     $a1, $zero, 1f
+	st.d    $ra, $a0, 184
+	st.d    $s0, $a0, 200  #184+8+8
+	move    $s0, $a0
+
+	la.global  $t0, setjmp
+	jirl       $ra, $t0, 0
+
+	move    $a1, $a0        # Return from 'setjmp' or 'longjmp'
+	move    $a0, $s0
+	ld.d    $ra, $a0, 184
+	ld.d    $s0, $a0, 200 #184+8+8
+
+.hidden __sigsetjmp_tail
+	la.global  $t0, __sigsetjmp_tail
+	jr         $t0
+1:
+	la.global  $t0, setjmp
+	jr         $t0
diff --git a/src/signal/mips64/restore.s b/src/signal/mips64/restore.s
deleted file mode 100644
index 401f8e73..00000000
--- a/src/signal/mips64/restore.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.set	noreorder
-.global	__restore_rt
-.global	__restore
-.hidden __restore_rt
-.hidden __restore
-.type	__restore_rt,@function
-.type	__restore,@function
-__restore_rt:
-__restore:
-	li	$2,5211
-	syscall
diff --git a/src/signal/mipsn32/restore.s b/src/signal/mipsn32/restore.s
deleted file mode 100644
index 4cd4e1b4..00000000
--- a/src/signal/mipsn32/restore.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.set	noreorder
-.global	__restore_rt
-.global	__restore
-.hidden __restore_rt
-.hidden __restore
-.type	__restore_rt,@function
-.type	__restore,@function
-__restore_rt:
-__restore:
-	li	$2,6211
-	syscall
diff --git a/src/signal/riscv32/restore.s b/src/signal/riscv32/restore.s
new file mode 100644
index 00000000..5a0af695
--- /dev/null
+++ b/src/signal/riscv32/restore.s
@@ -0,0 +1,10 @@
+.global __restore
+.hidden __restore
+.type __restore, %function
+__restore:
+.global __restore_rt
+.hidden __restore_rt
+.type __restore_rt, %function
+__restore_rt:
+	li a7, 139 # SYS_rt_sigreturn
+	ecall
diff --git a/src/signal/riscv32/sigsetjmp.s b/src/signal/riscv32/sigsetjmp.s
new file mode 100644
index 00000000..c1caeab1
--- /dev/null
+++ b/src/signal/riscv32/sigsetjmp.s
@@ -0,0 +1,23 @@
+.global sigsetjmp
+.global __sigsetjmp
+.type sigsetjmp, %function
+.type __sigsetjmp, %function
+sigsetjmp:
+__sigsetjmp:
+	bnez a1, 1f
+	tail setjmp
+1:
+
+	sw ra, 152(a0)
+	sw s0, 164(a0)
+	mv s0, a0
+
+	call setjmp
+
+	mv a1, a0
+	mv a0, s0
+	lw s0, 164(a0)
+	lw ra, 152(a0)
+
+.hidden __sigsetjmp_tail
+	tail __sigsetjmp_tail
diff --git a/src/signal/riscv64/restore.s b/src/signal/riscv64/restore.s
new file mode 100644
index 00000000..5a0af695
--- /dev/null
+++ b/src/signal/riscv64/restore.s
@@ -0,0 +1,10 @@
+.global __restore
+.hidden __restore
+.type __restore, %function
+__restore:
+.global __restore_rt
+.hidden __restore_rt
+.type __restore_rt, %function
+__restore_rt:
+	li a7, 139 # SYS_rt_sigreturn
+	ecall
diff --git a/src/signal/riscv64/sigsetjmp.s b/src/signal/riscv64/sigsetjmp.s
new file mode 100644
index 00000000..f9bc162a
--- /dev/null
+++ b/src/signal/riscv64/sigsetjmp.s
@@ -0,0 +1,23 @@
+.global sigsetjmp
+.global __sigsetjmp
+.type sigsetjmp, %function
+.type __sigsetjmp, %function
+sigsetjmp:
+__sigsetjmp:
+	bnez a1, 1f
+	tail setjmp
+1:
+
+	sd ra, 208(a0)
+	sd s0, 224(a0)
+	mv s0, a0
+
+	call setjmp
+
+	mv a1, a0
+	mv a0, s0
+	ld s0, 224(a0)
+	ld ra, 208(a0)
+
+.hidden __sigsetjmp_tail
+	tail __sigsetjmp_tail
diff --git a/src/signal/setitimer.c b/src/signal/setitimer.c
index 21b1f45d..0dfbeb4d 100644
--- a/src/signal/setitimer.c
+++ b/src/signal/setitimer.c
@@ -1,7 +1,26 @@
 #include <sys/time.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+
 int setitimer(int which, const struct itimerval *restrict new, struct itimerval *restrict old)
 {
+	if (sizeof(time_t) > sizeof(long)) {
+		time_t is = new->it_interval.tv_sec, vs = new->it_value.tv_sec;
+		long ius = new->it_interval.tv_usec, vus = new->it_value.tv_usec;
+		if (!IS32BIT(is) || !IS32BIT(vs))
+			return __syscall_ret(-ENOTSUP);
+		long old32[4];
+		int r = __syscall(SYS_setitimer, which,
+			((long[]){is, ius, vs, vus}), old32);
+		if (!r && old) {
+			old->it_interval.tv_sec = old32[0];
+			old->it_interval.tv_usec = old32[1];
+			old->it_value.tv_sec = old32[2];
+			old->it_value.tv_usec = old32[3];
+		}
+		return __syscall_ret(r);
+	}
 	return syscall(SYS_setitimer, which, new, old);
 }
diff --git a/src/signal/sh/sigsetjmp.s b/src/signal/sh/sigsetjmp.s
index 1e2270be..f0f604e2 100644
--- a/src/signal/sh/sigsetjmp.s
+++ b/src/signal/sh/sigsetjmp.s
@@ -27,7 +27,7 @@ __sigsetjmp:
 
 	mov.l 3f, r0
 4:	braf r0
-	 mov.l @(4+8,r4), r8
+	 mov.l @(4+8,r6), r8
 
 9:	mov.l 5f, r0
 6:	braf r0
diff --git a/src/signal/sigaction.c b/src/signal/sigaction.c
index af47195e..e45308fa 100644
--- a/src/signal/sigaction.c
+++ b/src/signal/sigaction.c
@@ -7,12 +7,6 @@
 #include "lock.h"
 #include "ksigaction.h"
 
-volatile int dummy_lock[1] = { 0 };
-
-extern hidden volatile int __abort_lock[1];
-
-weak_alias(dummy_lock, __abort_lock);
-
 static int unmask_done;
 static unsigned long handler_set[_NSIG/(8*sizeof(long))];
 
@@ -21,10 +15,11 @@ void __get_handler_set(sigset_t *set)
 	memcpy(set, handler_set, sizeof handler_set);
 }
 
+volatile int __eintr_valid_flag;
+
 int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
 {
 	struct k_sigaction ksa, ksa_old;
-	unsigned long set[_NSIG/(8*sizeof(long))];
 	if (sa) {
 		if ((uintptr_t)sa->sa_handler > 1UL) {
 			a_or_l(handler_set+(sig-1)/(8*sizeof(long)),
@@ -43,25 +38,20 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact
 					SIGPT_SET, 0, _NSIG/8);
 				unmask_done = 1;
 			}
-		}
-		/* Changing the disposition of SIGABRT to anything but
-		 * SIG_DFL requires a lock, so that it cannot be changed
-		 * while abort is terminating the process after simply
-		 * calling raise(SIGABRT) failed to do so. */
-		if (sa->sa_handler != SIG_DFL && sig == SIGABRT) {
-			__block_all_sigs(&set);
-			LOCK(__abort_lock);
+
+			if (!(sa->sa_flags & SA_RESTART)) {
+				a_store(&__eintr_valid_flag, 1);
+			}
 		}
 		ksa.handler = sa->sa_handler;
-		ksa.flags = sa->sa_flags | SA_RESTORER;
+		ksa.flags = sa->sa_flags;
+#ifdef SA_RESTORER
+		ksa.flags |= SA_RESTORER;
 		ksa.restorer = (sa->sa_flags & SA_SIGINFO) ? __restore_rt : __restore;
+#endif
 		memcpy(&ksa.mask, &sa->sa_mask, _NSIG/8);
 	}
 	int r = __syscall(SYS_rt_sigaction, sig, sa?&ksa:0, old?&ksa_old:0, _NSIG/8);
-	if (sig == SIGABRT && sa && sa->sa_handler != SIG_DFL) {
-		UNLOCK(__abort_lock);
-		__restore_sigs(&set);
-	}
 	if (old && !r) {
 		old->sa_handler = ksa_old.handler;
 		old->sa_flags = ksa_old.flags;
@@ -72,11 +62,26 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact
 
 int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
 {
+	unsigned long set[_NSIG/(8*sizeof(long))];
+
 	if (sig-32U < 3 || sig-1U >= _NSIG-1) {
 		errno = EINVAL;
 		return -1;
 	}
-	return __libc_sigaction(sig, sa, old);
+
+	/* Doing anything with the disposition of SIGABRT requires a lock,
+	 * so that it cannot be changed while abort is terminating the
+	 * process and so any change made by abort can't be observed. */
+	if (sig == SIGABRT) {
+		__block_all_sigs(&set);
+		LOCK(__abort_lock);
+	}
+	int r = __libc_sigaction(sig, sa, old);
+	if (sig == SIGABRT) {
+		UNLOCK(__abort_lock);
+		__restore_sigs(&set);
+	}
+	return r;
 }
 
 weak_alias(__sigaction, sigaction);
diff --git a/src/signal/sigaltstack.c b/src/signal/sigaltstack.c
index 62cb81ad..616625c5 100644
--- a/src/signal/sigaltstack.c
+++ b/src/signal/sigaltstack.c
@@ -1,15 +1,17 @@
 #include <signal.h>
 #include <errno.h>
+#include <unistd.h>
 #include "syscall.h"
 
 int sigaltstack(const stack_t *restrict ss, stack_t *restrict old)
 {
 	if (ss) {
-		if (ss->ss_size < MINSIGSTKSZ) {
+		size_t min = sysconf(_SC_MINSIGSTKSZ);
+		if (!(ss->ss_flags & SS_DISABLE) && ss->ss_size < min) {
 			errno = ENOMEM;
 			return -1;
 		}
-		if (ss->ss_flags & ~SS_DISABLE) {
+		if (ss->ss_flags & SS_ONSTACK) {
 			errno = EINVAL;
 			return -1;
 		}
diff --git a/src/signal/siglongjmp.c b/src/signal/siglongjmp.c
index bc317acc..53789b23 100644
--- a/src/signal/siglongjmp.c
+++ b/src/signal/siglongjmp.c
@@ -5,5 +5,10 @@
 
 _Noreturn void siglongjmp(sigjmp_buf buf, int ret)
 {
+	/* If sigsetjmp was called with nonzero savemask flag, the address
+	 * longjmp will return to is inside of sigsetjmp. The signal mask
+	 * will then be restored in the returned-to context instead of here,
+	 * which matters if the context we are returning from may not have
+	 * sufficient stack space for signal delivery. */
 	longjmp(buf, ret);
 }
diff --git a/src/signal/sigpause.c b/src/signal/sigpause.c
index 363d2fec..1587c391 100644
--- a/src/signal/sigpause.c
+++ b/src/signal/sigpause.c
@@ -4,6 +4,6 @@ int sigpause(int sig)
 {
 	sigset_t mask;
 	sigprocmask(0, 0, &mask);
-	sigdelset(&mask, sig);
+	if (sigdelset(&mask, sig)) return -1;
 	return sigsuspend(&mask);
 }
diff --git a/src/signal/sigset.c b/src/signal/sigset.c
index 0d7b4564..f3e8c407 100644
--- a/src/signal/sigset.c
+++ b/src/signal/sigset.c
@@ -3,7 +3,7 @@
 void (*sigset(int sig, void (*handler)(int)))(int)
 {
 	struct sigaction sa, sa_old;
-	sigset_t mask;
+	sigset_t mask, mask_old;
 
 	sigemptyset(&mask);
 	if (sigaddset(&mask, sig) < 0)
@@ -12,7 +12,7 @@ void (*sigset(int sig, void (*handler)(int)))(int)
 	if (handler == SIG_HOLD) {
 		if (sigaction(sig, 0, &sa_old) < 0)
 			return SIG_ERR;
-		if (sigprocmask(SIG_BLOCK, &mask, &mask) < 0)
+		if (sigprocmask(SIG_BLOCK, &mask, &mask_old) < 0)
 			return SIG_ERR;
 	} else {
 		sa.sa_handler = handler;
@@ -20,8 +20,8 @@ void (*sigset(int sig, void (*handler)(int)))(int)
 		sigemptyset(&sa.sa_mask);
 		if (sigaction(sig, &sa, &sa_old) < 0)
 			return SIG_ERR;
-		if (sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0)
+		if (sigprocmask(SIG_UNBLOCK, &mask, &mask_old) < 0)
 			return SIG_ERR;
 	}
-	return sigismember(&mask, sig) ? SIG_HOLD : sa_old.sa_handler;
+	return sigismember(&mask_old, sig) ? SIG_HOLD : sa_old.sa_handler;
 }
diff --git a/src/signal/sigtimedwait.c b/src/signal/sigtimedwait.c
index 7bcfe720..1287174e 100644
--- a/src/signal/sigtimedwait.c
+++ b/src/signal/sigtimedwait.c
@@ -2,11 +2,31 @@
 #include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+static int do_sigtimedwait(const sigset_t *restrict mask, siginfo_t *restrict si, const struct timespec *restrict ts)
+{
+#ifdef SYS_rt_sigtimedwait_time64
+	time_t s = ts ? ts->tv_sec : 0;
+	long ns = ts ? ts->tv_nsec : 0;
+	int r = -ENOSYS;
+	if (SYS_rt_sigtimedwait == SYS_rt_sigtimedwait_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_rt_sigtimedwait_time64, mask, si,
+			ts ? ((long long[]){s, ns}) : 0, _NSIG/8);
+	if (SYS_rt_sigtimedwait == SYS_rt_sigtimedwait_time64 || r!=-ENOSYS)
+		return r;
+	return __syscall_cp(SYS_rt_sigtimedwait, mask, si,
+		ts ? ((long[]){CLAMP(s), ns}) : 0, _NSIG/8);;
+#else
+	return __syscall_cp(SYS_rt_sigtimedwait, mask, si, ts, _NSIG/8);
+#endif
+}
+
 int sigtimedwait(const sigset_t *restrict mask, siginfo_t *restrict si, const struct timespec *restrict timeout)
 {
 	int ret;
-	do ret = syscall_cp(SYS_rt_sigtimedwait, mask,
-		si, timeout, _NSIG/8);
-	while (ret<0 && errno==EINTR);
-	return ret;
+	do ret = do_sigtimedwait(mask, si, timeout);
+	while (ret==-EINTR);
+	return __syscall_ret(ret);
 }
diff --git a/src/signal/x32/getitimer.c b/src/signal/x32/getitimer.c
new file mode 100644
index 00000000..8a8046a7
--- /dev/null
+++ b/src/signal/x32/getitimer.c
@@ -0,0 +1,7 @@
+#include <sys/time.h>
+#include "syscall.h"
+
+int getitimer(int which, struct itimerval *old)
+{
+	return syscall(SYS_getitimer, which, old);
+}
diff --git a/src/signal/x32/setitimer.c b/src/signal/x32/setitimer.c
new file mode 100644
index 00000000..21b1f45d
--- /dev/null
+++ b/src/signal/x32/setitimer.c
@@ -0,0 +1,7 @@
+#include <sys/time.h>
+#include "syscall.h"
+
+int setitimer(int which, const struct itimerval *restrict new, struct itimerval *restrict old)
+{
+	return syscall(SYS_setitimer, which, new, old);
+}
diff --git a/src/stat/__xstat.c b/src/stat/__xstat.c
index f6303430..b4560df7 100644
--- a/src/stat/__xstat.c
+++ b/src/stat/__xstat.c
@@ -1,5 +1,7 @@
 #include <sys/stat.h>
 
+#if !_REDIR_TIME64
+
 int __fxstat(int ver, int fd, struct stat *buf)
 {
 	return fstat(fd, buf);
@@ -20,10 +22,7 @@ int __xstat(int ver, const char *path, struct stat *buf)
 	return stat(path, buf);
 }
 
-weak_alias(__fxstat, __fxstat64);
-weak_alias(__fxstatat, __fxstatat64);
-weak_alias(__lxstat, __lxstat64);
-weak_alias(__xstat, __xstat64);
+#endif
 
 int __xmknod(int ver, const char *path, mode_t mode, dev_t *dev)
 {
diff --git a/src/stat/fchmodat.c b/src/stat/fchmodat.c
index be61bdf3..92c9d1b0 100644
--- a/src/stat/fchmodat.c
+++ b/src/stat/fchmodat.c
@@ -5,17 +5,20 @@
 
 int fchmodat(int fd, const char *path, mode_t mode, int flag)
 {
-	if (!flag) return syscall(SYS_fchmodat, fd, path, mode, flag);
+	if (!flag) return syscall(SYS_fchmodat, fd, path, mode);
+
+	int ret = __syscall(SYS_fchmodat2, fd, path, mode, flag);
+	if (ret != -ENOSYS) return __syscall_ret(ret);
 
 	if (flag != AT_SYMLINK_NOFOLLOW)
 		return __syscall_ret(-EINVAL);
 
 	struct stat st;
-	int ret, fd2;
+	int fd2;
 	char proc[15+3*sizeof(int)];
 
-	if ((ret = __syscall(SYS_fstatat, fd, path, &st, flag)))
-		return __syscall_ret(ret);
+	if (fstatat(fd, path, &st, flag))
+		return -1;
 	if (S_ISLNK(st.st_mode))
 		return __syscall_ret(-EOPNOTSUPP);
 
@@ -26,12 +29,12 @@ int fchmodat(int fd, const char *path, mode_t mode, int flag)
 	}
 
 	__procfdname(proc, fd2);
-	ret = __syscall(SYS_fstatat, AT_FDCWD, proc, &st, 0);
+	ret = stat(proc, &st);
 	if (!ret) {
-		if (S_ISLNK(st.st_mode)) ret = -EOPNOTSUPP;
-		else ret = __syscall(SYS_fchmodat, AT_FDCWD, proc, mode);
+		if (S_ISLNK(st.st_mode)) ret = __syscall_ret(-EOPNOTSUPP);
+		else ret = syscall(SYS_fchmodat, AT_FDCWD, proc, mode);
 	}
 
 	__syscall(SYS_close, fd2);
-	return __syscall_ret(ret);
+	return ret;
 }
diff --git a/src/stat/fstat.c b/src/stat/fstat.c
index 4f13f4f0..fd28b8ac 100644
--- a/src/stat/fstat.c
+++ b/src/stat/fstat.c
@@ -1,21 +1,13 @@
+#define _BSD_SOURCE
 #include <sys/stat.h>
 #include <errno.h>
 #include <fcntl.h>
 #include "syscall.h"
 
-int fstat(int fd, struct stat *st)
+int __fstat(int fd, struct stat *st)
 {
-	int ret = __syscall(SYS_fstat, fd, st);
-	if (ret != -EBADF || __syscall(SYS_fcntl, fd, F_GETFD) < 0)
-		return __syscall_ret(ret);
-
-	char buf[15+3*sizeof(int)];
-	__procfdname(buf, fd);
-#ifdef SYS_stat
-	return syscall(SYS_stat, buf, st);
-#else
-	return syscall(SYS_fstatat, AT_FDCWD, buf, st, 0);
-#endif
+	if (fd<0) return __syscall_ret(-EBADF);
+	return __fstatat(fd, "", st, AT_EMPTY_PATH);
 }
 
-weak_alias(fstat, fstat64);
+weak_alias(__fstat, fstat);
diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c
index 582db442..9eed063b 100644
--- a/src/stat/fstatat.c
+++ b/src/stat/fstatat.c
@@ -1,9 +1,154 @@
+#define _BSD_SOURCE
 #include <sys/stat.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdint.h>
+#include <sys/sysmacros.h>
 #include "syscall.h"
 
-int fstatat(int fd, const char *restrict path, struct stat *restrict buf, int flag)
+struct statx {
+	uint32_t stx_mask;
+	uint32_t stx_blksize;
+	uint64_t stx_attributes;
+	uint32_t stx_nlink;
+	uint32_t stx_uid;
+	uint32_t stx_gid;
+	uint16_t stx_mode;
+	uint16_t pad1;
+	uint64_t stx_ino;
+	uint64_t stx_size;
+	uint64_t stx_blocks;
+	uint64_t stx_attributes_mask;
+	struct {
+		int64_t tv_sec;
+		uint32_t tv_nsec;
+		int32_t pad;
+	} stx_atime, stx_btime, stx_ctime, stx_mtime;
+	uint32_t stx_rdev_major;
+	uint32_t stx_rdev_minor;
+	uint32_t stx_dev_major;
+	uint32_t stx_dev_minor;
+	uint64_t spare[14];
+};
+
+static int fstatat_statx(int fd, const char *restrict path, struct stat *restrict st, int flag)
+{
+	struct statx stx;
+
+	flag |= AT_NO_AUTOMOUNT;
+	int ret = __syscall(SYS_statx, fd, path, flag, 0x7ff, &stx);
+	if (ret) return ret;
+
+	*st = (struct stat){
+		.st_dev = makedev(stx.stx_dev_major, stx.stx_dev_minor),
+		.st_ino = stx.stx_ino,
+		.st_mode = stx.stx_mode,
+		.st_nlink = stx.stx_nlink,
+		.st_uid = stx.stx_uid,
+		.st_gid = stx.stx_gid,
+		.st_rdev = makedev(stx.stx_rdev_major, stx.stx_rdev_minor),
+		.st_size = stx.stx_size,
+		.st_blksize = stx.stx_blksize,
+		.st_blocks = stx.stx_blocks,
+		.st_atim.tv_sec = stx.stx_atime.tv_sec,
+		.st_atim.tv_nsec = stx.stx_atime.tv_nsec,
+		.st_mtim.tv_sec = stx.stx_mtime.tv_sec,
+		.st_mtim.tv_nsec = stx.stx_mtime.tv_nsec,
+		.st_ctim.tv_sec = stx.stx_ctime.tv_sec,
+		.st_ctim.tv_nsec = stx.stx_ctime.tv_nsec,
+#if _REDIR_TIME64
+		.__st_atim32.tv_sec = stx.stx_atime.tv_sec,
+		.__st_atim32.tv_nsec = stx.stx_atime.tv_nsec,
+		.__st_mtim32.tv_sec = stx.stx_mtime.tv_sec,
+		.__st_mtim32.tv_nsec = stx.stx_mtime.tv_nsec,
+		.__st_ctim32.tv_sec = stx.stx_ctime.tv_sec,
+		.__st_ctim32.tv_nsec = stx.stx_ctime.tv_nsec,
+#endif
+	};
+	return 0;
+}
+
+#ifdef SYS_fstatat
+
+#include "kstat.h"
+
+static int fstatat_kstat(int fd, const char *restrict path, struct stat *restrict st, int flag)
+{
+	int ret;
+	struct kstat kst;
+
+	if (flag==AT_EMPTY_PATH && fd>=0 && !*path) {
+		ret = __syscall(SYS_fstat, fd, &kst);
+		if (ret==-EBADF && __syscall(SYS_fcntl, fd, F_GETFD)>=0) {
+			ret = __syscall(SYS_fstatat, fd, path, &kst, flag);
+			if (ret==-EINVAL) {
+				char buf[15+3*sizeof(int)];
+				__procfdname(buf, fd);
+#ifdef SYS_stat
+				ret = __syscall(SYS_stat, buf, &kst);
+#else
+				ret = __syscall(SYS_fstatat, AT_FDCWD, buf, &kst, 0);
+#endif
+			}
+		}
+	}
+#ifdef SYS_lstat
+	else if ((fd == AT_FDCWD || *path=='/') && flag==AT_SYMLINK_NOFOLLOW)
+		ret = __syscall(SYS_lstat, path, &kst);
+#endif
+#ifdef SYS_stat
+	else if ((fd == AT_FDCWD || *path=='/') && !flag)
+		ret = __syscall(SYS_stat, path, &kst);
+#endif
+	else ret = __syscall(SYS_fstatat, fd, path, &kst, flag);
+
+	if (ret) return ret;
+
+	*st = (struct stat){
+		.st_dev = kst.st_dev,
+		.st_ino = kst.st_ino,
+		.st_mode = kst.st_mode,
+		.st_nlink = kst.st_nlink,
+		.st_uid = kst.st_uid,
+		.st_gid = kst.st_gid,
+		.st_rdev = kst.st_rdev,
+		.st_size = kst.st_size,
+		.st_blksize = kst.st_blksize,
+		.st_blocks = kst.st_blocks,
+		.st_atim.tv_sec = kst.st_atime_sec,
+		.st_atim.tv_nsec = kst.st_atime_nsec,
+		.st_mtim.tv_sec = kst.st_mtime_sec,
+		.st_mtim.tv_nsec = kst.st_mtime_nsec,
+		.st_ctim.tv_sec = kst.st_ctime_sec,
+		.st_ctim.tv_nsec = kst.st_ctime_nsec,
+#if _REDIR_TIME64
+		.__st_atim32.tv_sec = kst.st_atime_sec,
+		.__st_atim32.tv_nsec = kst.st_atime_nsec,
+		.__st_mtim32.tv_sec = kst.st_mtime_sec,
+		.__st_mtim32.tv_nsec = kst.st_mtime_nsec,
+		.__st_ctim32.tv_sec = kst.st_ctime_sec,
+		.__st_ctim32.tv_nsec = kst.st_ctime_nsec,
+#endif
+	};
+
+	return 0;
+}
+#endif
+
+int __fstatat(int fd, const char *restrict path, struct stat *restrict st, int flag)
 {
-	return syscall(SYS_fstatat, fd, path, buf, flag);
+	int ret;
+#ifdef SYS_fstatat
+	if (sizeof((struct kstat){0}.st_atime_sec) < sizeof(time_t)) {
+		ret = fstatat_statx(fd, path, st, flag);
+		if (ret!=-ENOSYS) return __syscall_ret(ret);
+	}
+	ret = fstatat_kstat(fd, path, st, flag);
+#else
+	ret = fstatat_statx(fd, path, st, flag);
+#endif
+	return __syscall_ret(ret);
 }
 
-weak_alias(fstatat, fstatat64);
+weak_alias(__fstatat, fstatat);
diff --git a/src/stat/lstat.c b/src/stat/lstat.c
index 5b89f290..6822fcae 100644
--- a/src/stat/lstat.c
+++ b/src/stat/lstat.c
@@ -1,14 +1,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
-#include "syscall.h"
 
 int lstat(const char *restrict path, struct stat *restrict buf)
 {
-#ifdef SYS_lstat
-	return syscall(SYS_lstat, path, buf);
-#else
-	return syscall(SYS_fstatat, AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW);
-#endif
+	return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW);
 }
-
-weak_alias(lstat, lstat64);
diff --git a/src/stat/stat.c b/src/stat/stat.c
index 0bec9d6f..23570e7a 100644
--- a/src/stat/stat.c
+++ b/src/stat/stat.c
@@ -1,14 +1,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
-#include "syscall.h"
 
 int stat(const char *restrict path, struct stat *restrict buf)
 {
-#ifdef SYS_stat
-	return syscall(SYS_stat, path, buf);
-#else
-	return syscall(SYS_fstatat, AT_FDCWD, path, buf, 0);
-#endif
+	return fstatat(AT_FDCWD, path, buf, 0);
 }
-
-weak_alias(stat, stat64);
diff --git a/src/stat/statvfs.c b/src/stat/statvfs.c
index f65d1b54..bc12da8b 100644
--- a/src/stat/statvfs.c
+++ b/src/stat/statvfs.c
@@ -39,6 +39,7 @@ static void fixup(struct statvfs *out, const struct statfs *in)
 	out->f_fsid = in->f_fsid.__val[0];
 	out->f_flag = in->f_flags;
 	out->f_namemax = in->f_namelen;
+	out->f_type = in->f_type;
 }
 
 int statvfs(const char *restrict path, struct statvfs *restrict buf)
@@ -56,8 +57,3 @@ int fstatvfs(int fd, struct statvfs *buf)
 	fixup(buf, &kbuf);
 	return 0;
 }
-
-weak_alias(statvfs, statvfs64);
-weak_alias(statfs, statfs64);
-weak_alias(fstatvfs, fstatvfs64);
-weak_alias(fstatfs, fstatfs64);
diff --git a/src/stat/utimensat.c b/src/stat/utimensat.c
index 159c8be3..730723a9 100644
--- a/src/stat/utimensat.c
+++ b/src/stat/utimensat.c
@@ -4,28 +4,51 @@
 #include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define NS_SPECIAL(ns) ((ns)==UTIME_NOW || (ns)==UTIME_OMIT)
+
 int utimensat(int fd, const char *path, const struct timespec times[2], int flags)
 {
-	int r = __syscall(SYS_utimensat, fd, path, times, flags);
+	int r;
+	if (times && times[0].tv_nsec==UTIME_NOW && times[1].tv_nsec==UTIME_NOW)
+		times = 0;
+#ifdef SYS_utimensat_time64
+	r = -ENOSYS;
+	time_t s0=0, s1=0;
+	long ns0=0, ns1=0;
+	if (times) {
+		ns0 = times[0].tv_nsec;
+		ns1 = times[1].tv_nsec;
+		if (!NS_SPECIAL(ns0)) s0 = times[0].tv_sec;
+		if (!NS_SPECIAL(ns1)) s1 = times[1].tv_sec;
+	}
+	if (SYS_utimensat == SYS_utimensat_time64 || !IS32BIT(s0) || !IS32BIT(s1))
+		r = __syscall(SYS_utimensat_time64, fd, path, times ?
+			((long long[]){s0, ns0, s1, ns1}) : 0, flags);
+	if (SYS_utimensat == SYS_utimensat_time64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	if (!IS32BIT(s0) || !IS32BIT(s1))
+		return __syscall_ret(-ENOTSUP);
+	r = __syscall(SYS_utimensat, fd, path,
+		times ? ((long[]){s0, ns0, s1, ns1}) : 0, flags);
+#else
+	r = __syscall(SYS_utimensat, fd, path, times, flags);
+#endif
+
 #ifdef SYS_futimesat
 	if (r != -ENOSYS || flags) return __syscall_ret(r);
-	struct timeval *tv = 0, tmp[2];
+	long *tv=0, tmp[4];
 	if (times) {
 		int i;
 		tv = tmp;
 		for (i=0; i<2; i++) {
 			if (times[i].tv_nsec >= 1000000000ULL) {
-				if (times[i].tv_nsec == UTIME_NOW &&
-				    times[1-i].tv_nsec == UTIME_NOW) {
-					tv = 0;
-					break;
-				}
-				if (times[i].tv_nsec == UTIME_OMIT)
+				if (NS_SPECIAL(times[i].tv_nsec))
 					return __syscall_ret(-ENOSYS);
 				return __syscall_ret(-EINVAL);
 			}
-			tmp[i].tv_sec = times[i].tv_sec;
-			tmp[i].tv_usec = times[i].tv_nsec / 1000;
+			tmp[2*i+0] = times[i].tv_sec;
+			tmp[2*i+1] = times[i].tv_nsec / 1000;
 		}
 	}
 
diff --git a/src/stdio/__stdio_close.c b/src/stdio/__stdio_close.c
index 79452bdb..30291328 100644
--- a/src/stdio/__stdio_close.c
+++ b/src/stdio/__stdio_close.c
@@ -1,4 +1,5 @@
 #include "stdio_impl.h"
+#include "aio_impl.h"
 
 static int dummy(int fd)
 {
diff --git a/src/stdio/__stdio_seek.c b/src/stdio/__stdio_seek.c
index 13e06a66..326ab9bc 100644
--- a/src/stdio/__stdio_seek.c
+++ b/src/stdio/__stdio_seek.c
@@ -1,13 +1,7 @@
 #include "stdio_impl.h"
+#include <unistd.h>
 
 off_t __stdio_seek(FILE *f, off_t off, int whence)
 {
-	off_t ret;
-#ifdef SYS__llseek
-	if (syscall(SYS__llseek, f->fd, off>>32, off, &ret, whence)<0)
-		ret = -1;
-#else
-	ret = syscall(SYS_lseek, f->fd, off, whence);
-#endif
-	return ret;
+	return __lseek(f->fd, off, whence);
 }
diff --git a/src/stdio/__stdio_write.c b/src/stdio/__stdio_write.c
index d2d89475..5356553d 100644
--- a/src/stdio/__stdio_write.c
+++ b/src/stdio/__stdio_write.c
@@ -11,6 +11,11 @@ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
 	size_t rem = iov[0].iov_len + iov[1].iov_len;
 	int iovcnt = 2;
 	ssize_t cnt;
+
+	if (!iov->iov_len) {
+		iov++;
+		iovcnt--;
+	}
 	for (;;) {
 		cnt = syscall(SYS_writev, f->fd, iov, iovcnt);
 		if (cnt == rem) {
diff --git a/src/stdio/__string_read.c b/src/stdio/__string_read.c
deleted file mode 100644
index 7b50a7e1..00000000
--- a/src/stdio/__string_read.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#include "stdio_impl.h"
-#include <string.h>
-
-size_t __string_read(FILE *f, unsigned char *buf, size_t len)
-{
-	char *src = f->cookie;
-	size_t k = len+256;
-	char *end = memchr(src, 0, k);
-	if (end) k = end-src;
-	if (k < len) len = k;
-	memcpy(buf, src, len);
-	f->rpos = (void *)(src+len);
-	f->rend = (void *)(src+k);
-	f->cookie = src+k;
-	return len;
-}
diff --git a/src/stdio/fgetpos.c b/src/stdio/fgetpos.c
index 50813d2c..392f7323 100644
--- a/src/stdio/fgetpos.c
+++ b/src/stdio/fgetpos.c
@@ -7,5 +7,3 @@ int fgetpos(FILE *restrict f, fpos_t *restrict pos)
 	*(long long *)pos = off;
 	return 0;
 }
-
-weak_alias(fgetpos, fgetpos64);
diff --git a/src/stdio/fgets.c b/src/stdio/fgets.c
index 6171f398..4a100b39 100644
--- a/src/stdio/fgets.c
+++ b/src/stdio/fgets.c
@@ -12,13 +12,14 @@ char *fgets(char *restrict s, int n, FILE *restrict f)
 
 	FLOCK(f);
 
-	if (n--<=1) {
+	if (n<=1) {
 		f->mode |= f->mode-1;
 		FUNLOCK(f);
-		if (n) return 0;
+		if (n<1) return 0;
 		*s = 0;
 		return s;
 	}
+	n--;
 
 	while (n) {
 		if (f->rpos != f->rend) {
diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c
index 0801e28f..aa10b818 100644
--- a/src/stdio/fgetwc.c
+++ b/src/stdio/fgetwc.c
@@ -25,12 +25,18 @@ static wint_t __fgetwc_unlocked_internal(FILE *f)
 	do {
 		b = c = getc_unlocked(f);
 		if (c < 0) {
-			if (!first) errno = EILSEQ;
+			if (!first) {
+				f->flags |= F_ERR;
+				errno = EILSEQ;
+			}
 			return WEOF;
 		}
 		l = mbrtowc(&wc, (void *)&b, 1, &st);
 		if (l == -1) {
-			if (!first) ungetc(b, f);
+			if (!first) {
+				f->flags |= F_ERR;
+				ungetc(b, f);
+			}
 			return WEOF;
 		}
 		first = 0;
diff --git a/src/stdio/fgetws.c b/src/stdio/fgetws.c
index b08b3049..195cb435 100644
--- a/src/stdio/fgetws.c
+++ b/src/stdio/fgetws.c
@@ -1,6 +1,5 @@
 #include "stdio_impl.h"
 #include <wchar.h>
-#include <errno.h>
 
 wint_t __fgetwc_unlocked(FILE *);
 
@@ -12,10 +11,6 @@ wchar_t *fgetws(wchar_t *restrict s, int n, FILE *restrict f)
 
 	FLOCK(f);
 
-	/* Setup a dummy errno so we can detect EILSEQ. This is
-	 * the only way to catch encoding errors in the form of a
-	 * partial character just before EOF. */
-	errno = EAGAIN;
 	for (; n; n--) {
 		wint_t c = __fgetwc_unlocked(f);
 		if (c == WEOF) break;
@@ -23,7 +18,7 @@ wchar_t *fgetws(wchar_t *restrict s, int n, FILE *restrict f)
 		if (c == '\n') break;
 	}
 	*p = 0;
-	if (ferror(f) || errno==EILSEQ) p = s;
+	if (ferror(f)) p = s;
 
 	FUNLOCK(f);
 
diff --git a/src/stdio/fmemopen.c b/src/stdio/fmemopen.c
index 82413b2d..343e3e3f 100644
--- a/src/stdio/fmemopen.c
+++ b/src/stdio/fmemopen.c
@@ -2,6 +2,7 @@
 #include <errno.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stddef.h>
 #include <inttypes.h>
 #include "libc.h"
 
@@ -83,7 +84,7 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode)
 	struct mem_FILE *f;
 	int plus = !!strchr(mode, '+');
 	
-	if (!size || !strchr("rwa", *mode)) {
+	if (!strchr("rwa", *mode)) {
 		errno = EINVAL;
 		return 0;
 	}
@@ -95,18 +96,17 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode)
 
 	f = malloc(sizeof *f + (buf?0:size));
 	if (!f) return 0;
-	memset(&f->f, 0, sizeof f->f);
+	memset(f, 0, offsetof(struct mem_FILE, buf));
 	f->f.cookie = &f->c;
 	f->f.fd = -1;
 	f->f.lbf = EOF;
 	f->f.buf = f->buf + UNGET;
 	f->f.buf_size = sizeof f->buf - UNGET;
 	if (!buf) {
-		buf = f->buf2;;
+		buf = f->buf2;
 		memset(buf, 0, size);
 	}
 
-	memset(&f->c, 0, sizeof f->c);
 	f->c.buf = buf;
 	f->c.size = size;
 	f->c.mode = *mode;
diff --git a/src/stdio/fopen.c b/src/stdio/fopen.c
index e1b91e12..80bc341e 100644
--- a/src/stdio/fopen.c
+++ b/src/stdio/fopen.c
@@ -29,5 +29,3 @@ FILE *fopen(const char *restrict filename, const char *restrict mode)
 	__syscall(SYS_close, fd);
 	return 0;
 }
-
-weak_alias(fopen, fopen64);
diff --git a/src/stdio/freopen.c b/src/stdio/freopen.c
index 615d4b47..1641a4c5 100644
--- a/src/stdio/freopen.c
+++ b/src/stdio/freopen.c
@@ -40,6 +40,8 @@ FILE *freopen(const char *restrict filename, const char *restrict mode, FILE *re
 		fclose(f2);
 	}
 
+	f->mode = 0;
+	f->locale = 0;
 	FUNLOCK(f);
 	return f;
 
@@ -49,5 +51,3 @@ fail:
 	fclose(f);
 	return NULL;
 }
-
-weak_alias(freopen, freopen64);
diff --git a/src/stdio/fseek.c b/src/stdio/fseek.c
index 439308f7..c7425802 100644
--- a/src/stdio/fseek.c
+++ b/src/stdio/fseek.c
@@ -1,7 +1,14 @@
 #include "stdio_impl.h"
+#include <errno.h>
 
 int __fseeko_unlocked(FILE *f, off_t off, int whence)
 {
+	/* Fail immediately for invalid whence argument. */
+	if (whence != SEEK_CUR && whence != SEEK_SET && whence != SEEK_END) {
+		errno = EINVAL;
+		return -1;
+	}
+
 	/* Adjust relative offset for unread data in buffer, if any. */
 	if (whence == SEEK_CUR && f->rend) off -= f->rend - f->rpos;
 
@@ -39,5 +46,3 @@ int fseek(FILE *f, long off, int whence)
 }
 
 weak_alias(__fseeko, fseeko);
-
-weak_alias(fseeko, fseeko64);
diff --git a/src/stdio/fsetpos.c b/src/stdio/fsetpos.c
index 77ab8d82..779cb3cc 100644
--- a/src/stdio/fsetpos.c
+++ b/src/stdio/fsetpos.c
@@ -4,5 +4,3 @@ int fsetpos(FILE *f, const fpos_t *pos)
 {
 	return __fseeko(f, *(const long long *)pos, SEEK_SET);
 }
-
-weak_alias(fsetpos, fsetpos64);
diff --git a/src/stdio/ftell.c b/src/stdio/ftell.c
index 1a2afbbc..1e1a08d8 100644
--- a/src/stdio/ftell.c
+++ b/src/stdio/ftell.c
@@ -37,5 +37,3 @@ long ftell(FILE *f)
 }
 
 weak_alias(__ftello, ftello);
-
-weak_alias(ftello, ftello64);
diff --git a/src/stdio/getdelim.c b/src/stdio/getdelim.c
index d2f5b15a..df114441 100644
--- a/src/stdio/getdelim.c
+++ b/src/stdio/getdelim.c
@@ -55,9 +55,11 @@ ssize_t getdelim(char **restrict s, size_t *restrict n, int delim, FILE *restric
 			*s = tmp;
 			*n = m;
 		}
-		memcpy(*s+i, f->rpos, k);
-		f->rpos += k;
-		i += k;
+		if (k) {
+			memcpy(*s+i, f->rpos, k);
+			f->rpos += k;
+			i += k;
+		}
 		if (z) break;
 		if ((c = getc_unlocked(f)) == EOF) {
 			if (!i || !feof(f)) {
diff --git a/src/stdio/gets.c b/src/stdio/gets.c
index 6c4645e5..17963b93 100644
--- a/src/stdio/gets.c
+++ b/src/stdio/gets.c
@@ -4,7 +4,12 @@
 
 char *gets(char *s)
 {
-	char *ret = fgets(s, INT_MAX, stdin);
-	if (ret && s[strlen(s)-1] == '\n') s[strlen(s)-1] = 0;
-	return ret;
+	size_t i=0;
+	int c;
+	FLOCK(stdin);
+	while ((c=getc_unlocked(stdin)) != EOF && c != '\n') s[i++] = c;
+	s[i] = 0;
+	if (c != '\n' && (!feof(stdin) || !i)) s = 0;
+	FUNLOCK(stdin);
+	return s;
 }
diff --git a/src/stdio/ofl.c b/src/stdio/ofl.c
index f2d3215a..aad3d171 100644
--- a/src/stdio/ofl.c
+++ b/src/stdio/ofl.c
@@ -1,8 +1,10 @@
 #include "stdio_impl.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 static FILE *ofl_head;
 static volatile int ofl_lock[1];
+volatile int *const __stdio_ofl_lockptr = ofl_lock;
 
 FILE **__ofl_lock()
 {
diff --git a/src/stdio/open_wmemstream.c b/src/stdio/open_wmemstream.c
index ed1b561d..b8ae4a79 100644
--- a/src/stdio/open_wmemstream.c
+++ b/src/stdio/open_wmemstream.c
@@ -40,8 +40,12 @@ fail:
 static size_t wms_write(FILE *f, const unsigned char *buf, size_t len)
 {
 	struct cookie *c = f->cookie;
-	size_t len2;
+	size_t len2 = f->wpos - f->wbase;
 	wchar_t *newbuf;
+	if (len2) {
+		f->wpos = f->wbase;
+		if (wms_write(f, f->wbase, len2) < len2) return 0;
+	}
 	if (len + c->pos >= c->space) {
 		len2 = 2*c->space+1 | c->pos+len+1;
 		if (len2 > SSIZE_MAX/4) return 0;
diff --git a/src/stdio/pclose.c b/src/stdio/pclose.c
index 080a4262..c64da405 100644
--- a/src/stdio/pclose.c
+++ b/src/stdio/pclose.c
@@ -7,7 +7,7 @@ int pclose(FILE *f)
 	int status, r;
 	pid_t pid = f->pipe_pid;
 	fclose(f);
-	while ((r=__syscall(SYS_wait4, pid, &status, 0, 0)) == -EINTR);
+	while ((r=__sys_wait4(pid, &status, 0, 0)) == -EINTR);
 	if (r<0) return __syscall_ret(r);
 	return status;
 }
diff --git a/src/stdio/popen.c b/src/stdio/popen.c
index 92cb57ee..3ec83394 100644
--- a/src/stdio/popen.c
+++ b/src/stdio/popen.c
@@ -31,25 +31,12 @@ FILE *popen(const char *cmd, const char *mode)
 		__syscall(SYS_close, p[1]);
 		return NULL;
 	}
-	FLOCK(f);
-
-	/* If the child's end of the pipe happens to already be on the final
-	 * fd number to which it will be assigned (either 0 or 1), it must
-	 * be moved to a different fd. Otherwise, there is no safe way to
-	 * remove the close-on-exec flag in the child without also creating
-	 * a file descriptor leak race condition in the parent. */
-	if (p[1-op] == 1-op) {
-		int tmp = fcntl(1-op, F_DUPFD_CLOEXEC, 0);
-		if (tmp < 0) {
-			e = errno;
-			goto fail;
-		}
-		__syscall(SYS_close, p[1-op]);
-		p[1-op] = tmp;
-	}
 
 	e = ENOMEM;
 	if (!posix_spawn_file_actions_init(&fa)) {
+		for (FILE *l = *__ofl_lock(); l; l=l->next)
+			if (l->pipe_pid && posix_spawn_file_actions_addclose(&fa, l->fd))
+				goto fail;
 		if (!posix_spawn_file_actions_adddup2(&fa, p[1-op], 1-op)) {
 			if (!(e = posix_spawn(&pid, "/bin/sh", &fa, 0,
 			    (char *[]){ "sh", "-c", (char *)cmd, 0 }, __environ))) {
@@ -58,13 +45,14 @@ FILE *popen(const char *cmd, const char *mode)
 				if (!strchr(mode, 'e'))
 					fcntl(p[op], F_SETFD, 0);
 				__syscall(SYS_close, p[1-op]);
-				FUNLOCK(f);
+				__ofl_unlock();
 				return f;
 			}
 		}
+fail:
+		__ofl_unlock();
 		posix_spawn_file_actions_destroy(&fa);
 	}
-fail:
 	fclose(f);
 	__syscall(SYS_close, p[1-op]);
 
diff --git a/src/stdio/rename.c b/src/stdio/rename.c
index 04c90c01..f540adb6 100644
--- a/src/stdio/rename.c
+++ b/src/stdio/rename.c
@@ -4,9 +4,11 @@
 
 int rename(const char *old, const char *new)
 {
-#ifdef SYS_rename
+#if defined(SYS_rename)
 	return syscall(SYS_rename, old, new);
-#else
+#elif defined(SYS_renameat)
 	return syscall(SYS_renameat, AT_FDCWD, old, AT_FDCWD, new);
+#else
+	return syscall(SYS_renameat2, AT_FDCWD, old, AT_FDCWD, new, 0);
 #endif
 }
diff --git a/src/stdio/setvbuf.c b/src/stdio/setvbuf.c
index 06ea296c..523dddc8 100644
--- a/src/stdio/setvbuf.c
+++ b/src/stdio/setvbuf.c
@@ -12,13 +12,15 @@ int setvbuf(FILE *restrict f, char *restrict buf, int type, size_t size)
 
 	if (type == _IONBF) {
 		f->buf_size = 0;
-	} else {
+	} else if (type == _IOLBF || type == _IOFBF) {
 		if (buf && size >= UNGET) {
 			f->buf = (void *)(buf + UNGET);
 			f->buf_size = size - UNGET;
 		}
 		if (type == _IOLBF && f->buf_size)
 			f->lbf = '\n';
+	} else {
+		return -1;
 	}
 
 	f->flags |= F_SVB;
diff --git a/src/stdio/tempnam.c b/src/stdio/tempnam.c
index 84f91978..0c65b1f0 100644
--- a/src/stdio/tempnam.c
+++ b/src/stdio/tempnam.c
@@ -36,11 +36,10 @@ char *tempnam(const char *dir, const char *pfx)
 
 	for (try=0; try<MAXTRIES; try++) {
 		__randname(s+l-6);
-#ifdef SYS_lstat
-		r = __syscall(SYS_lstat, s, &(struct stat){0});
+#ifdef SYS_readlink
+		r = __syscall(SYS_readlink, s, (char[1]){0}, 1);
 #else
-		r = __syscall(SYS_fstatat, AT_FDCWD, s,
-			&(struct stat){0}, AT_SYMLINK_NOFOLLOW);
+		r = __syscall(SYS_readlinkat, AT_FDCWD, s, (char[1]){0}, 1);
 #endif
 		if (r == -ENOENT) return strdup(s);
 	}
diff --git a/src/stdio/tmpfile.c b/src/stdio/tmpfile.c
index ae493987..2fa8803f 100644
--- a/src/stdio/tmpfile.c
+++ b/src/stdio/tmpfile.c
@@ -27,5 +27,3 @@ FILE *tmpfile(void)
 	}
 	return 0;
 }
-
-weak_alias(tmpfile, tmpfile64);
diff --git a/src/stdio/tmpnam.c b/src/stdio/tmpnam.c
index 6c7c253a..71dc8bb1 100644
--- a/src/stdio/tmpnam.c
+++ b/src/stdio/tmpnam.c
@@ -16,11 +16,10 @@ char *tmpnam(char *buf)
 	int r;
 	for (try=0; try<MAXTRIES; try++) {
 		__randname(s+12);
-#ifdef SYS_lstat
-		r = __syscall(SYS_lstat, s, &(struct stat){0});
+#ifdef SYS_readlink
+		r = __syscall(SYS_readlink, s, (char[1]){0}, 1);
 #else
-		r = __syscall(SYS_fstatat, AT_FDCWD, s,
-			&(struct stat){0}, AT_SYMLINK_NOFOLLOW);
+		r = __syscall(SYS_readlinkat, AT_FDCWD, s, (char[1]){0}, 1);
 #endif
 		if (r == -ENOENT) return strcpy(buf ? buf : internal, s);
 	}
diff --git a/src/stdio/ungetc.c b/src/stdio/ungetc.c
index 180673a4..bc629d4c 100644
--- a/src/stdio/ungetc.c
+++ b/src/stdio/ungetc.c
@@ -16,5 +16,5 @@ int ungetc(int c, FILE *f)
 	f->flags &= ~F_EOF;
 
 	FUNLOCK(f);
-	return c;
+	return (unsigned char)c;
 }
diff --git a/src/stdio/vdprintf.c b/src/stdio/vdprintf.c
index c35d9b4f..3b9c093b 100644
--- a/src/stdio/vdprintf.c
+++ b/src/stdio/vdprintf.c
@@ -1,14 +1,9 @@
 #include "stdio_impl.h"
 
-static size_t wrap_write(FILE *f, const unsigned char *buf, size_t len)
-{
-	return __stdio_write(f, buf, len);
-}
-
 int vdprintf(int fd, const char *restrict fmt, va_list ap)
 {
 	FILE f = {
-		.fd = fd, .lbf = EOF, .write = wrap_write,
+		.fd = fd, .lbf = EOF, .write = __stdio_write,
 		.buf = (void *)fmt, .buf_size = 0,
 		.lock = -1
 	};
diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index 9b961e7f..76733997 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -52,7 +52,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
-		S('c') = CHAR, S('C') = INT,
+		S('c') = INT, S('C') = UINT,
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
@@ -62,7 +62,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
-		S('c') = INT, S('s') = PTR, S('n') = PTR,
+		S('c') = UINT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
 		S('d') = LLONG, S('i') = LLONG,
@@ -132,7 +132,7 @@ static void pop_arg(union arg *arg, int type, va_list *ap)
 
 static void out(FILE *f, const char *s, size_t l)
 {
-	if (!(f->flags & F_ERR)) __fwritex((void *)s, l, f);
+	if (!ferror(f)) __fwritex((void *)s, l, f);
 }
 
 static void pad(FILE *f, char c, int w, int l, int fl)
@@ -166,7 +166,8 @@ static char *fmt_u(uintmax_t x, char *s)
 {
 	unsigned long y;
 	for (   ; x>ULONG_MAX; x/=10) *--s = '0' + x%10;
-	for (y=x;           y; y/=10) *--s = '0' + y%10;
+	for (y=x;       y>=10; y/=10) *--s = '0' + y%10;
+	if (y) *--s = '0' + y;
 	return s;
 }
 
@@ -177,10 +178,14 @@ static char *fmt_u(uintmax_t x, char *s)
 typedef char compiler_defines_long_double_incorrectly[9-(int)sizeof(long double)];
 #endif
 
-static int fmt_fp(FILE *f, long double y, int w, int p, int fl, int t)
+static int fmt_fp(FILE *f, long double y, int w, int p, int fl, int t, int ps)
 {
-	uint32_t big[(LDBL_MANT_DIG+28)/29 + 1          // mantissa expansion
-		+ (LDBL_MAX_EXP+LDBL_MANT_DIG+28+8)/9]; // exponent expansion
+	int bufsize = (ps==BIGLPRE)
+		? (LDBL_MANT_DIG+28)/29 + 1 +          // mantissa expansion
+		  (LDBL_MAX_EXP+LDBL_MANT_DIG+28+8)/9  // exponent expansion
+		: (DBL_MANT_DIG+28)/29 + 1 +
+		  (DBL_MAX_EXP+DBL_MANT_DIG+28+8)/9;
+	uint32_t big[bufsize];
 	uint32_t *a, *d, *r, *z;
 	int e2=0, e, i, j, l;
 	char buf[9+LDBL_MANT_DIG/4], *s;
@@ -211,18 +216,11 @@ static int fmt_fp(FILE *f, long double y, int w, int p, int fl, int t)
 	if (y) e2--;
 
 	if ((t|32)=='a') {
-		long double round = 8.0;
-		int re;
-
 		if (t&32) prefix += 9;
 		pl += 2;
 
-		if (p<0 || p>=LDBL_MANT_DIG/4-1) re=0;
-		else re=LDBL_MANT_DIG/4-1-p;
-
-		if (re) {
-			round *= 1<<(LDBL_MANT_DIG%4);
-			while (re--) round*=16;
+		if (p>=0 && p<(LDBL_MANT_DIG-1+3)/4) {
+			double round = scalbn(1, LDBL_MANT_DIG-1-(p*4));
 			if (*prefix=='-') {
 				y=-y;
 				y-=round;
@@ -437,7 +435,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uintmax_t)*3];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -478,8 +476,8 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (*s=='*') {
 			if (isdigit(s[1]) && s[2]=='$') {
 				l10n=1;
-				nl_type[s[1]-'0'] = INT;
-				w = nl_arg[s[1]-'0'].i;
+				if (!f) nl_type[s[1]-'0'] = INT, w = 0;
+				else w = nl_arg[s[1]-'0'].i;
 				s+=3;
 			} else if (!l10n) {
 				w = f ? va_arg(*ap, int) : 0;
@@ -491,8 +489,8 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		/* Read precision */
 		if (*s=='.' && s[1]=='*') {
 			if (isdigit(s[2]) && s[3]=='$') {
-				nl_type[s[2]-'0'] = INT;
-				p = nl_arg[s[2]-'0'].i;
+				if (!f) nl_type[s[2]-'0'] = INT, p = 0;
+				else p = nl_arg[s[2]-'0'].i;
 				s+=4;
 			} else if (!l10n) {
 				p = f ? va_arg(*ap, int) : 0;
@@ -521,13 +519,18 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (st==NOARG) {
 			if (argpos>=0) goto inval;
 		} else {
-			if (argpos>=0) nl_type[argpos]=st, arg=nl_arg[argpos];
-			else if (f) pop_arg(&arg, st, ap);
+			if (argpos>=0) {
+				if (!f) nl_type[argpos]=st;
+				else arg=nl_arg[argpos];
+			} else if (f) pop_arg(&arg, st, ap);
 			else return 0;
 		}
 
 		if (!f) continue;
 
+		/* Do not process any new directives once in error state. */
+		if (ferror(f)) return -1;
+
 		z = buf + sizeof(buf);
 		prefix = "-+   0X0x";
 		pl = 0;
@@ -558,11 +561,11 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		case 'x': case 'X':
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
-			if (0) {
+			goto ifmt_tail;
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
-			} if (0) {
+			goto ifmt_tail;
 		case 'd': case 'i':
 			pl=1;
 			if (arg.i>INTMAX_MAX) {
@@ -574,7 +577,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			} else pl=0;
 		case 'u':
 			a = fmt_u(arg.i, z);
-			}
+		ifmt_tail:
 			if (xp && p<0) goto overflow;
 			if (xp) fl &= ~ZERO_PAD;
 			if (!arg.i && !p) {
@@ -583,6 +586,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			}
 			p = MAX(p, z-a + !arg.i);
 			break;
+		narrow_c:
 		case 'c':
 			*(a=z-(p=1))=arg.i;
 			fl &= ~ZERO_PAD;
@@ -597,6 +601,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			fl &= ~ZERO_PAD;
 			break;
 		case 'C':
+			if (!arg.i) goto narrow_c;
 			wc[0] = arg.i;
 			wc[1] = 0;
 			arg.p = wc;
@@ -617,7 +622,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		case 'e': case 'f': case 'g': case 'a':
 		case 'E': case 'F': case 'G': case 'A':
 			if (xp && p<0) goto overflow;
-			l = fmt_fp(f, arg.f, w, p, fl, t);
+			l = fmt_fp(f, arg.f, w, p, fl, t, ps);
 			if (l<0) goto overflow;
 			continue;
 		}
@@ -672,7 +677,7 @@ int vfprintf(FILE *restrict f, const char *restrict fmt, va_list ap)
 
 	FLOCK(f);
 	olderr = f->flags & F_ERR;
-	if (f->mode < 1) f->flags &= ~F_ERR;
+	f->flags &= ~F_ERR;
 	if (!f->buf_size) {
 		saved_buf = f->buf;
 		f->buf = internal_buf;
@@ -688,7 +693,7 @@ int vfprintf(FILE *restrict f, const char *restrict fmt, va_list ap)
 		f->buf_size = 0;
 		f->wpos = f->wbase = f->wend = 0;
 	}
-	if (f->flags & F_ERR) ret = -1;
+	if (ferror(f)) ret = -1;
 	f->flags |= olderr;
 	FUNLOCK(f);
 	va_end(ap2);
diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c
index 9e030fc4..b78a374d 100644
--- a/src/stdio/vfscanf.c
+++ b/src/stdio/vfscanf.c
@@ -57,7 +57,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
 {
 	int width;
 	int size;
-	int alloc;
+	int alloc = 0;
 	int base;
 	const unsigned char *p;
 	int c, t;
@@ -76,6 +76,9 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
 
 	FLOCK(f);
 
+	if (!f->rpos) __toread(f);
+	if (!f->rpos) goto input_fail;
+
 	for (p=(const unsigned char *)fmt; *p; p++) {
 
 		alloc = 0;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 0adf0b7a..59d5471b 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -45,7 +45,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
-		S('c') = CHAR, S('C') = INT,
+		S('c') = INT, S('C') = UINT,
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
@@ -53,7 +53,9 @@ static const unsigned char states[]['z'-'A'+1] = {
 	}, { /* 1: l-prefixed */
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
-		S('c') = INT, S('s') = PTR, S('n') = PTR,
+		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
+		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
+		S('c') = UINT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
 		S('d') = LLONG, S('i') = LLONG,
@@ -123,7 +125,13 @@ static void pop_arg(union arg *arg, int type, va_list *ap)
 
 static void out(FILE *f, const wchar_t *s, size_t l)
 {
-	while (l-- && !(f->flags & F_ERR)) fputwc(*s++, f);
+	while (l-- && !ferror(f)) fputwc(*s++, f);
+}
+
+static void pad(FILE *f, int n, int fl)
+{
+	if ((fl & LEFT_ADJ) || !n || ferror(f)) return;
+	fprintf(f, "%*s", n, "");
 }
 
 static int getint(wchar_t **s) {
@@ -240,6 +248,10 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		}
 
 		if (!f) continue;
+
+		/* Do not process any new directives once in error state. */
+		if (ferror(f)) return -1;
+
 		t = s[-1];
 		if (ps && (t&15)==3) t&=~32;
 
@@ -256,25 +268,22 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			}
 			continue;
 		case 'c':
+		case 'C':
 			if (w<1) w=1;
-			if (w>1 && !(fl&LEFT_ADJ)) fprintf(f, "%*s", w-1, "");
-			fputwc(btowc(arg.i), f);
-			if (w>1 && (fl&LEFT_ADJ)) fprintf(f, "%*s", w-1, "");
+			pad(f, w-1, fl);
+			out(f, &(wchar_t){t=='C' ? arg.i : btowc(arg.i)}, 1);
+			pad(f, w-1, fl^LEFT_ADJ);
 			l = w;
 			continue;
-		case 'C':
-			fputwc(arg.i, f);
-			l = 1;
-			continue;
 		case 'S':
 			a = arg.p;
 			z = a + wcsnlen(a, p<0 ? INT_MAX : p);
 			if (p<0 && *z) goto overflow;
 			p = z-a;
 			if (w<p) w=p;
-			if (!(fl&LEFT_ADJ)) fprintf(f, "%*s", w-p, "");
+			pad(f, w-p, fl);
 			out(f, a, p);
-			if ((fl&LEFT_ADJ)) fprintf(f, "%*s", w-p, "");
+			pad(f, w-p, fl^LEFT_ADJ);
 			l=w;
 			continue;
 		case 'm':
@@ -287,14 +296,14 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			if (p<0 && *bs) goto overflow;
 			p=l;
 			if (w<p) w=p;
-			if (!(fl&LEFT_ADJ)) fprintf(f, "%*s", w-p, "");
+			pad(f, w-p, fl);
 			bs = arg.p;
 			while (l--) {
 				i=mbtowc(&wc, bs, MB_LEN_MAX);
 				bs+=i;
-				fputwc(wc, f);
+				out(f, &wc, 1);
 			}
-			if ((fl&LEFT_ADJ)) fprintf(f, "%*s", w-p, "");
+			pad(f, w-p, fl^LEFT_ADJ);
 			l=w;
 			continue;
 		}
@@ -338,8 +347,8 @@ overflow:
 int vfwprintf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
 {
 	va_list ap2;
-	int nl_type[NL_ARGMAX] = {0};
-	union arg nl_arg[NL_ARGMAX];
+	int nl_type[NL_ARGMAX+1] = {0};
+	union arg nl_arg[NL_ARGMAX+1];
 	int olderr;
 	int ret;
 
@@ -355,7 +364,7 @@ int vfwprintf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
 	olderr = f->flags & F_ERR;
 	f->flags &= ~F_ERR;
 	ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
-	if (f->flags & F_ERR) ret = -1;
+	if (ferror(f)) ret = -1;
 	f->flags |= olderr;
 	FUNLOCK(f);
 	va_end(ap2);
diff --git a/src/stdio/vsnprintf.c b/src/stdio/vsnprintf.c
index b3510a63..409b9c85 100644
--- a/src/stdio/vsnprintf.c
+++ b/src/stdio/vsnprintf.c
@@ -45,11 +45,6 @@ int vsnprintf(char *restrict s, size_t n, const char *restrict fmt, va_list ap)
 		.cookie = &c,
 	};
 
-	if (n > INT_MAX) {
-		errno = EOVERFLOW;
-		return -1;
-	}
-
 	*c.s = 0;
 	return vfprintf(&f, fmt, ap);
 }
diff --git a/src/stdio/vsscanf.c b/src/stdio/vsscanf.c
index 98500225..4d6d259b 100644
--- a/src/stdio/vsscanf.c
+++ b/src/stdio/vsscanf.c
@@ -1,15 +1,25 @@
 #include "stdio_impl.h"
+#include <string.h>
 
-static size_t do_read(FILE *f, unsigned char *buf, size_t len)
+static size_t string_read(FILE *f, unsigned char *buf, size_t len)
 {
-	return __string_read(f, buf, len);
+	char *src = f->cookie;
+	size_t k = len+256;
+	char *end = memchr(src, 0, k);
+	if (end) k = end-src;
+	if (k < len) len = k;
+	memcpy(buf, src, len);
+	f->rpos = (void *)(src+len);
+	f->rend = (void *)(src+k);
+	f->cookie = src+k;
+	return len;
 }
 
 int vsscanf(const char *restrict s, const char *restrict fmt, va_list ap)
 {
 	FILE f = {
 		.buf = (void *)s, .cookie = (void *)s,
-		.read = do_read, .lock = -1
+		.read = string_read, .lock = -1
 	};
 	return vfscanf(&f, fmt, ap);
 }
diff --git a/src/stdio/vswprintf.c b/src/stdio/vswprintf.c
index 7f98c5c9..5e9a4dad 100644
--- a/src/stdio/vswprintf.c
+++ b/src/stdio/vswprintf.c
@@ -18,6 +18,7 @@ static size_t sw_write(FILE *f, const unsigned char *s, size_t l)
 	if (s!=f->wbase && sw_write(f, f->wbase, f->wpos-f->wbase)==-1)
 		return -1;
 	while (c->l && l && (i=mbtowc(c->ws, (void *)s, l))>=0) {
+		if (!i) i=1;
 		s+=i;
 		l-=i;
 		c->l--;
@@ -50,9 +51,6 @@ int vswprintf(wchar_t *restrict s, size_t n, const wchar_t *restrict fmt, va_lis
 
 	if (!n) {
 		return -1;
-	} else if (n > INT_MAX) {
-		errno = EOVERFLOW;
-		return -1;
 	}
 	r = vfwprintf(&f, fmt, ap);
 	sw_write(&f, 0, 0);
diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index da58fd31..ab79dc6f 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 by Valentin Ochs
+/* Copyright (C) 2011 by Lynn Ochs
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
@@ -24,6 +24,7 @@
 /* Smoothsort, an adaptive variant of Heapsort.  Memory usage: O(1).
    Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
 
+#define _BSD_SOURCE
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -31,7 +32,7 @@
 #include "atomic.h"
 #define ntz(x) a_ctz_l((x))
 
-typedef int (*cmpfun)(const void *, const void *);
+typedef int (*cmpfun)(const void *, const void *, void *);
 
 static inline int pntz(size_t p[2]) {
 	int r = ntz(p[0] - 1);
@@ -88,7 +89,7 @@ static inline void shr(size_t p[2], int n)
 	p[1] >>= n;
 }
 
-static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
+static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[])
 {
 	unsigned char *rt, *lf;
 	unsigned char *ar[14 * sizeof(size_t) + 1];
@@ -99,10 +100,10 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
 		rt = head - width;
 		lf = head - width - lp[pshift - 2];
 
-		if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
+		if(cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0) {
 			break;
 		}
-		if((*cmp)(lf, rt) >= 0) {
+		if(cmp(lf, rt, arg) >= 0) {
 			ar[i++] = lf;
 			head = lf;
 			pshift -= 1;
@@ -115,7 +116,7 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
 	cycle(width, ar, i);
 }
 
-static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[])
+static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
 {
 	unsigned char *stepson,
 	              *rt, *lf;
@@ -130,13 +131,13 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
 	ar[0] = head;
 	while(p[0] != 1 || p[1] != 0) {
 		stepson = head - lp[pshift];
-		if((*cmp)(stepson, ar[0]) <= 0) {
+		if(cmp(stepson, ar[0], arg) <= 0) {
 			break;
 		}
 		if(!trusty && pshift > 1) {
 			rt = head - width;
 			lf = head - width - lp[pshift - 2];
-			if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
+			if(cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
 				break;
 			}
 		}
@@ -150,11 +151,11 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
 	}
 	if(!trusty) {
 		cycle(width, ar, i);
-		sift(head, width, cmp, pshift, lp);
+		sift(head, width, cmp, arg, pshift, lp);
 	}
 }
 
-void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
+void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 {
 	size_t lp[12*sizeof(size_t)];
 	size_t i, size = width * nel;
@@ -173,16 +174,16 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
 
 	while(head < high) {
 		if((p[0] & 3) == 3) {
-			sift(head, width, cmp, pshift, lp);
+			sift(head, width, cmp, arg, pshift, lp);
 			shr(p, 2);
 			pshift += 2;
 		} else {
 			if(lp[pshift - 1] >= high - head) {
-				trinkle(head, width, cmp, p, pshift, 0, lp);
+				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 			} else {
-				sift(head, width, cmp, pshift, lp);
+				sift(head, width, cmp, arg, pshift, lp);
 			}
-			
+
 			if(pshift == 1) {
 				shl(p, 1);
 				pshift = 0;
@@ -191,12 +192,12 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
 				pshift = 1;
 			}
 		}
-		
+
 		p[0] |= 1;
 		head += width;
 	}
 
-	trinkle(head, width, cmp, p, pshift, 0, lp);
+	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 
 	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
 		if(pshift <= 1) {
@@ -208,11 +209,13 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
 			pshift -= 2;
 			p[0] ^= 7;
 			shr(p, 1);
-			trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
+			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
 			shl(p, 1);
 			p[0] |= 1;
-			trinkle(head - width, width, cmp, p, pshift, 1, lp);
+			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
 		}
 		head -= width;
 	}
 }
+
+weak_alias(__qsort_r, qsort_r);
diff --git a/src/stdlib/qsort_nr.c b/src/stdlib/qsort_nr.c
new file mode 100644
index 00000000..8ffe71d0
--- /dev/null
+++ b/src/stdlib/qsort_nr.c
@@ -0,0 +1,14 @@
+#define _BSD_SOURCE
+#include <stdlib.h>
+
+typedef int (*cmpfun)(const void *, const void *);
+
+static int wrapper_cmp(const void *v1, const void *v2, void *cmp)
+{
+	return ((cmpfun)cmp)(v1, v2);
+}
+
+void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
+{
+	__qsort_r(base, nel, width, wrapper_cmp, (void *)cmp);
+}
diff --git a/src/stdlib/strtod.c b/src/stdlib/strtod.c
index a5d0118a..39b9daad 100644
--- a/src/stdlib/strtod.c
+++ b/src/stdlib/strtod.c
@@ -28,10 +28,3 @@ long double strtold(const char *restrict s, char **restrict p)
 {
 	return strtox(s, p, 2);
 }
-
-weak_alias(strtof, strtof_l);
-weak_alias(strtod, strtod_l);
-weak_alias(strtold, strtold_l);
-weak_alias(strtof, __strtof_l);
-weak_alias(strtod, __strtod_l);
-weak_alias(strtold, __strtold_l);
diff --git a/src/stdlib/wcstod.c b/src/stdlib/wcstod.c
index 26fe9af8..0deb7010 100644
--- a/src/stdlib/wcstod.c
+++ b/src/stdlib/wcstod.c
@@ -33,8 +33,7 @@ static long double wcstox(const wchar_t *s, wchar_t **p, int prec)
 	unsigned char buf[64];
 	FILE f = {0};
 	f.flags = 0;
-	f.rpos = f.rend = 0;
-	f.buf = buf + 4;
+	f.rpos = f.rend = f.buf = buf + 4;
 	f.buf_size = sizeof buf - 4;
 	f.lock = -1;
 	f.read = do_read;
diff --git a/src/stdlib/wcstol.c b/src/stdlib/wcstol.c
index 4443f577..1eeb495f 100644
--- a/src/stdlib/wcstol.c
+++ b/src/stdlib/wcstol.c
@@ -35,8 +35,7 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign
 	unsigned char buf[64];
 	FILE f = {0};
 	f.flags = 0;
-	f.rpos = f.rend = 0;
-	f.buf = buf + 4;
+	f.rpos = f.rend = f.buf = buf + 4;
 	f.buf_size = sizeof buf - 4;
 	f.lock = -1;
 	f.read = do_read;
diff --git a/src/string/aarch64/memcpy.S b/src/string/aarch64/memcpy.S
new file mode 100644
index 00000000..48bb8a8d
--- /dev/null
+++ b/src/string/aarch64/memcpy.S
@@ -0,0 +1,186 @@
+/*
+ * memcpy - copy memory area
+ *
+ * Copyright (c) 2012-2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ *
+ */
+
+#define dstin   x0
+#define src     x1
+#define count   x2
+#define dst     x3
+#define srcend  x4
+#define dstend  x5
+#define A_l     x6
+#define A_lw    w6
+#define A_h     x7
+#define B_l     x8
+#define B_lw    w8
+#define B_h     x9
+#define C_l     x10
+#define C_lw    w10
+#define C_h     x11
+#define D_l     x12
+#define D_h     x13
+#define E_l     x14
+#define E_h     x15
+#define F_l     x16
+#define F_h     x17
+#define G_l     count
+#define G_h     dst
+#define H_l     src
+#define H_h     srcend
+#define tmp1    x14
+
+/* This implementation of memcpy uses unaligned accesses and branchless
+   sequences to keep the code small, simple and improve performance.
+
+   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+   copies of up to 128 bytes, and large copies.  The overhead of the overlap
+   check is negligible since it is only required for large copies.
+
+   Large copies use a software pipelined loop processing 64 bytes per iteration.
+   The destination pointer is 16-byte aligned to minimize unaligned accesses.
+   The loop tail is handled by always copying 64 bytes from the end.
+*/
+
+.global memcpy
+.type memcpy,%function
+memcpy:
+	add     srcend, src, count
+	add     dstend, dstin, count
+	cmp     count, 128
+	b.hi    .Lcopy_long
+	cmp     count, 32
+	b.hi    .Lcopy32_128
+
+	/* Small copies: 0..32 bytes.  */
+	cmp     count, 16
+	b.lo    .Lcopy16
+	ldp     A_l, A_h, [src]
+	ldp     D_l, D_h, [srcend, -16]
+	stp     A_l, A_h, [dstin]
+	stp     D_l, D_h, [dstend, -16]
+	ret
+
+	/* Copy 8-15 bytes.  */
+.Lcopy16:
+	tbz     count, 3, .Lcopy8
+	ldr     A_l, [src]
+	ldr     A_h, [srcend, -8]
+	str     A_l, [dstin]
+	str     A_h, [dstend, -8]
+	ret
+
+	.p2align 3
+	/* Copy 4-7 bytes.  */
+.Lcopy8:
+	tbz     count, 2, .Lcopy4
+	ldr     A_lw, [src]
+	ldr     B_lw, [srcend, -4]
+	str     A_lw, [dstin]
+	str     B_lw, [dstend, -4]
+	ret
+
+	/* Copy 0..3 bytes using a branchless sequence.  */
+.Lcopy4:
+	cbz     count, .Lcopy0
+	lsr     tmp1, count, 1
+	ldrb    A_lw, [src]
+	ldrb    C_lw, [srcend, -1]
+	ldrb    B_lw, [src, tmp1]
+	strb    A_lw, [dstin]
+	strb    B_lw, [dstin, tmp1]
+	strb    C_lw, [dstend, -1]
+.Lcopy0:
+	ret
+
+	.p2align 4
+	/* Medium copies: 33..128 bytes.  */
+.Lcopy32_128:
+	ldp     A_l, A_h, [src]
+	ldp     B_l, B_h, [src, 16]
+	ldp     C_l, C_h, [srcend, -32]
+	ldp     D_l, D_h, [srcend, -16]
+	cmp     count, 64
+	b.hi    .Lcopy128
+	stp     A_l, A_h, [dstin]
+	stp     B_l, B_h, [dstin, 16]
+	stp     C_l, C_h, [dstend, -32]
+	stp     D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Copy 65..128 bytes.  */
+.Lcopy128:
+	ldp     E_l, E_h, [src, 32]
+	ldp     F_l, F_h, [src, 48]
+	cmp     count, 96
+	b.ls    .Lcopy96
+	ldp     G_l, G_h, [srcend, -64]
+	ldp     H_l, H_h, [srcend, -48]
+	stp     G_l, G_h, [dstend, -64]
+	stp     H_l, H_h, [dstend, -48]
+.Lcopy96:
+	stp     A_l, A_h, [dstin]
+	stp     B_l, B_h, [dstin, 16]
+	stp     E_l, E_h, [dstin, 32]
+	stp     F_l, F_h, [dstin, 48]
+	stp     C_l, C_h, [dstend, -32]
+	stp     D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Copy more than 128 bytes.  */
+.Lcopy_long:
+
+	/* Copy 16 bytes and then align dst to 16-byte alignment.  */
+
+	ldp     D_l, D_h, [src]
+	and     tmp1, dstin, 15
+	bic     dst, dstin, 15
+	sub     src, src, tmp1
+	add     count, count, tmp1      /* Count is now 16 too large.  */
+	ldp     A_l, A_h, [src, 16]
+	stp     D_l, D_h, [dstin]
+	ldp     B_l, B_h, [src, 32]
+	ldp     C_l, C_h, [src, 48]
+	ldp     D_l, D_h, [src, 64]!
+	subs    count, count, 128 + 16  /* Test and readjust count.  */
+	b.ls    .Lcopy64_from_end
+
+.Lloop64:
+	stp     A_l, A_h, [dst, 16]
+	ldp     A_l, A_h, [src, 16]
+	stp     B_l, B_h, [dst, 32]
+	ldp     B_l, B_h, [src, 32]
+	stp     C_l, C_h, [dst, 48]
+	ldp     C_l, C_h, [src, 48]
+	stp     D_l, D_h, [dst, 64]!
+	ldp     D_l, D_h, [src, 64]!
+	subs    count, count, 64
+	b.hi    .Lloop64
+
+	/* Write the last iteration and copy 64 bytes from the end.  */
+.Lcopy64_from_end:
+	ldp     E_l, E_h, [srcend, -64]
+	stp     A_l, A_h, [dst, 16]
+	ldp     A_l, A_h, [srcend, -48]
+	stp     B_l, B_h, [dst, 32]
+	ldp     B_l, B_h, [srcend, -32]
+	stp     C_l, C_h, [dst, 48]
+	ldp     C_l, C_h, [srcend, -16]
+	stp     D_l, D_h, [dst, 64]
+	stp     E_l, E_h, [dstend, -64]
+	stp     A_l, A_h, [dstend, -48]
+	stp     B_l, B_h, [dstend, -32]
+	stp     C_l, C_h, [dstend, -16]
+	ret
+
+.size memcpy,.-memcpy
diff --git a/src/string/aarch64/memset.S b/src/string/aarch64/memset.S
new file mode 100644
index 00000000..f0d29b7f
--- /dev/null
+++ b/src/string/aarch64/memset.S
@@ -0,0 +1,115 @@
+/*
+ * memset - fill memory with a constant byte
+ *
+ * Copyright (c) 2012-2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
+ *
+ */
+
+#define dstin   x0
+#define val     x1
+#define valw    w1
+#define count   x2
+#define dst     x3
+#define dstend  x4
+#define zva_val x5
+
+.global memset
+.type memset,%function
+memset:
+
+	dup     v0.16B, valw
+	add     dstend, dstin, count
+
+	cmp     count, 96
+	b.hi    .Lset_long
+	cmp     count, 16
+	b.hs    .Lset_medium
+	mov     val, v0.D[0]
+
+	/* Set 0..15 bytes.  */
+	tbz     count, 3, 1f
+	str     val, [dstin]
+	str     val, [dstend, -8]
+	ret
+	nop
+1:      tbz     count, 2, 2f
+	str     valw, [dstin]
+	str     valw, [dstend, -4]
+	ret
+2:      cbz     count, 3f
+	strb    valw, [dstin]
+	tbz     count, 1, 3f
+	strh    valw, [dstend, -2]
+3:      ret
+
+	/* Set 17..96 bytes.  */
+.Lset_medium:
+	str     q0, [dstin]
+	tbnz    count, 6, .Lset96
+	str     q0, [dstend, -16]
+	tbz     count, 5, 1f
+	str     q0, [dstin, 16]
+	str     q0, [dstend, -32]
+1:      ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+.Lset96:
+	str     q0, [dstin, 16]
+	stp     q0, q0, [dstin, 32]
+	stp     q0, q0, [dstend, -32]
+	ret
+
+	.p2align 4
+.Lset_long:
+	and     valw, valw, 255
+	bic     dst, dstin, 15
+	str     q0, [dstin]
+	cmp     count, 160
+	ccmp    valw, 0, 0, hs
+	b.ne    .Lno_zva
+
+#ifndef SKIP_ZVA_CHECK
+	mrs     zva_val, dczid_el0
+	and     zva_val, zva_val, 31
+	cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
+	b.ne    .Lno_zva
+#endif
+	str     q0, [dst, 16]
+	stp     q0, q0, [dst, 32]
+	bic     dst, dst, 63
+	sub     count, dstend, dst      /* Count is now 64 too large.  */
+	sub     count, count, 128       /* Adjust count and bias for loop.  */
+
+	.p2align 4
+.Lzva_loop:
+	add     dst, dst, 64
+	dc      zva, dst
+	subs    count, count, 64
+	b.hi    .Lzva_loop
+	stp     q0, q0, [dstend, -64]
+	stp     q0, q0, [dstend, -32]
+	ret
+
+.Lno_zva:
+	sub     count, dstend, dst      /* Count is 16 too large.  */
+	sub     dst, dst, 16            /* Dst is biased by -32.  */
+	sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
+.Lno_zva_loop:
+	stp     q0, q0, [dst, 32]
+	stp     q0, q0, [dst, 64]!
+	subs    count, count, 64
+	b.hi    .Lno_zva_loop
+	stp     q0, q0, [dstend, -64]
+	stp     q0, q0, [dstend, -32]
+	ret
+
+.size memset,.-memset
+
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
index 9cfbcb2a..869e3448 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy.S
@@ -1,5 +1,3 @@
-#if !__ARMEB__ && !__thumb__
-
 /*
  * Copyright (C) 2008 The Android Open Source Project
  * All rights reserved.
@@ -40,8 +38,9 @@
  * This file has been modified from the original for use in musl libc.
  * The main changes are: addition of .type memcpy,%function to make the
  * code safely callable from thumb mode, adjusting the return
- * instructions to be compatible with pre-thumb ARM cpus, and removal
- * of prefetch code that is not compatible with older cpus.
+ * instructions to be compatible with pre-thumb ARM cpus, removal of
+ * prefetch code that is not compatible with older cpus and support for
+ * building as thumb 2 and big-endian.
  */
 
 .syntax unified
@@ -226,23 +225,45 @@ non_congruent:
 	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
 	 */
 	movs    r5, r5, lsl #31
+
+#if __ARMEB__
+	movmi   r3, r3, ror #24
+	strbmi	r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs	r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs	r3, [r0], #1
+#else
 	strbmi r3, [r0], #1
 	movmi   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
+#endif
 
 	cmp     r2, #4
 	blo     partial_word_tail
 
+#if __ARMEB__
+	mov	r3, r3, lsr r12
+	mov	r3, r3, lsl r12
+#endif
+
 	/* Align destination to 32 bytes (cache line boundary) */
 1:      tst     r0, #0x1c
 	beq     2f
 	ldr     r5, [r1], #4
 	sub     r2, r2, #4
-	orr     r4, r3, r5,             lsl lr
+#if __ARMEB__
+	mov     r4, r5,                 lsr lr
+	orr     r4, r4, r3
+	mov     r3, r5,                 lsl r12
+#else
+	mov     r4, r5,                 lsl lr
+	orr     r4, r4, r3
 	mov     r3, r5,                 lsr r12
+#endif
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
@@ -268,6 +289,25 @@ loop16:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #16
+	mov     r4, r4, lsl #16
+	orr     r4, r4, r5, lsr #16
+	mov     r5, r5, lsl #16
+	orr     r5, r5, r6, lsr #16
+	mov     r6, r6, lsl #16
+	orr     r6, r6, r7, lsr #16
+	mov     r7, r7, lsl #16
+	orr     r7, r7, r8, lsr #16
+	mov     r8, r8, lsl #16
+	orr     r8, r8, r9, lsr #16
+	mov     r9, r9, lsl #16
+	orr     r9, r9, r10, lsr #16
+	mov     r10, r10,               lsl #16
+	orr     r10, r10, r11, lsr #16
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #16
+#else
 	orr     r3, r3, r4, lsl #16
 	mov     r4, r4, lsr #16
 	orr     r4, r4, r5, lsl #16
@@ -285,6 +325,7 @@ loop16:
 	orr     r10, r10, r11, lsl #16
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #16
+#endif
 	bhs     1b
 	b       less_than_thirtytwo
 
@@ -294,6 +335,25 @@ loop8:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #24
+	mov     r4, r4, lsl #8
+	orr     r4, r4, r5, lsr #24
+	mov     r5, r5, lsl #8
+	orr     r5, r5, r6, lsr #24
+	mov     r6, r6,  lsl #8
+	orr     r6, r6, r7, lsr #24
+	mov     r7, r7,  lsl #8
+	orr     r7, r7, r8,             lsr #24
+	mov     r8, r8,  lsl #8
+	orr     r8, r8, r9,             lsr #24
+	mov     r9, r9,  lsl #8
+	orr     r9, r9, r10,    lsr #24
+	mov     r10, r10, lsl #8
+	orr     r10, r10, r11,  lsr #24
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #8
+#else
 	orr     r3, r3, r4, lsl #24
 	mov     r4, r4, lsr #8
 	orr     r4, r4, r5, lsl #24
@@ -311,6 +371,7 @@ loop8:
 	orr     r10, r10, r11,  lsl #24
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #8
+#endif
 	bhs     1b
 	b       less_than_thirtytwo
 
@@ -320,6 +381,25 @@ loop24:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #8
+	mov     r4, r4, lsl #24
+	orr     r4, r4, r5, lsr #8
+	mov     r5, r5, lsl #24
+	orr     r5, r5, r6, lsr #8
+	mov     r6, r6, lsl #24
+	orr     r6, r6, r7, lsr #8
+	mov     r7, r7, lsl #24
+	orr     r7, r7, r8, lsr #8
+	mov     r8, r8, lsl #24
+	orr     r8, r8, r9, lsr #8
+	mov     r9, r9, lsl #24
+	orr     r9, r9, r10, lsr #8
+	mov     r10, r10, lsl #24
+	orr     r10, r10, r11, lsr #8
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #24
+#else
 	orr     r3, r3, r4, lsl #8
 	mov     r4, r4, lsr #24
 	orr     r4, r4, r5, lsl #8
@@ -337,6 +417,7 @@ loop24:
 	orr     r10, r10, r11, lsl #8
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #24
+#endif
 	bhs     1b
 
 less_than_thirtytwo:
@@ -348,8 +429,15 @@ less_than_thirtytwo:
 
 1:      ldr     r5, [r1], #4
 	sub     r2, r2, #4
-	orr     r4, r3, r5,             lsl lr
+#if __ARMEB__
+	mov     r4, r5,                 lsr lr
+	orr     r4, r4, r3
+	mov     r3,     r5,                     lsl r12
+#else
+	mov     r4, r5,                 lsl lr
+	orr     r4, r4, r3
 	mov     r3,     r5,                     lsr r12
+#endif
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
@@ -357,11 +445,20 @@ less_than_thirtytwo:
 partial_word_tail:
 	/* we have a partial word in the input buffer */
 	movs    r5, lr, lsl #(31-3)
+#if __ARMEB__
+	movmi   r3, r3, ror #24
+	strbmi r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs r3, [r0], #1
+#else
 	strbmi r3, [r0], #1
 	movmi   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
 	strbcs r3, [r0], #1
+#endif
 
 	/* Refill spilled registers from the stack. Don't update sp. */
 	ldmfd   sp, {r5-r11}
@@ -380,4 +477,3 @@ copy_last_3_and_return:
 	ldmfd   sp!, {r0, r4, lr}
 	bx      lr
 
-#endif
diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
deleted file mode 100644
index f703c9bd..00000000
--- a/src/string/arm/memcpy.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#if __ARMEB__ || __thumb__
-#include "../memcpy.c"
-#endif
diff --git a/src/string/memccpy.c b/src/string/memccpy.c
index 00c18e2b..3b0a3700 100644
--- a/src/string/memccpy.c
+++ b/src/string/memccpy.c
@@ -29,6 +29,6 @@ void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n)
 #endif
 	for (; n && (*d=*s)!=c; n--, s++, d++);
 tail:
-	if (n && *s==c) return d+1;
+	if (n) return d+1;
 	return 0;
 }
diff --git a/src/string/memmem.c b/src/string/memmem.c
index 58a21fcd..11eff86e 100644
--- a/src/string/memmem.c
+++ b/src/string/memmem.c
@@ -12,8 +12,8 @@ static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned cha
 
 static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
 {
-	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
-	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8;
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
 	for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
 		if (hw == nw) return (char *)h-3;
 	return hw == nw ? (char *)h-3 : 0;
@@ -21,8 +21,8 @@ static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned c
 
 static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
 {
-	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
-	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
 	for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
 		if (hw == nw) return (char *)h-4;
 	return hw == nw ? (char *)h-4 : 0;
diff --git a/src/string/strcasestr.c b/src/string/strcasestr.c
index af109f36..dc598bc3 100644
--- a/src/string/strcasestr.c
+++ b/src/string/strcasestr.c
@@ -4,6 +4,7 @@
 char *strcasestr(const char *h, const char *n)
 {
 	size_t l = strlen(n);
+	if (!l) return (char *)h;
 	for (; *h; h++) if (!strncasecmp(h, n, l)) return (char *)h;
 	return 0;
 }
diff --git a/src/string/strsignal.c b/src/string/strsignal.c
index 96bfe841..5156366e 100644
--- a/src/string/strsignal.c
+++ b/src/string/strsignal.c
@@ -31,7 +31,11 @@ static const char map[] = {
 	[SIGPIPE]   = 13,
 	[SIGALRM]   = 14,
 	[SIGTERM]   = 15,
+#if defined(SIGSTKFLT)
 	[SIGSTKFLT] = 16,
+#elif defined(SIGEMT)
+	[SIGEMT]    = 16,
+#endif
 	[SIGCHLD]   = 17,
 	[SIGCONT]   = 18,
 	[SIGSTOP]   = 19,
@@ -70,7 +74,13 @@ static const char strings[] =
 	"Broken pipe\0"
 	"Alarm clock\0"
 	"Terminated\0"
+#if defined(SIGSTKFLT)
 	"Stack fault\0"
+#elif defined(SIGEMT)
+	"Emulator trap\0"
+#else
+	"Unknown signal\0"
+#endif
 	"Child process status\0"
 	"Continued\0"
 	"Stopped (signal)\0"
diff --git a/src/string/strstr.c b/src/string/strstr.c
index 55ba1c7b..96657bc2 100644
--- a/src/string/strstr.c
+++ b/src/string/strstr.c
@@ -10,16 +10,16 @@ static char *twobyte_strstr(const unsigned char *h, const unsigned char *n)
 
 static char *threebyte_strstr(const unsigned char *h, const unsigned char *n)
 {
-	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
-	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8;
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
 	for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8);
 	return *h ? (char *)h-2 : 0;
 }
 
 static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n)
 {
-	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
-	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
 	for (h+=3; *h && hw != nw; hw = hw<<8 | *++h);
 	return *h ? (char *)h-3 : 0;
 }
@@ -96,7 +96,7 @@ static char *twoway_strstr(const unsigned char *h, const unsigned char *n)
 	for (;;) {
 		/* Update incremental end-of-haystack pointer */
 		if (z-h < l) {
-			/* Fast estimate for MIN(l,63) */
+			/* Fast estimate for MAX(l,63) */
 			size_t grow = l | 63;
 			const unsigned char *z2 = memchr(z, 0, grow);
 			if (z2) {
diff --git a/src/string/strverscmp.c b/src/string/strverscmp.c
index 4daf276d..16c1da22 100644
--- a/src/string/strverscmp.c
+++ b/src/string/strverscmp.c
@@ -18,9 +18,9 @@ int strverscmp(const char *l0, const char *r0)
 		else if (c!='0') z=0;
 	}
 
-	if (l[dp]!='0' && r[dp]!='0') {
-		/* If we're not looking at a digit sequence that began
-		 * with a zero, longest digit string is greater. */
+	if (l[dp]-'1'<9U && r[dp]-'1'<9U) {
+		/* If we're looking at non-degenerate digit sequences starting
+		 * with nonzero digits, longest digit string is greater. */
 		for (j=i; isdigit(l[j]); j++)
 			if (!isdigit(r[j])) return 1;
 		if (isdigit(r[j])) return -1;
diff --git a/src/string/wcscmp.c b/src/string/wcscmp.c
index 26eeee70..286ec3ea 100644
--- a/src/string/wcscmp.c
+++ b/src/string/wcscmp.c
@@ -3,5 +3,5 @@
 int wcscmp(const wchar_t *l, const wchar_t *r)
 {
 	for (; *l==*r && *l && *r; l++, r++);
-	return *l - *r;
+	return *l < *r ? -1 : *l > *r;
 }
diff --git a/src/string/wcsncmp.c b/src/string/wcsncmp.c
index 4ab32a92..2b3558bf 100644
--- a/src/string/wcsncmp.c
+++ b/src/string/wcsncmp.c
@@ -3,5 +3,5 @@
 int wcsncmp(const wchar_t *l, const wchar_t *r, size_t n)
 {
 	for (; n && *l==*r && *l && *r; n--, l++, r++);
-	return n ? *l - *r : 0;
+	return n ? (*l < *r ? -1 : *l > *r) : 0;
 }
diff --git a/src/string/wmemcmp.c b/src/string/wmemcmp.c
index 2a193263..717d77b1 100644
--- a/src/string/wmemcmp.c
+++ b/src/string/wmemcmp.c
@@ -3,5 +3,5 @@
 int wmemcmp(const wchar_t *l, const wchar_t *r, size_t n)
 {
 	for (; n && *l==*r; n--, l++, r++);
-	return n ? *l-*r : 0;
+	return n ? (*l < *r ? -1 : *l > *r) : 0;
 }
diff --git a/src/temp/__randname.c b/src/temp/__randname.c
index 2bce37a0..e9b970f1 100644
--- a/src/temp/__randname.c
+++ b/src/temp/__randname.c
@@ -1,5 +1,6 @@
 #include <time.h>
 #include <stdint.h>
+#include "pthread_impl.h"
 
 /* This assumes that a check for the
    template size has already been made */
@@ -10,7 +11,7 @@ char *__randname(char *template)
 	unsigned long r;
 
 	__clock_gettime(CLOCK_REALTIME, &ts);
-	r = ts.tv_nsec*65537 ^ (uintptr_t)&ts / 16 + (uintptr_t)template;
+	r = ts.tv_sec + ts.tv_nsec + __pthread_self()->tid * 65537UL;
 	for (i=0; i<6; i++, r>>=5)
 		template[i] = 'A'+(r&15)+(r&16)*2;
 
diff --git a/src/temp/mkostemp.c b/src/temp/mkostemp.c
index d8dcb805..e3dfdd91 100644
--- a/src/temp/mkostemp.c
+++ b/src/temp/mkostemp.c
@@ -5,5 +5,3 @@ int mkostemp(char *template, int flags)
 {
 	return __mkostemps(template, 0, flags);
 }
-
-weak_alias(mkostemp, mkostemp64);
diff --git a/src/temp/mkostemps.c b/src/temp/mkostemps.c
index ef24eeae..093d2380 100644
--- a/src/temp/mkostemps.c
+++ b/src/temp/mkostemps.c
@@ -26,4 +26,3 @@ int __mkostemps(char *template, int len, int flags)
 }
 
 weak_alias(__mkostemps, mkostemps);
-weak_alias(__mkostemps, mkostemps64);
diff --git a/src/temp/mkstemp.c b/src/temp/mkstemp.c
index 166b8afe..76c835bb 100644
--- a/src/temp/mkstemp.c
+++ b/src/temp/mkstemp.c
@@ -4,5 +4,3 @@ int mkstemp(char *template)
 {
 	return __mkostemps(template, 0, 0);
 }
-
-weak_alias(mkstemp, mkstemp64);
diff --git a/src/temp/mkstemps.c b/src/temp/mkstemps.c
index 6b7531b5..f8eabfec 100644
--- a/src/temp/mkstemps.c
+++ b/src/temp/mkstemps.c
@@ -5,5 +5,3 @@ int mkstemps(char *template, int len)
 {
 	return __mkostemps(template, len, 0);
 }
-
-weak_alias(mkstemps, mkstemps64);
diff --git a/src/termios/cfgetospeed.c b/src/termios/cfgetospeed.c
index 55fa6f55..de46a1d8 100644
--- a/src/termios/cfgetospeed.c
+++ b/src/termios/cfgetospeed.c
@@ -9,5 +9,5 @@ speed_t cfgetospeed(const struct termios *tio)
 
 speed_t cfgetispeed(const struct termios *tio)
 {
-	return cfgetospeed(tio);
+	return (tio->c_cflag & CIBAUD) / (CIBAUD/CBAUD);
 }
diff --git a/src/termios/cfsetospeed.c b/src/termios/cfsetospeed.c
index c9cbdd9d..3eab092a 100644
--- a/src/termios/cfsetospeed.c
+++ b/src/termios/cfsetospeed.c
@@ -16,7 +16,11 @@ int cfsetospeed(struct termios *tio, speed_t speed)
 
 int cfsetispeed(struct termios *tio, speed_t speed)
 {
-	return speed ? cfsetospeed(tio, speed) : 0;
+	if (speed & ~CBAUD) {
+		errno = EINVAL;
+		return -1;
+	}
+	tio->c_cflag &= ~CIBAUD;
+	tio->c_cflag |= speed * (CIBAUD/CBAUD);
+	return 0;
 }
-
-weak_alias(cfsetospeed, cfsetspeed);
diff --git a/src/termios/cfsetspeed.c b/src/termios/cfsetspeed.c
new file mode 100644
index 00000000..2c369db9
--- /dev/null
+++ b/src/termios/cfsetspeed.c
@@ -0,0 +1,11 @@
+#define _BSD_SOURCE
+#include <termios.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+int cfsetspeed(struct termios *tio, speed_t speed)
+{
+	int r = cfsetospeed(tio, speed);
+	if (!r) cfsetispeed(tio, 0);
+	return r;
+}
diff --git a/src/termios/tcgetwinsize.c b/src/termios/tcgetwinsize.c
new file mode 100644
index 00000000..9b3a65a4
--- /dev/null
+++ b/src/termios/tcgetwinsize.c
@@ -0,0 +1,8 @@
+#include <termios.h>
+#include <sys/ioctl.h>
+#include "syscall.h"
+
+int tcgetwinsize(int fd, struct winsize *wsz)
+{
+	return syscall(SYS_ioctl, fd, TIOCGWINSZ, wsz);
+}
diff --git a/src/termios/tcsetwinsize.c b/src/termios/tcsetwinsize.c
new file mode 100644
index 00000000..e01d0e25
--- /dev/null
+++ b/src/termios/tcsetwinsize.c
@@ -0,0 +1,8 @@
+#include <termios.h>
+#include <sys/ioctl.h>
+#include "syscall.h"
+
+int tcsetwinsize(int fd, const struct winsize *wsz)
+{
+	return syscall(SYS_ioctl, fd, TIOCSWINSZ, wsz);
+}
diff --git a/src/thread/__lock.c b/src/thread/__lock.c
index 45557c88..60eece49 100644
--- a/src/thread/__lock.c
+++ b/src/thread/__lock.c
@@ -18,9 +18,11 @@
 
 void __lock(volatile int *l)
 {
-	if (!libc.threads_minus_1) return;
+	int need_locks = libc.need_locks;
+	if (!need_locks) return;
 	/* fast path: INT_MIN for the lock, +1 for the congestion */
 	int current = a_cas(l, 0, INT_MIN + 1);
+	if (need_locks < 0) libc.need_locks = 0;
 	if (!current) return;
 	/* A first spin loop, for medium congestion. */
 	for (unsigned i = 0; i < 10; ++i) {
diff --git a/src/thread/__syscall_cp.c b/src/thread/__syscall_cp.c
index af666f06..42a01674 100644
--- a/src/thread/__syscall_cp.c
+++ b/src/thread/__syscall_cp.c
@@ -7,7 +7,7 @@ static long sccp(syscall_arg_t nr,
                  syscall_arg_t u, syscall_arg_t v, syscall_arg_t w,
                  syscall_arg_t x, syscall_arg_t y, syscall_arg_t z)
 {
-	return (__syscall)(nr, u, v, w, x, y, z);
+	return __syscall(nr, u, v, w, x, y, z);
 }
 
 weak_alias(sccp, __syscall_cp_c);
diff --git a/src/thread/__timedwait.c b/src/thread/__timedwait.c
index 229db313..666093be 100644
--- a/src/thread/__timedwait.c
+++ b/src/thread/__timedwait.c
@@ -5,6 +5,30 @@
 #include "syscall.h"
 #include "pthread_impl.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+static int __futex4_cp(volatile void *addr, int op, int val, const struct timespec *to)
+{
+	int r;
+#ifdef SYS_futex_time64
+	time_t s = to ? to->tv_sec : 0;
+	long ns = to ? to->tv_nsec : 0;
+	r = -ENOSYS;
+	if (SYS_futex == SYS_futex_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_futex_time64, addr, op, val,
+			to ? ((long long[]){s, ns}) : 0);
+	if (SYS_futex == SYS_futex_time64 || r!=-ENOSYS) return r;
+	to = to ? (void *)(long[]){CLAMP(s), ns} : 0;
+#endif
+	r = __syscall_cp(SYS_futex, addr, op, val, to);
+	if (r != -ENOSYS) return r;
+	return __syscall_cp(SYS_futex, addr, op & ~FUTEX_PRIVATE, val, to);
+}
+
+static volatile int dummy = 0;
+weak_alias(dummy, __eintr_valid_flag);
+
 int __timedwait_cp(volatile int *addr, int val,
 	clockid_t clk, const struct timespec *at, int priv)
 {
@@ -25,9 +49,13 @@ int __timedwait_cp(volatile int *addr, int val,
 		top = &to;
 	}
 
-	r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|priv, val, top);
-	if (r == ENOSYS) r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT, val, top);
+	r = -__futex4_cp(addr, FUTEX_WAIT|priv, val, top);
 	if (r != EINTR && r != ETIMEDOUT && r != ECANCELED) r = 0;
+	/* Mitigate bug in old kernels wrongly reporting EINTR for non-
+	 * interrupting (SA_RESTART) signal handlers. This is only practical
+	 * when NO interrupting signal handlers have been installed, and
+	 * works by sigaction tracking whether that's the case. */
+	if (r == EINTR && !__eintr_valid_flag) r = 0;
 
 	return r;
 }
diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c
index d7afdabd..19524fe0 100644
--- a/src/thread/__tls_get_addr.c
+++ b/src/thread/__tls_get_addr.c
@@ -1,12 +1,7 @@
-#include <stddef.h>
 #include "pthread_impl.h"
 
 void *__tls_get_addr(tls_mod_off_t *v)
 {
 	pthread_t self = __pthread_self();
-	if (v[0] <= self->dtv[0])
-		return (void *)(self->dtv[v[0]] + v[1]);
-	return __tls_get_new(v);
+	return (void *)(self->dtv[v[0]] + v[1]);
 }
-
-weak_alias(__tls_get_addr, __tls_get_new);
diff --git a/src/thread/__unmapself.c b/src/thread/__unmapself.c
index 1d3bee1d..31d94e67 100644
--- a/src/thread/__unmapself.c
+++ b/src/thread/__unmapself.c
@@ -4,7 +4,6 @@
 /* cheat and reuse CRTJMP macro from dynlink code */
 #include "dynlink.h"
 
-static volatile int lock;
 static void *unmap_base;
 static size_t unmap_size;
 static char shared_stack[256];
@@ -17,12 +16,8 @@ static void do_unmap()
 
 void __unmapself(void *base, size_t size)
 {
-	int tid=__pthread_self()->tid;
 	char *stack = shared_stack + sizeof shared_stack;
 	stack -= (uintptr_t)stack % 16;
-	while (lock || a_cas(&lock, 0, tid))
-		a_spin();
-	__syscall(SYS_set_tid_address, &lock);
 	unmap_base = base;
 	unmap_size = size;
 	CRTJMP(do_unmap, stack);
diff --git a/src/thread/aarch64/clone.s b/src/thread/aarch64/clone.s
index e3c83395..9ac272bd 100644
--- a/src/thread/aarch64/clone.s
+++ b/src/thread/aarch64/clone.s
@@ -24,7 +24,8 @@ __clone:
 	// parent
 	ret
 	// child
-1:	ldp x1,x0,[sp],#16
+1:	mov fp, 0
+	ldp x1,x0,[sp],#16
 	blr x1
 	mov x8,#93 // SYS_exit
 	svc #0
diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
index 101ad391..da50508d 100644
--- a/src/thread/arm/atomics.s
+++ b/src/thread/arm/atomics.s
@@ -15,10 +15,10 @@ __a_barrier_oldkuser:
 	mov r1,r0
 	mov r2,sp
 	ldr ip,=0xffff0fc0
-	mov lr,pc
-	mov pc,ip
+	bl 1f
 	pop {r0,r1,r2,r3,ip,lr}
 	bx lr
+1:	bx ip
 
 .global __a_barrier_v6
 .hidden __a_barrier_v6
diff --git a/src/thread/arm/clone.s b/src/thread/arm/clone.s
index e16b1326..4ff0c0e8 100644
--- a/src/thread/arm/clone.s
+++ b/src/thread/arm/clone.s
@@ -19,14 +19,11 @@ __clone:
 	ldmfd sp!,{r4,r5,r6,r7}
 	bx lr
 
-1:	mov r0,r6
-	tst r5,#1
-	bne 1f
-	mov lr,pc
-	mov pc,r5
+1:	mov fp,#0
+	mov r0,r6
+	bl 3f
 2:	mov r7,#1
 	svc 0
-
-1:	mov lr,pc
-	bx r5
 	b 2b
+
+3:	bx r5
diff --git a/src/thread/arm/syscall_cp.s b/src/thread/arm/syscall_cp.s
index a5730c08..e607dd42 100644
--- a/src/thread/arm/syscall_cp.s
+++ b/src/thread/arm/syscall_cp.s
@@ -11,19 +11,19 @@
 .type __syscall_cp_asm,%function
 __syscall_cp_asm:
 	mov ip,sp
-	stmfd sp!,{r4,r5,r6,r7,lr}
+	stmfd sp!,{r4,r5,r6,r7}
 __cp_begin:
 	ldr r0,[r0]
 	cmp r0,#0
-	blne __cp_cancel
+	bne __cp_cancel
 	mov r7,r1
 	mov r0,r2
 	mov r1,r3
 	ldmfd ip,{r2,r3,r4,r5,r6}
 	svc 0
 __cp_end:
-	ldmfd sp!,{r4,r5,r6,r7,lr}
+	ldmfd sp!,{r4,r5,r6,r7}
 	bx lr
 __cp_cancel:
-	ldmfd sp!,{r4,r5,r6,r7,lr}
+	ldmfd sp!,{r4,r5,r6,r7}
 	b __cancel
diff --git a/src/thread/i386/__set_thread_area.s b/src/thread/i386/__set_thread_area.s
index c2c21dd5..aa6852be 100644
--- a/src/thread/i386/__set_thread_area.s
+++ b/src/thread/i386/__set_thread_area.s
@@ -28,6 +28,7 @@ __set_thread_area:
 	ret
 2:
 	mov %ebx,%ecx
+	xor %eax,%eax
 	xor %ebx,%ebx
 	xor %edx,%edx
 	mov %ebx,(%esp)
diff --git a/src/thread/i386/tls.s b/src/thread/i386/tls.s
index 76d5d462..6e4c4cb9 100644
--- a/src/thread/i386/tls.s
+++ b/src/thread/i386/tls.s
@@ -4,14 +4,6 @@
 ___tls_get_addr:
 	mov %gs:4,%edx
 	mov (%eax),%ecx
-	cmp %ecx,(%edx)
-	jc 1f
 	mov 4(%eax),%eax
 	add (%edx,%ecx,4),%eax
 	ret
-1:	push %eax
-.weak __tls_get_new
-.hidden __tls_get_new
-	call __tls_get_new
-	pop %edx
-	ret
diff --git a/src/thread/loongarch64/__set_thread_area.s b/src/thread/loongarch64/__set_thread_area.s
new file mode 100644
index 00000000..021307fc
--- /dev/null
+++ b/src/thread/loongarch64/__set_thread_area.s
@@ -0,0 +1,7 @@
+.global __set_thread_area
+.hidden __set_thread_area
+.type   __set_thread_area,@function
+__set_thread_area:
+	move $tp, $a0
+	move $a0, $zero
+	jr   $ra
diff --git a/src/thread/loongarch64/__unmapself.s b/src/thread/loongarch64/__unmapself.s
new file mode 100644
index 00000000..719ad056
--- /dev/null
+++ b/src/thread/loongarch64/__unmapself.s
@@ -0,0 +1,7 @@
+.global __unmapself
+.type   __unmapself, @function
+__unmapself:
+	li.d    $a7, 215   # call munmap
+	syscall 0
+	li.d    $a7, 93    # call exit
+	syscall 0
diff --git a/src/thread/loongarch64/clone.s b/src/thread/loongarch64/clone.s
new file mode 100644
index 00000000..cb4aacfc
--- /dev/null
+++ b/src/thread/loongarch64/clone.s
@@ -0,0 +1,30 @@
+#__clone(func, stack, flags, arg, ptid, tls, ctid)
+#         a0,    a1,   a2,    a3,  a4,  a5,   a6
+# sys_clone(flags, stack, ptid, ctid, tls)
+#            a0,    a1,   a2,    a3,  a4
+
+.global __clone
+.hidden __clone
+.type __clone,@function
+__clone:
+	bstrins.d $a1, $zero, 3, 0   #stack to 16 align
+	# Save function pointer and argument pointer on new thread stack
+	addi.d  $a1, $a1, -16
+	st.d    $a0, $a1, 0     # save function pointer
+	st.d    $a3, $a1, 8     # save argument pointer
+	or      $a0, $a2, $zero
+	or      $a2, $a4, $zero
+	or      $a3, $a6, $zero
+	or      $a4, $a5, $zero
+	ori     $a7, $zero, 220
+	syscall 0               # call clone
+
+	beqz    $a0, 1f         # whether child process
+	jirl    $zero, $ra, 0   # parent process return
+1:
+	move    $fp, $zero
+	ld.d    $t8, $sp, 0     # function pointer
+	ld.d    $a0, $sp, 8     # argument pointer
+	jirl    $ra, $t8, 0     # call the user's function
+	ori     $a7, $zero, 93
+	syscall 0               # child process exit
diff --git a/src/thread/loongarch64/syscall_cp.s b/src/thread/loongarch64/syscall_cp.s
new file mode 100644
index 00000000..c057a97b
--- /dev/null
+++ b/src/thread/loongarch64/syscall_cp.s
@@ -0,0 +1,29 @@
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+
+__syscall_cp_asm:
+__cp_begin:
+	ld.w $a0, $a0, 0
+	bnez $a0, __cp_cancel
+	move $t8, $a1    # reserve system call number
+	move $a0, $a2
+	move $a1, $a3
+	move $a2, $a4
+	move $a3, $a5
+	move $a4, $a6
+	move $a5, $a7
+	move $a7, $t8
+	syscall 0
+__cp_end:
+	jr $ra
+__cp_cancel:
+	la.local $t8, __cancel
+	jr $t8
diff --git a/src/thread/m68k/clone.s b/src/thread/m68k/clone.s
index f6dfa06f..0134cf4e 100644
--- a/src/thread/m68k/clone.s
+++ b/src/thread/m68k/clone.s
@@ -18,7 +18,8 @@ __clone:
 	beq 1f
 	movem.l (%sp)+,%d2-%d5
 	rts
-1:	move.l %a1,-(%sp)
+1:	suba.l %fp,%fp
+	move.l %a1,-(%sp)
 	jsr (%a0)
 	move.l #1,%d0
 	trap #0
diff --git a/src/thread/microblaze/clone.s b/src/thread/microblaze/clone.s
index b68cc5fc..64e3f074 100644
--- a/src/thread/microblaze/clone.s
+++ b/src/thread/microblaze/clone.s
@@ -22,7 +22,8 @@ __clone:
 	rtsd    r15, 8
 	nop
 
-1:	lwi     r3, r1, 0
+1:	add     r19, r0, r0
+	lwi     r3, r1, 0
 	lwi     r5, r1, 4
 	brald   r15, r3
 	nop
diff --git a/src/thread/mips/clone.s b/src/thread/mips/clone.s
index 04463385..229b987e 100644
--- a/src/thread/mips/clone.s
+++ b/src/thread/mips/clone.s
@@ -27,7 +27,8 @@ __clone:
 	addu $sp, $sp, 16
 	jr $ra
 	nop
-1:	lw $25, 0($sp)
+1:	move $fp, $0
+	lw $25, 0($sp)
 	lw $4, 4($sp)
 	jalr $25
 	nop
diff --git a/src/thread/mips64/clone.s b/src/thread/mips64/clone.s
index 2d86899a..8de3db6c 100644
--- a/src/thread/mips64/clone.s
+++ b/src/thread/mips64/clone.s
@@ -25,7 +25,8 @@ __clone:
 	nop
 	jr	$ra
 	nop
-1:	ld	$25, 0($sp)	# function pointer
+1:	move	$fp, $0
+	ld	$25, 0($sp)	# function pointer
 	ld	$4, 8($sp)	# argument pointer
 	jalr	$25		# call the user's function
 	nop
diff --git a/src/thread/mipsn32/clone.s b/src/thread/mipsn32/clone.s
index 4d3c8c7a..9571231a 100644
--- a/src/thread/mipsn32/clone.s
+++ b/src/thread/mipsn32/clone.s
@@ -25,7 +25,8 @@ __clone:
 	nop
 	jr	$ra
 	nop
-1:	lw	$25, 0($sp)	# function pointer
+1:	move	$fp, $0
+	lw	$25, 0($sp)	# function pointer
 	lw	$4, 4($sp)	# argument pointer
 	jalr	$25		# call the user's function
 	nop
diff --git a/src/thread/or1k/clone.s b/src/thread/or1k/clone.s
index 2473ac20..b41488a2 100644
--- a/src/thread/or1k/clone.s
+++ b/src/thread/or1k/clone.s
@@ -6,6 +6,8 @@
 .hidden __clone
 .type   __clone,@function
 __clone:
+	l.xori	r11, r0, -4
+	l.and	r4, r4, r11
 	l.addi	r4, r4, -8
 	l.sw	0(r4), r3
 	l.sw	4(r4), r6
@@ -23,7 +25,8 @@ __clone:
 	l.jr	r9
 	 l.nop
 
-1:	l.lwz	r11, 0(r1)
+1:	l.ori	r2, r0, 0
+	l.lwz	r11, 0(r1)
 	l.jalr	r11
 	 l.lwz	r3, 4(r1)
 
diff --git a/src/thread/pthread_atfork.c b/src/thread/pthread_atfork.c
index 76497401..26d32543 100644
--- a/src/thread/pthread_atfork.c
+++ b/src/thread/pthread_atfork.c
@@ -1,7 +1,13 @@
 #include <pthread.h>
+#include <errno.h>
 #include "libc.h"
 #include "lock.h"
 
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
+
 static struct atfork_funcs {
 	void (*prepare)(void);
 	void (*parent)(void);
@@ -34,7 +40,7 @@ void __fork_handler(int who)
 int pthread_atfork(void (*prepare)(void), void (*parent)(void), void (*child)(void))
 {
 	struct atfork_funcs *new = malloc(sizeof *new);
-	if (!new) return -1;
+	if (!new) return ENOMEM;
 
 	LOCK(lock);
 	new->next = funcs;
diff --git a/src/thread/pthread_attr_get.c b/src/thread/pthread_attr_get.c
index 4aa5afdb..f12ff442 100644
--- a/src/thread/pthread_attr_get.c
+++ b/src/thread/pthread_attr_get.c
@@ -70,7 +70,7 @@ int pthread_condattr_getpshared(const pthread_condattr_t *restrict a, int *restr
 
 int pthread_mutexattr_getprotocol(const pthread_mutexattr_t *restrict a, int *restrict protocol)
 {
-	*protocol = PTHREAD_PRIO_NONE;
+	*protocol = a->__attr / 8U % 2;
 	return 0;
 }
 int pthread_mutexattr_getpshared(const pthread_mutexattr_t *restrict a, int *restrict pshared)
diff --git a/src/thread/pthread_attr_setinheritsched.c b/src/thread/pthread_attr_setinheritsched.c
index 6a648376..ca264be7 100644
--- a/src/thread/pthread_attr_setinheritsched.c
+++ b/src/thread/pthread_attr_setinheritsched.c
@@ -1,25 +1,6 @@
 #include "pthread_impl.h"
 #include "syscall.h"
 
-hidden void *__start_sched(void *p)
-{
-	struct start_sched_args *ssa = p;
-	void *start_arg = ssa->start_arg;
-	void *(*start_fn)(void *) = ssa->start_fn;
-	pthread_t self = __pthread_self();
-
-	int ret = -__syscall(SYS_sched_setscheduler, self->tid,
-		ssa->attr->_a_policy, &ssa->attr->_a_prio);
-	if (!ret) __restore_sigs(&ssa->mask);
-	a_store(&ssa->futex, ret);
-	__wake(&ssa->futex, 1, 1);
-	if (ret) {
-		self->detach_state = DT_DYNAMIC;
-		return 0;
-	}
-	return start_fn(start_arg);
-}
-
 int pthread_attr_setinheritsched(pthread_attr_t *a, int inherit)
 {
 	if (inherit > 1U) return EINVAL;
diff --git a/src/thread/pthread_cancel.c b/src/thread/pthread_cancel.c
index 2f9d5e97..139a6fc8 100644
--- a/src/thread/pthread_cancel.c
+++ b/src/thread/pthread_cancel.c
@@ -56,7 +56,12 @@ static void cancel_handler(int sig, siginfo_t *si, void *ctx)
 
 	_sigaddset(&uc->uc_sigmask, SIGCANCEL);
 
-	if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
+	if (self->cancelasync) {
+		pthread_sigmask(SIG_SETMASK, &uc->uc_sigmask, 0);
+		__cancel();
+	}
+
+	if (pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
 		uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel;
 #ifdef CANCEL_GOT
 		uc->uc_mcontext.MC_GOT = CANCEL_GOT;
@@ -77,7 +82,7 @@ void __testcancel()
 static void init_cancellation()
 {
 	struct sigaction sa = {
-		.sa_flags = SA_SIGINFO | SA_RESTART,
+		.sa_flags = SA_SIGINFO | SA_RESTART | SA_ONSTACK,
 		.sa_sigaction = cancel_handler
 	};
 	memset(&sa.sa_mask, -1, _NSIG/8);
diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c
index d1501240..6b761455 100644
--- a/src/thread/pthread_cond_timedwait.c
+++ b/src/thread/pthread_cond_timedwait.c
@@ -146,14 +146,18 @@ relock:
 
 	if (oldstate == WAITING) goto done;
 
-	if (!node.next) a_inc(&m->_m_waiters);
+	if (!node.next && !(m->_m_type & 8))
+		a_inc(&m->_m_waiters);
 
 	/* Unlock the barrier that's holding back the next waiter, and
 	 * either wake it or requeue it to the mutex. */
-	if (node.prev)
-		unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & 128);
-	else
-		a_dec(&m->_m_waiters);
+	if (node.prev) {
+		int val = m->_m_lock;
+		if (val>0) a_cas(&m->_m_lock, val, val|0x80000000);
+		unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & (8|128));
+	} else if (!(m->_m_type & 8)) {
+		a_dec(&m->_m_waiters);		
+	}
 
 	/* Since a signal was consumed, cancellation is not permitted. */
 	if (e == ECANCELED) e = 0;
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 3da7db14..087f6206 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,12 +15,41 @@ weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
 weak_alias(dummy_0, __dl_thread_cleanup);
+weak_alias(dummy_0, __membarrier_init);
 
-static void *dummy_1(void *p)
+static int tl_lock_count;
+static int tl_lock_waiters;
+
+void __tl_lock(void)
 {
-	return 0;
+	int tid = __pthread_self()->tid;
+	int val = __thread_list_lock;
+	if (val == tid) {
+		tl_lock_count++;
+		return;
+	}
+	while ((val = a_cas(&__thread_list_lock, 0, tid)))
+		__wait(&__thread_list_lock, &tl_lock_waiters, val, 0);
+}
+
+void __tl_unlock(void)
+{
+	if (tl_lock_count) {
+		tl_lock_count--;
+		return;
+	}
+	a_store(&__thread_list_lock, 0);
+	if (tl_lock_waiters) __wake(&__thread_list_lock, 1, 0);
+}
+
+void __tl_sync(pthread_t td)
+{
+	a_barrier();
+	int val = __thread_list_lock;
+	if (!val) return;
+	__wait(&__thread_list_lock, &tl_lock_waiters, val, 0);
+	if (tl_lock_waiters) __wake(&__thread_list_lock, 1, 0);
 }
-weak_alias(dummy_1, __start_sched);
 
 _Noreturn void __pthread_exit(void *result)
 {
@@ -40,30 +69,54 @@ _Noreturn void __pthread_exit(void *result)
 
 	__pthread_tsd_run_dtors();
 
+	__block_app_sigs(&set);
+
+	/* This atomic potentially competes with a concurrent pthread_detach
+	 * call; the loser is responsible for freeing thread resources. */
+	int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING);
+
+	if (state==DT_DETACHED && self->map_base) {
+		/* Since __unmapself bypasses the normal munmap code path,
+		 * explicitly wait for vmlock holders first. This must be
+		 * done before any locks are taken, to avoid lock ordering
+		 * issues that could lead to deadlock. */
+		__vm_wait();
+	}
+
 	/* Access to target the exiting thread with syscalls that use
 	 * its kernel tid is controlled by killlock. For detached threads,
 	 * any use past this point would have undefined behavior, but for
-	 * joinable threads it's a valid usage that must be handled. */
+	 * joinable threads it's a valid usage that must be handled.
+	 * Signals must be blocked since pthread_kill must be AS-safe. */
 	LOCK(self->killlock);
 
-	/* Block all signals before decrementing the live thread count.
-	 * This is important to ensure that dynamically allocated TLS
-	 * is not under-allocated/over-committed, and possibly for other
-	 * reasons as well. */
-	__block_all_sigs(&set);
-
-	/* It's impossible to determine whether this is "the last thread"
-	 * until performing the atomic decrement, since multiple threads
-	 * could exit at the same time. For the last thread, revert the
-	 * decrement, restore the tid, and unblock signals to give the
-	 * atexit handlers and stdio cleanup code a consistent state. */
-	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
-		libc.threads_minus_1 = 0;
+	/* The thread list lock must be AS-safe, and thus depends on
+	 * application signals being blocked above. */
+	__tl_lock();
+
+	/* If this is the only thread in the list, don't proceed with
+	 * termination of the thread, but restore the previous lock and
+	 * signal state to prepare for exit to call atexit handlers. */
+	if (self->next == self) {
+		__tl_unlock();
 		UNLOCK(self->killlock);
+		self->detach_state = state;
 		__restore_sigs(&set);
 		exit(0);
 	}
 
+	/* At this point we are committed to thread termination. */
+
+	/* After the kernel thread exits, its tid may be reused. Clear it
+	 * to prevent inadvertent use and inform functions that would use
+	 * it that it's no longer available. At this point the killlock
+	 * may be released, since functions that use it will consistently
+	 * see the thread as having exited. Release it now so that no
+	 * remaining locks (except thread list) are held if we end up
+	 * resetting need_locks below. */
+	self->tid = 0;
+	UNLOCK(self->killlock);
+
 	/* Process robust list in userspace to handle non-pshared mutexes
 	 * and the detached thread case where the robust list head will
 	 * be invalid when the kernel would process it. */
@@ -86,39 +139,35 @@ _Noreturn void __pthread_exit(void *result)
 	__do_orphaned_stdio_locks();
 	__dl_thread_cleanup();
 
-	/* This atomic potentially competes with a concurrent pthread_detach
-	 * call; the loser is responsible for freeing thread resources. */
-	int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING);
-
-	if (state>=DT_DETACHED && self->map_base) {
-		/* Detached threads must avoid the kernel clear_child_tid
-		 * feature, since the virtual address will have been
-		 * unmapped and possibly already reused by a new mapping
-		 * at the time the kernel would perform the write. In
-		 * the case of threads that started out detached, the
-		 * initial clone flags are correct, but if the thread was
-		 * detached later, we need to clear it here. */
-		if (state == DT_DYNAMIC) __syscall(SYS_set_tid_address, 0);
+	/* Last, unlink thread from the list. This change will not be visible
+	 * until the lock is released, which only happens after SYS_exit
+	 * has been called, via the exit futex address pointing at the lock.
+	 * This needs to happen after any possible calls to LOCK() that might
+	 * skip locking if process appears single-threaded. */
+	if (!--libc.threads_minus_1) libc.need_locks = -1;
+	self->next->prev = self->prev;
+	self->prev->next = self->next;
+	self->prev = self->next = self;
+
+	if (state==DT_DETACHED && self->map_base) {
+		/* Detached threads must block even implementation-internal
+		 * signals, since they will not have a stack in their last
+		 * moments of existence. */
+		__block_all_sigs(&set);
 
 		/* Robust list will no longer be valid, and was already
 		 * processed above, so unregister it with the kernel. */
 		if (self->robust_list.off)
 			__syscall(SYS_set_robust_list, 0, 3*sizeof(long));
 
-		/* Since __unmapself bypasses the normal munmap code path,
-		 * explicitly wait for vmlock holders first. */
-		__vm_wait();
-
 		/* The following call unmaps the thread's stack mapping
 		 * and then exits without touching the stack. */
 		__unmapself(self->map_base, self->map_size);
 	}
 
-	/* After the kernel thread exits, its tid may be reused. Clear it
-	 * to prevent inadvertent use and inform functions that would use
-	 * it that it's no longer available. */
-	self->tid = 0;
-	UNLOCK(self->killlock);
+	/* Wake any joiner. */
+	a_store(&self->detach_state, DT_EXITED);
+	__wake(&self->detach_state, 1, 1);
 
 	for (;;) __syscall(SYS_exit, 0);
 }
@@ -135,21 +184,35 @@ void __do_cleanup_pop(struct __ptcb *cb)
 	__pthread_self()->cancelbuf = cb->__next;
 }
 
+struct start_args {
+	void *(*start_func)(void *);
+	void *start_arg;
+	volatile int control;
+	unsigned long sig_mask[_NSIG/8/sizeof(long)];
+};
+
 static int start(void *p)
 {
-	pthread_t self = p;
-	if (self->unblock_cancel)
-		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK,
-			SIGPT_SET, 0, _NSIG/8);
-	__pthread_exit(self->start(self->start_arg));
+	struct start_args *args = p;
+	int state = args->control;
+	if (state) {
+		if (a_cas(&args->control, 1, 2)==1)
+			__wait(&args->control, 0, 2, 1);
+		if (args->control) {
+			__syscall(SYS_set_tid_address, &args->control);
+			for (;;) __syscall(SYS_exit, 0);
+		}
+	}
+	__syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->sig_mask, 0, _NSIG/8);
+	__pthread_exit(args->start_func(args->start_arg));
 	return 0;
 }
 
 static int start_c11(void *p)
 {
-	pthread_t self = p;
-	int (*start)(void*) = (int(*)(void*)) self->start;
-	__pthread_exit((void *)(uintptr_t)start(self->start_arg));
+	struct start_args *args = p;
+	int (*start)(void*) = (int(*)(void*)) args->start_func;
+	__pthread_exit((void *)(uintptr_t)start(args->start_arg));
 	return 0;
 }
 
@@ -161,8 +224,6 @@ weak_alias(dummy, __pthread_tsd_size);
 static void *dummy_tsd[1] = { 0 };
 weak_alias(dummy_tsd, __pthread_tsd_main);
 
-volatile int __block_new_threads = 0;
-
 static FILE *volatile dummy_file = 0;
 weak_alias(dummy_file, __stdin_used);
 weak_alias(dummy_file, __stdout_used);
@@ -182,9 +243,8 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
 		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
 		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
-	int do_sched = 0;
 	pthread_attr_t attr = { 0 };
-	struct start_sched_args ssa;
+	sigset_t set;
 
 	if (!libc.can_do_threads) return ENOSYS;
 	self = __pthread_self();
@@ -197,6 +257,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 		init_file_lock(__stderr_used);
 		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
 		self->tsd = (void **)__pthread_tsd_main;
+		__membarrier_init();
 		libc.threaded = 1;
 	}
 	if (attrp && !c11) attr = *attrp;
@@ -207,8 +268,6 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 		attr._a_guardsize = __default_guardsize;
 	}
 
-	if (__block_new_threads) __wait(&__block_new_threads, 0, 1, 1);
-
 	if (attr._a_stackaddr) {
 		size_t need = libc.tls_size + __pthread_tsd_size;
 		size = attr._a_stacksize;
@@ -257,50 +316,74 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	new->stack = stack;
 	new->stack_size = stack - stack_limit;
 	new->guard_size = guard;
-	new->start = entry;
-	new->start_arg = arg;
 	new->self = new;
 	new->tsd = (void *)tsd;
 	new->locale = &libc.global_locale;
 	if (attr._a_detach) {
 		new->detach_state = DT_DETACHED;
-		flags -= CLONE_CHILD_CLEARTID;
 	} else {
 		new->detach_state = DT_JOINABLE;
 	}
-	if (attr._a_sched) {
-		do_sched = 1;
-		ssa.futex = -1;
-		ssa.start_fn = new->start;
-		ssa.start_arg = new->start_arg;
-		ssa.attr = &attr;
-		new->start = __start_sched;
-		new->start_arg = &ssa;
-		__block_app_sigs(&ssa.mask);
-	}
 	new->robust_list.head = &new->robust_list.head;
-	new->unblock_cancel = self->cancel;
-	new->CANARY = self->CANARY;
-
-	a_inc(&libc.threads_minus_1);
-	ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->detach_state);
-
-	__release_ptc();
+	new->canary = self->canary;
+	new->sysinfo = self->sysinfo;
+
+	/* Setup argument structure for the new thread on its stack.
+	 * It's safe to access from the caller only until the thread
+	 * list is unlocked. */
+	stack -= (uintptr_t)stack % sizeof(uintptr_t);
+	stack -= sizeof(struct start_args);
+	struct start_args *args = (void *)stack;
+	args->start_func = entry;
+	args->start_arg = arg;
+	args->control = attr._a_sched ? 1 : 0;
+
+	/* Application signals (but not the synccall signal) must be
+	 * blocked before the thread list lock can be taken, to ensure
+	 * that the lock is AS-safe. */
+	__block_app_sigs(&set);
+
+	/* Ensure SIGCANCEL is unblocked in new thread. This requires
+	 * working with a copy of the set so we can restore the
+	 * original mask in the calling thread. */
+	memcpy(&args->sig_mask, &set, sizeof args->sig_mask);
+	args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &=
+		~(1UL<<((SIGCANCEL-1)%(8*sizeof(long))));
+
+	__tl_lock();
+	if (!libc.threads_minus_1++) libc.need_locks = 1;
+	ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock);
+
+	/* All clone failures translate to EAGAIN. If explicit scheduling
+	 * was requested, attempt it before unlocking the thread list so
+	 * that the failed thread is never exposed and so that we can
+	 * clean up all transient resource usage before returning. */
+	if (ret < 0) {
+		ret = -EAGAIN;
+	} else if (attr._a_sched) {
+		ret = __syscall(SYS_sched_setscheduler,
+			new->tid, attr._a_policy, &attr._a_prio);
+		if (a_swap(&args->control, ret ? 3 : 0)==2)
+			__wake(&args->control, 1, 1);
+		if (ret)
+			__wait(&args->control, 0, 3, 0);
+	}
 
-	if (do_sched) {
-		__restore_sigs(&ssa.mask);
+	if (ret >= 0) {
+		new->next = self->next;
+		new->prev = self;
+		new->next->prev = new;
+		new->prev->next = new;
+	} else {
+		if (!--libc.threads_minus_1) libc.need_locks = 0;
 	}
+	__tl_unlock();
+	__restore_sigs(&set);
+	__release_ptc();
 
 	if (ret < 0) {
-		a_dec(&libc.threads_minus_1);
 		if (map) __munmap(map, size);
-		return EAGAIN;
-	}
-
-	if (do_sched) {
-		__futexwait(&ssa.futex, -1, 1);
-		ret = ssa.futex;
-		if (ret) return ret;
+		return -ret;
 	}
 
 	*res = new;
diff --git a/src/thread/pthread_detach.c b/src/thread/pthread_detach.c
index 16b0552d..d73a500e 100644
--- a/src/thread/pthread_detach.c
+++ b/src/thread/pthread_detach.c
@@ -5,8 +5,12 @@ static int __pthread_detach(pthread_t t)
 {
 	/* If the cas fails, detach state is either already-detached
 	 * or exiting/exited, and pthread_join will trap or cleanup. */
-	if (a_cas(&t->detach_state, DT_JOINABLE, DT_DYNAMIC) != DT_JOINABLE)
-		return __pthread_join(t, 0);
+	if (a_cas(&t->detach_state, DT_JOINABLE, DT_DETACHED) != DT_JOINABLE) {
+		int cs;
+		__pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+		__pthread_join(t, 0);
+		__pthread_setcancelstate(cs, 0);
+	}
 	return 0;
 }
 
diff --git a/src/thread/pthread_getname_np.c b/src/thread/pthread_getname_np.c
new file mode 100644
index 00000000..85504e45
--- /dev/null
+++ b/src/thread/pthread_getname_np.c
@@ -0,0 +1,25 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+
+#include "pthread_impl.h"
+
+int pthread_getname_np(pthread_t thread, char *name, size_t len)
+{
+	int fd, cs, status = 0;
+	char f[sizeof "/proc/self/task//comm" + 3*sizeof(int)];
+
+	if (len < 16) return ERANGE;
+
+	if (thread == pthread_self())
+		return prctl(PR_GET_NAME, (unsigned long)name, 0UL, 0UL, 0UL) ? errno : 0;
+
+	snprintf(f, sizeof f, "/proc/self/task/%d/comm", thread->tid);
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+	if ((fd = open(f, O_RDONLY|O_CLOEXEC)) < 0 || (len = read(fd, name, len)) == -1) status = errno;
+	else name[len-1] = 0; /* remove trailing new line only if successful */
+	if (fd >= 0) close(fd);
+	pthread_setcancelstate(cs, 0);
+	return status;
+}
diff --git a/src/thread/pthread_getschedparam.c b/src/thread/pthread_getschedparam.c
index 1cba073d..c098befb 100644
--- a/src/thread/pthread_getschedparam.c
+++ b/src/thread/pthread_getschedparam.c
@@ -4,6 +4,8 @@
 int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param *restrict param)
 {
 	int r;
+	sigset_t set;
+	__block_app_sigs(&set);
 	LOCK(t->killlock);
 	if (!t->tid) {
 		r = ESRCH;
@@ -14,5 +16,6 @@ int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param
 		}
 	}
 	UNLOCK(t->killlock);
+	__restore_sigs(&set);
 	return r;
 }
diff --git a/src/thread/pthread_join.c b/src/thread/pthread_join.c
index 54d81039..17dae85d 100644
--- a/src/thread/pthread_join.c
+++ b/src/thread/pthread_join.c
@@ -1,6 +1,12 @@
+#define _GNU_SOURCE
 #include "pthread_impl.h"
 #include <sys/mman.h>
 
+static void dummy1(pthread_t t)
+{
+}
+weak_alias(dummy1, __tl_sync);
+
 static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec *at)
 {
 	int state, cs, r = 0;
@@ -9,11 +15,11 @@ static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec
 	if (cs == PTHREAD_CANCEL_ENABLE) __pthread_setcancelstate(cs, 0);
 	while ((state = t->detach_state) && r != ETIMEDOUT && r != EINVAL) {
 		if (state >= DT_DETACHED) a_crash();
-		r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 0);
+		r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 1);
 	}
 	__pthread_setcancelstate(cs, 0);
 	if (r == ETIMEDOUT || r == EINVAL) return r;
-	a_barrier();
+	__tl_sync(t);
 	if (res) *res = t->result;
 	if (t->map_base) __munmap(t->map_base, t->map_size);
 	return 0;
diff --git a/src/thread/pthread_key_create.c b/src/thread/pthread_key_create.c
index e26f199c..39770c7a 100644
--- a/src/thread/pthread_key_create.c
+++ b/src/thread/pthread_key_create.c
@@ -1,4 +1,5 @@
 #include "pthread_impl.h"
+#include "fork_impl.h"
 
 volatile size_t __pthread_tsd_size = sizeof(void *) * PTHREAD_KEYS_MAX;
 void *__pthread_tsd_main[PTHREAD_KEYS_MAX] = { 0 };
@@ -13,43 +14,23 @@ static void nodtor(void *dummy)
 {
 }
 
-static void dirty(void *dummy)
+static void dummy_0(void)
 {
 }
 
-struct cleanup_args {
-	pthread_t caller;
-	int ret;
-};
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
 
-static void clean_dirty_tsd_callback(void *p)
+void __pthread_key_atfork(int who)
 {
-	struct cleanup_args *args = p;
-	pthread_t self = __pthread_self();
-	pthread_key_t i;
-	for (i=0; i<PTHREAD_KEYS_MAX; i++) {
-		if (keys[i] == dirty && self->tsd[i])
-			self->tsd[i] = 0;
-	}
-	/* Arbitrary choice to avoid data race. */
-	if (args->caller == self) args->ret = 0;
-}
-
-static int clean_dirty_tsd(void)
-{
-	struct cleanup_args args = {
-		.caller = __pthread_self(),
-		.ret = EAGAIN
-	};
-	__pthread_key_delete_synccall(clean_dirty_tsd_callback, &args);
-	return args.ret;
+	if (who<0) __pthread_rwlock_rdlock(&key_lock);
+	else if (!who) __pthread_rwlock_unlock(&key_lock);
+	else key_lock = (pthread_rwlock_t)PTHREAD_RWLOCK_INITIALIZER;
 }
 
 int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
 {
-	pthread_key_t j = next_key;
 	pthread_t self = __pthread_self();
-	int found_dirty = 0;
 
 	/* This can only happen in the main thread before
 	 * pthread_create has been called. */
@@ -58,46 +39,38 @@ int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
 	/* Purely a sentinel value since null means slot is free. */
 	if (!dtor) dtor = nodtor;
 
-	pthread_rwlock_wrlock(&key_lock);
+	__pthread_rwlock_wrlock(&key_lock);
+	pthread_key_t j = next_key;
 	do {
 		if (!keys[j]) {
 			keys[next_key = *k = j] = dtor;
-			pthread_rwlock_unlock(&key_lock);
+			__pthread_rwlock_unlock(&key_lock);
 			return 0;
-		} else if (keys[j] == dirty) {
-			found_dirty = 1;
 		}
 	} while ((j=(j+1)%PTHREAD_KEYS_MAX) != next_key);
 
-	/* It's possible that all slots are in use or __synccall fails. */
-	if (!found_dirty || clean_dirty_tsd()) {
-		pthread_rwlock_unlock(&key_lock);
-		return EAGAIN;
-	}
-
-	/* If this point is reached there is necessarily a newly-cleaned
-	 * slot to allocate to satisfy the caller's request. Find it and
-	 * mark any additional previously-dirty slots clean. */
-	for (j=0; j<PTHREAD_KEYS_MAX; j++) {
-		if (keys[j] == dirty) {
-			if (dtor) {
-				keys[next_key = *k = j] = dtor;
-				dtor = 0;
-			} else {
-				keys[j] = 0;
-			}
-		}
-	}
-
-	pthread_rwlock_unlock(&key_lock);
-	return 0;
+	__pthread_rwlock_unlock(&key_lock);
+	return EAGAIN;
 }
 
-int __pthread_key_delete_impl(pthread_key_t k)
+int __pthread_key_delete(pthread_key_t k)
 {
-	pthread_rwlock_wrlock(&key_lock);
-	keys[k] = dirty;
-	pthread_rwlock_unlock(&key_lock);
+	sigset_t set;
+	pthread_t self = __pthread_self(), td=self;
+
+	__block_app_sigs(&set);
+	__pthread_rwlock_wrlock(&key_lock);
+
+	__tl_lock();
+	do td->tsd[k] = 0;
+	while ((td=td->next)!=self);
+	__tl_unlock();
+
+	keys[k] = 0;
+
+	__pthread_rwlock_unlock(&key_lock);
+	__restore_sigs(&set);
+
 	return 0;
 }
 
@@ -106,20 +79,21 @@ void __pthread_tsd_run_dtors()
 	pthread_t self = __pthread_self();
 	int i, j;
 	for (j=0; self->tsd_used && j<PTHREAD_DESTRUCTOR_ITERATIONS; j++) {
-		pthread_rwlock_rdlock(&key_lock);
+		__pthread_rwlock_rdlock(&key_lock);
 		self->tsd_used = 0;
 		for (i=0; i<PTHREAD_KEYS_MAX; i++) {
 			void *val = self->tsd[i];
 			void (*dtor)(void *) = keys[i];
 			self->tsd[i] = 0;
-			if (val && dtor && dtor != nodtor && dtor != dirty) {
-				pthread_rwlock_unlock(&key_lock);
+			if (val && dtor && dtor != nodtor) {
+				__pthread_rwlock_unlock(&key_lock);
 				dtor(val);
-				pthread_rwlock_rdlock(&key_lock);
+				__pthread_rwlock_rdlock(&key_lock);
 			}
 		}
-		pthread_rwlock_unlock(&key_lock);
+		__pthread_rwlock_unlock(&key_lock);
 	}
 }
 
 weak_alias(__pthread_key_create, pthread_key_create);
+weak_alias(__pthread_key_delete, pthread_key_delete);
diff --git a/src/thread/pthread_key_delete.c b/src/thread/pthread_key_delete.c
deleted file mode 100644
index 012fe2da..00000000
--- a/src/thread/pthread_key_delete.c
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "pthread_impl.h"
-#include "libc.h"
-
-void __pthread_key_delete_synccall(void (*f)(void *), void *p)
-{
-	__synccall(f, p);
-}
-
-int __pthread_key_delete(pthread_key_t k)
-{
-	return __pthread_key_delete_impl(k);
-}
-
-weak_alias(__pthread_key_delete, pthread_key_delete);
diff --git a/src/thread/pthread_kill.c b/src/thread/pthread_kill.c
index 3d9395cb..79ddb209 100644
--- a/src/thread/pthread_kill.c
+++ b/src/thread/pthread_kill.c
@@ -4,9 +4,15 @@
 int pthread_kill(pthread_t t, int sig)
 {
 	int r;
+	sigset_t set;
+	/* Block not just app signals, but internal ones too, since
+	 * pthread_kill is used to implement pthread_cancel, which
+	 * must be async-cancel-safe. */
+	__block_all_sigs(&set);
 	LOCK(t->killlock);
 	r = t->tid ? -__syscall(SYS_tkill, t->tid, sig)
 		: (sig+0U >= _NSIG ? EINVAL : 0);
 	UNLOCK(t->killlock);
+	__restore_sigs(&set);
 	return r;
 }
diff --git a/src/thread/pthread_mutex_consistent.c b/src/thread/pthread_mutex_consistent.c
index 96b83b52..27c74e5b 100644
--- a/src/thread/pthread_mutex_consistent.c
+++ b/src/thread/pthread_mutex_consistent.c
@@ -1,10 +1,14 @@
 #include "pthread_impl.h"
+#include "atomic.h"
 
 int pthread_mutex_consistent(pthread_mutex_t *m)
 {
-	if (!(m->_m_type & 8)) return EINVAL;
-	if ((m->_m_lock & 0x7fffffff) != __pthread_self()->tid)
+	int old = m->_m_lock;
+	int own = old & 0x3fffffff;
+	if (!(m->_m_type & 4) || !own || !(old & 0x40000000))
+		return EINVAL;
+	if (own != __pthread_self()->tid)
 		return EPERM;
-	m->_m_type &= ~8U;
+	a_and(&m->_m_lock, ~0x40000000);
 	return 0;
 }
diff --git a/src/thread/pthread_mutex_destroy.c b/src/thread/pthread_mutex_destroy.c
index 6d49e689..8d1bf77b 100644
--- a/src/thread/pthread_mutex_destroy.c
+++ b/src/thread/pthread_mutex_destroy.c
@@ -1,6 +1,10 @@
-#include <pthread.h>
+#include "pthread_impl.h"
 
 int pthread_mutex_destroy(pthread_mutex_t *mutex)
 {
+	/* If the mutex being destroyed is process-shared and has nontrivial
+	 * type (tracking ownership), it might be in the pending slot of a
+	 * robust_list; wait for quiescence. */
+	if (mutex->_m_type > 128) __vm_wait();
 	return 0;
 }
diff --git a/src/thread/pthread_mutex_timedlock.c b/src/thread/pthread_mutex_timedlock.c
index 9867f389..9279fc54 100644
--- a/src/thread/pthread_mutex_timedlock.c
+++ b/src/thread/pthread_mutex_timedlock.c
@@ -1,5 +1,58 @@
 #include "pthread_impl.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+static int __futex4(volatile void *addr, int op, int val, const struct timespec *to)
+{
+#ifdef SYS_futex_time64
+	time_t s = to ? to->tv_sec : 0;
+	long ns = to ? to->tv_nsec : 0;
+	int r = -ENOSYS;
+	if (SYS_futex == SYS_futex_time64 || !IS32BIT(s))
+		r = __syscall(SYS_futex_time64, addr, op, val,
+			to ? ((long long[]){s, ns}) : 0);
+	if (SYS_futex == SYS_futex_time64 || r!=-ENOSYS) return r;
+	to = to ? (void *)(long[]){CLAMP(s), ns} : 0;
+#endif
+	return __syscall(SYS_futex, addr, op, val, to);
+}
+
+static int pthread_mutex_timedlock_pi(pthread_mutex_t *restrict m, const struct timespec *restrict at)
+{
+	int type = m->_m_type;
+	int priv = (type & 128) ^ 128;
+	pthread_t self = __pthread_self();
+	int e;
+
+	if (!priv) self->robust_list.pending = &m->_m_next;
+
+	do e = -__futex4(&m->_m_lock, FUTEX_LOCK_PI|priv, 0, at);
+	while (e==EINTR);
+	if (e) self->robust_list.pending = 0;
+
+	switch (e) {
+	case 0:
+		/* Catch spurious success for non-robust mutexes. */
+		if (!(type&4) && ((m->_m_lock & 0x40000000) || m->_m_waiters)) {
+			a_store(&m->_m_waiters, -1);
+			__syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv);
+			self->robust_list.pending = 0;
+			break;
+		}
+		/* Signal to trylock that we already have the lock. */
+		m->_m_count = -1;
+		return __pthread_mutex_trylock(m);
+	case ETIMEDOUT:
+		return e;
+	case EDEADLK:
+		if ((type&3) == PTHREAD_MUTEX_ERRORCHECK) return e;
+	}
+	do e = __timedwait(&(int){0}, 0, CLOCK_REALTIME, at, 1);
+	while (e != ETIMEDOUT);
+	return e;
+}
+
 int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
 {
 	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
@@ -9,17 +62,21 @@ int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec
 	int type = m->_m_type;
 	int r, t, priv = (type & 128) ^ 128;
 
-	r = pthread_mutex_trylock(m);
+	r = __pthread_mutex_trylock(m);
 	if (r != EBUSY) return r;
+
+	if (type&8) return pthread_mutex_timedlock_pi(m, at);
 	
 	int spins = 100;
 	while (spins-- && m->_m_lock && !m->_m_waiters) a_spin();
 
 	while ((r=__pthread_mutex_trylock(m)) == EBUSY) {
-		if (!(r=m->_m_lock) || ((r&0x40000000) && (type&4)))
+		r = m->_m_lock;
+		int own = r & 0x3fffffff;
+		if (!own && (!r || (type&4)))
 			continue;
 		if ((type&3) == PTHREAD_MUTEX_ERRORCHECK
-		 && (r&0x7fffffff) == __pthread_self()->tid)
+		    && own == __pthread_self()->tid)
 			return EDEADLK;
 
 		a_inc(&m->_m_waiters);
diff --git a/src/thread/pthread_mutex_trylock.c b/src/thread/pthread_mutex_trylock.c
index 783ca0c4..a24e7c58 100644
--- a/src/thread/pthread_mutex_trylock.c
+++ b/src/thread/pthread_mutex_trylock.c
@@ -3,21 +3,28 @@
 int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 {
 	int old, own;
-	int type = m->_m_type & 15;
+	int type = m->_m_type;
 	pthread_t self = __pthread_self();
 	int tid = self->tid;
 
 	old = m->_m_lock;
-	own = old & 0x7fffffff;
-	if (own == tid && (type&3) == PTHREAD_MUTEX_RECURSIVE) {
-		if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
-		m->_m_count++;
-		return 0;
+	own = old & 0x3fffffff;
+	if (own == tid) {
+		if ((type&8) && m->_m_count<0) {
+			old &= 0x40000000;
+			m->_m_count = 0;
+			goto success;
+		}
+		if ((type&3) == PTHREAD_MUTEX_RECURSIVE) {
+			if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
+			m->_m_count++;
+			return 0;
+		}
 	}
-	if (own == 0x7fffffff) return ENOTRECOVERABLE;
-	if (own && (!(own & 0x40000000) || !(type & 4))) return EBUSY;
+	if (own == 0x3fffffff) return ENOTRECOVERABLE;
+	if (own || (old && !(type & 4))) return EBUSY;
 
-	if (m->_m_type & 128) {
+	if (type & 128) {
 		if (!self->robust_list.off) {
 			self->robust_list.off = (char*)&m->_m_lock-(char *)&m->_m_next;
 			__syscall(SYS_set_robust_list, &self->robust_list, 3*sizeof(long));
@@ -25,12 +32,22 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 		if (m->_m_waiters) tid |= 0x80000000;
 		self->robust_list.pending = &m->_m_next;
 	}
+	tid |= old & 0x40000000;
 
 	if (a_cas(&m->_m_lock, old, tid) != old) {
 		self->robust_list.pending = 0;
+		if ((type&12)==12 && m->_m_waiters) return ENOTRECOVERABLE;
 		return EBUSY;
 	}
 
+success:
+	if ((type&8) && m->_m_waiters) {
+		int priv = (type & 128) ^ 128;
+		__syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv);
+		self->robust_list.pending = 0;
+		return (type&4) ? ENOTRECOVERABLE : EBUSY;
+	}
+
 	volatile void *next = self->robust_list.head;
 	m->_m_next = next;
 	m->_m_prev = &self->robust_list.head;
@@ -39,9 +56,8 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 	self->robust_list.head = &m->_m_next;
 	self->robust_list.pending = 0;
 
-	if (own) {
+	if (old) {
 		m->_m_count = 0;
-		m->_m_type |= 8;
 		return EOWNERDEAD;
 	}
 
diff --git a/src/thread/pthread_mutex_unlock.c b/src/thread/pthread_mutex_unlock.c
index 7dd00d27..b66423e6 100644
--- a/src/thread/pthread_mutex_unlock.c
+++ b/src/thread/pthread_mutex_unlock.c
@@ -7,13 +7,19 @@ int __pthread_mutex_unlock(pthread_mutex_t *m)
 	int cont;
 	int type = m->_m_type & 15;
 	int priv = (m->_m_type & 128) ^ 128;
+	int new = 0;
+	int old;
 
 	if (type != PTHREAD_MUTEX_NORMAL) {
 		self = __pthread_self();
-		if ((m->_m_lock&0x7fffffff) != self->tid)
+		old = m->_m_lock;
+		int own = old & 0x3fffffff;
+		if (own != self->tid)
 			return EPERM;
 		if ((type&3) == PTHREAD_MUTEX_RECURSIVE && m->_m_count)
 			return m->_m_count--, 0;
+		if ((type&4) && (old&0x40000000))
+			new = 0x7fffffff;
 		if (!priv) {
 			self->robust_list.pending = &m->_m_next;
 			__vm_lock();
@@ -24,7 +30,16 @@ int __pthread_mutex_unlock(pthread_mutex_t *m)
 		if (next != &self->robust_list.head) *(volatile void *volatile *)
 			((char *)next - sizeof(void *)) = prev;
 	}
-	cont = a_swap(&m->_m_lock, (type & 8) ? 0x7fffffff : 0);
+	if (type&8) {
+		if (old<0 || a_cas(&m->_m_lock, old, new)!=old) {
+			if (new) a_store(&m->_m_waiters, -1);
+			__syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv);
+		}
+		cont = 0;
+		waiters = 0;
+	} else {
+		cont = a_swap(&m->_m_lock, new);
+	}
 	if (type != PTHREAD_MUTEX_NORMAL && !priv) {
 		self->robust_list.pending = 0;
 		__vm_unlock();
diff --git a/src/thread/pthread_mutexattr_setprotocol.c b/src/thread/pthread_mutexattr_setprotocol.c
index c92a31c8..8b80c1ce 100644
--- a/src/thread/pthread_mutexattr_setprotocol.c
+++ b/src/thread/pthread_mutexattr_setprotocol.c
@@ -1,7 +1,28 @@
 #include "pthread_impl.h"
+#include "syscall.h"
+
+static volatile int check_pi_result = -1;
 
 int pthread_mutexattr_setprotocol(pthread_mutexattr_t *a, int protocol)
 {
-	if (protocol) return ENOTSUP;
-	return 0;
+	int r;
+	switch (protocol) {
+	case PTHREAD_PRIO_NONE:
+		a->__attr &= ~8;
+		return 0;
+	case PTHREAD_PRIO_INHERIT:
+		r = check_pi_result;
+		if (r < 0) {
+			volatile int lk = 0;
+			r = -__syscall(SYS_futex, &lk, FUTEX_LOCK_PI, 0, 0);
+			a_store(&check_pi_result, r);
+		}
+		if (r) return r;
+		a->__attr |= 8;
+		return 0;
+	case PTHREAD_PRIO_PROTECT:
+		return ENOTSUP;
+	default:
+		return EINVAL;
+	}
 }
diff --git a/src/thread/pthread_mutexattr_setrobust.c b/src/thread/pthread_mutexattr_setrobust.c
index 04db92a6..30a9ac3b 100644
--- a/src/thread/pthread_mutexattr_setrobust.c
+++ b/src/thread/pthread_mutexattr_setrobust.c
@@ -1,22 +1,20 @@
 #include "pthread_impl.h"
 #include "syscall.h"
 
-static pthread_once_t check_robust_once;
-static int check_robust_result;
-
-static void check_robust()
-{
-	void *p;
-	size_t l;
-	check_robust_result = -__syscall(SYS_get_robust_list, 0, &p, &l);
-}
+static volatile int check_robust_result = -1;
 
 int pthread_mutexattr_setrobust(pthread_mutexattr_t *a, int robust)
 {
 	if (robust > 1U) return EINVAL;
 	if (robust) {
-		pthread_once(&check_robust_once, check_robust);
-		if (check_robust_result) return check_robust_result;
+		int r = check_robust_result;
+		if (r < 0) {
+			void *p;
+			size_t l;
+			r = -__syscall(SYS_get_robust_list, 0, &p, &l);
+			a_store(&check_robust_result, r);
+		}
+		if (r) return r;
 		a->__attr |= 4;
 		return 0;
 	}
diff --git a/src/thread/pthread_rwlock_rdlock.c b/src/thread/pthread_rwlock_rdlock.c
index 0800d21f..8546c07d 100644
--- a/src/thread/pthread_rwlock_rdlock.c
+++ b/src/thread/pthread_rwlock_rdlock.c
@@ -1,6 +1,8 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_rdlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_rdlock(pthread_rwlock_t *rw)
 {
-	return pthread_rwlock_timedrdlock(rw, 0);
+	return __pthread_rwlock_timedrdlock(rw, 0);
 }
+
+weak_alias(__pthread_rwlock_rdlock, pthread_rwlock_rdlock);
diff --git a/src/thread/pthread_rwlock_timedrdlock.c b/src/thread/pthread_rwlock_timedrdlock.c
index 0d5d0d6c..8cdd8ecf 100644
--- a/src/thread/pthread_rwlock_timedrdlock.c
+++ b/src/thread/pthread_rwlock_timedrdlock.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
+int __pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
 {
 	int r, t;
 
@@ -10,7 +10,7 @@ int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct times
 	int spins = 100;
 	while (spins-- && rw->_rw_lock && !rw->_rw_waiters) a_spin();
 
-	while ((r=pthread_rwlock_tryrdlock(rw))==EBUSY) {
+	while ((r=__pthread_rwlock_tryrdlock(rw))==EBUSY) {
 		if (!(r=rw->_rw_lock) || (r&0x7fffffff)!=0x7fffffff) continue;
 		t = r | 0x80000000;
 		a_inc(&rw->_rw_waiters);
@@ -21,3 +21,5 @@ int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct times
 	}
 	return r;
 }
+
+weak_alias(__pthread_rwlock_timedrdlock, pthread_rwlock_timedrdlock);
diff --git a/src/thread/pthread_rwlock_timedwrlock.c b/src/thread/pthread_rwlock_timedwrlock.c
index 7f26dad1..d77706e6 100644
--- a/src/thread/pthread_rwlock_timedwrlock.c
+++ b/src/thread/pthread_rwlock_timedwrlock.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
+int __pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
 {
 	int r, t;
 	
@@ -10,7 +10,7 @@ int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct times
 	int spins = 100;
 	while (spins-- && rw->_rw_lock && !rw->_rw_waiters) a_spin();
 
-	while ((r=pthread_rwlock_trywrlock(rw))==EBUSY) {
+	while ((r=__pthread_rwlock_trywrlock(rw))==EBUSY) {
 		if (!(r=rw->_rw_lock)) continue;
 		t = r | 0x80000000;
 		a_inc(&rw->_rw_waiters);
@@ -21,3 +21,5 @@ int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct times
 	}
 	return r;
 }
+
+weak_alias(__pthread_rwlock_timedwrlock, pthread_rwlock_timedwrlock);
diff --git a/src/thread/pthread_rwlock_tryrdlock.c b/src/thread/pthread_rwlock_tryrdlock.c
index fa271fcc..c13bc9cc 100644
--- a/src/thread/pthread_rwlock_tryrdlock.c
+++ b/src/thread/pthread_rwlock_tryrdlock.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
 {
 	int val, cnt;
 	do {
@@ -11,3 +11,5 @@ int pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
 	} while (a_cas(&rw->_rw_lock, val, val+1) != val);
 	return 0;
 }
+
+weak_alias(__pthread_rwlock_tryrdlock, pthread_rwlock_tryrdlock);
diff --git a/src/thread/pthread_rwlock_trywrlock.c b/src/thread/pthread_rwlock_trywrlock.c
index bb3d3a99..64d9d312 100644
--- a/src/thread/pthread_rwlock_trywrlock.c
+++ b/src/thread/pthread_rwlock_trywrlock.c
@@ -1,7 +1,9 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_trywrlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_trywrlock(pthread_rwlock_t *rw)
 {
 	if (a_cas(&rw->_rw_lock, 0, 0x7fffffff)) return EBUSY;
 	return 0;
 }
+
+weak_alias(__pthread_rwlock_trywrlock, pthread_rwlock_trywrlock);
diff --git a/src/thread/pthread_rwlock_unlock.c b/src/thread/pthread_rwlock_unlock.c
index 7b5eec84..9ae27ad2 100644
--- a/src/thread/pthread_rwlock_unlock.c
+++ b/src/thread/pthread_rwlock_unlock.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_unlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_unlock(pthread_rwlock_t *rw)
 {
 	int val, cnt, waiters, new, priv = rw->_rw_shared^128;
 
@@ -16,3 +16,5 @@ int pthread_rwlock_unlock(pthread_rwlock_t *rw)
 
 	return 0;
 }
+
+weak_alias(__pthread_rwlock_unlock, pthread_rwlock_unlock);
diff --git a/src/thread/pthread_rwlock_wrlock.c b/src/thread/pthread_rwlock_wrlock.c
index 7f33535c..46a3b3a5 100644
--- a/src/thread/pthread_rwlock_wrlock.c
+++ b/src/thread/pthread_rwlock_wrlock.c
@@ -1,6 +1,8 @@
 #include "pthread_impl.h"
 
-int pthread_rwlock_wrlock(pthread_rwlock_t *rw)
+int __pthread_rwlock_wrlock(pthread_rwlock_t *rw)
 {
-	return pthread_rwlock_timedwrlock(rw, 0);
+	return __pthread_rwlock_timedwrlock(rw, 0);
 }
+
+weak_alias(__pthread_rwlock_wrlock, pthread_rwlock_wrlock);
diff --git a/src/thread/pthread_setname_np.c b/src/thread/pthread_setname_np.c
index 82d35e17..fc2d2306 100644
--- a/src/thread/pthread_setname_np.c
+++ b/src/thread/pthread_setname_np.c
@@ -19,7 +19,7 @@ int pthread_setname_np(pthread_t thread, const char *name)
 
 	snprintf(f, sizeof f, "/proc/self/task/%d/comm", thread->tid);
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
-	if ((fd = open(f, O_WRONLY)) < 0 || write(fd, name, len) < 0) status = errno;
+	if ((fd = open(f, O_WRONLY|O_CLOEXEC)) < 0 || write(fd, name, len) < 0) status = errno;
 	if (fd >= 0) close(fd);
 	pthread_setcancelstate(cs, 0);
 	return status;
diff --git a/src/thread/pthread_setschedparam.c b/src/thread/pthread_setschedparam.c
index 038d13d8..76d4d45a 100644
--- a/src/thread/pthread_setschedparam.c
+++ b/src/thread/pthread_setschedparam.c
@@ -4,8 +4,11 @@
 int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 {
 	int r;
+	sigset_t set;
+	__block_app_sigs(&set);
 	LOCK(t->killlock);
 	r = !t->tid ? ESRCH : -__syscall(SYS_sched_setscheduler, t->tid, policy, param);
 	UNLOCK(t->killlock);
+	__restore_sigs(&set);
 	return r;
 }
diff --git a/src/thread/pthread_setschedprio.c b/src/thread/pthread_setschedprio.c
index 5bf4a019..fc2e13dd 100644
--- a/src/thread/pthread_setschedprio.c
+++ b/src/thread/pthread_setschedprio.c
@@ -4,8 +4,11 @@
 int pthread_setschedprio(pthread_t t, int prio)
 {
 	int r;
+	sigset_t set;
+	__block_app_sigs(&set);
 	LOCK(t->killlock);
 	r = !t->tid ? ESRCH : -__syscall(SYS_sched_setparam, t->tid, &prio);
 	UNLOCK(t->killlock);
+	__restore_sigs(&set);
 	return r;
 }
diff --git a/src/thread/pthread_sigmask.c b/src/thread/pthread_sigmask.c
index 88c333f6..f188782a 100644
--- a/src/thread/pthread_sigmask.c
+++ b/src/thread/pthread_sigmask.c
@@ -5,7 +5,7 @@
 int pthread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict old)
 {
 	int ret;
-	if ((unsigned)how - SIG_BLOCK > 2U) return EINVAL;
+	if (set && (unsigned)how - SIG_BLOCK > 2U) return EINVAL;
 	ret = -__syscall(SYS_rt_sigprocmask, how, set, old, _NSIG/8);
 	if (!ret && old) {
 		if (sizeof old->__bits[0] == 8) {
diff --git a/src/thread/riscv32/__set_thread_area.s b/src/thread/riscv32/__set_thread_area.s
new file mode 100644
index 00000000..828154d2
--- /dev/null
+++ b/src/thread/riscv32/__set_thread_area.s
@@ -0,0 +1,6 @@
+.global __set_thread_area
+.type   __set_thread_area, %function
+__set_thread_area:
+	mv tp, a0
+	li a0, 0
+	ret
diff --git a/src/thread/riscv32/__unmapself.s b/src/thread/riscv32/__unmapself.s
new file mode 100644
index 00000000..2849119c
--- /dev/null
+++ b/src/thread/riscv32/__unmapself.s
@@ -0,0 +1,7 @@
+.global __unmapself
+.type __unmapself, %function
+__unmapself:
+	li a7, 215 # SYS_munmap
+	ecall
+	li a7, 93  # SYS_exit
+	ecall
diff --git a/src/thread/riscv32/clone.s b/src/thread/riscv32/clone.s
new file mode 100644
index 00000000..e2116e33
--- /dev/null
+++ b/src/thread/riscv32/clone.s
@@ -0,0 +1,35 @@
+# __clone(func, stack, flags, arg, ptid, tls, ctid)
+#           a0,    a1,    a2,  a3,   a4,  a5,   a6
+
+# syscall(SYS_clone, flags, stack, ptid, tls, ctid)
+#                a7     a0,    a1,   a2,  a3,   a4
+
+.global __clone
+.type  __clone, %function
+__clone:
+	# Save func and arg to stack
+	andi a1, a1, -16
+	addi a1, a1, -16
+	sw a0, 0(a1)
+	sw a3, 4(a1)
+
+	# Call SYS_clone
+	mv a0, a2
+	mv a2, a4
+	mv a3, a5
+	mv a4, a6
+	li a7, 220 # SYS_clone
+	ecall
+
+	beqz a0, 1f
+	# Parent
+	ret
+
+	# Child
+1:      lw a1, 0(sp)
+	lw a0, 4(sp)
+	jalr a1
+
+	# Exit
+	li a7, 93 # SYS_exit
+	ecall
diff --git a/src/thread/riscv32/syscall_cp.s b/src/thread/riscv32/syscall_cp.s
new file mode 100644
index 00000000..079d1ba0
--- /dev/null
+++ b/src/thread/riscv32/syscall_cp.s
@@ -0,0 +1,29 @@
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type __syscall_cp_asm, %function
+__syscall_cp_asm:
+__cp_begin:
+	lw t0, 0(a0)
+	bnez t0, __cp_cancel
+
+	mv t0, a1
+	mv a0, a2
+	mv a1, a3
+	mv a2, a4
+	mv a3, a5
+	mv a4, a6
+	mv a5, a7
+	lw a6, 0(sp)
+	mv a7, t0
+	ecall
+__cp_end:
+	ret
+__cp_cancel:
+	tail __cancel
diff --git a/src/thread/riscv64/__set_thread_area.s b/src/thread/riscv64/__set_thread_area.s
new file mode 100644
index 00000000..828154d2
--- /dev/null
+++ b/src/thread/riscv64/__set_thread_area.s
@@ -0,0 +1,6 @@
+.global __set_thread_area
+.type   __set_thread_area, %function
+__set_thread_area:
+	mv tp, a0
+	li a0, 0
+	ret
diff --git a/src/thread/riscv64/__unmapself.s b/src/thread/riscv64/__unmapself.s
new file mode 100644
index 00000000..2849119c
--- /dev/null
+++ b/src/thread/riscv64/__unmapself.s
@@ -0,0 +1,7 @@
+.global __unmapself
+.type __unmapself, %function
+__unmapself:
+	li a7, 215 # SYS_munmap
+	ecall
+	li a7, 93  # SYS_exit
+	ecall
diff --git a/src/thread/riscv64/clone.s b/src/thread/riscv64/clone.s
new file mode 100644
index 00000000..0e6f41a8
--- /dev/null
+++ b/src/thread/riscv64/clone.s
@@ -0,0 +1,35 @@
+# __clone(func, stack, flags, arg, ptid, tls, ctid)
+#           a0,    a1,    a2,  a3,   a4,  a5,   a6
+
+# syscall(SYS_clone, flags, stack, ptid, tls, ctid)
+#                a7     a0,    a1,   a2,  a3,   a4
+
+.global __clone
+.type  __clone, %function
+__clone:
+	# Save func and arg to stack
+	andi a1, a1, -16
+	addi a1, a1, -16
+	sd a0, 0(a1)
+	sd a3, 8(a1)
+
+	# Call SYS_clone
+	mv a0, a2
+	mv a2, a4
+	mv a3, a5
+	mv a4, a6
+	li a7, 220 # SYS_clone
+	ecall
+
+	beqz a0, 1f
+	# Parent
+	ret
+
+	# Child
+1:      ld a1, 0(sp)
+	ld a0, 8(sp)
+	jalr a1
+
+	# Exit
+	li a7, 93 # SYS_exit
+	ecall
diff --git a/src/thread/riscv64/syscall_cp.s b/src/thread/riscv64/syscall_cp.s
new file mode 100644
index 00000000..eeef6391
--- /dev/null
+++ b/src/thread/riscv64/syscall_cp.s
@@ -0,0 +1,29 @@
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type __syscall_cp_asm, %function
+__syscall_cp_asm:
+__cp_begin:
+	lw t0, 0(a0)
+	bnez t0, __cp_cancel
+
+	mv t0, a1
+	mv a0, a2
+	mv a1, a3
+	mv a2, a4
+	mv a3, a5
+	mv a4, a6
+	mv a5, a7
+	ld a6, 0(sp)
+	mv a7, t0
+	ecall
+__cp_end:
+	ret
+__cp_cancel:
+	tail __cancel
diff --git a/src/thread/s390x/__tls_get_offset.s b/src/thread/s390x/__tls_get_offset.s
index 8ee92de8..405f118b 100644
--- a/src/thread/s390x/__tls_get_offset.s
+++ b/src/thread/s390x/__tls_get_offset.s
@@ -1,17 +1,17 @@
 	.global __tls_get_offset
 	.type __tls_get_offset,%function
 __tls_get_offset:
-	stmg  %r14, %r15, 112(%r15)
-	aghi  %r15, -160
+	ear   %r0, %a0
+	sllg  %r0, %r0, 32
+	ear   %r0, %a1
 
-	la    %r2, 0(%r2, %r12)
-	brasl %r14, __tls_get_addr
+	la    %r1, 0(%r2, %r12)
 
-	ear   %r1, %a0
-	sllg  %r1, %r1, 32
-	ear   %r1, %a1
+	lg    %r3, 0(%r1)
+	sllg  %r4, %r3, 3
+	lg    %r5, 8(%r0)
+	lg    %r2, 0(%r4, %r5)
+	ag    %r2, 8(%r1)
+	sgr   %r2, %r0
 
-	sgr   %r2, %r1
-
-	lmg   %r14, %r15, 272(%r15)
 	br    %r14
diff --git a/src/thread/s390x/clone.s b/src/thread/s390x/clone.s
index 577748ea..2125f20b 100644
--- a/src/thread/s390x/clone.s
+++ b/src/thread/s390x/clone.s
@@ -17,6 +17,9 @@ __clone:
 	# if (!tid) syscall(SYS_exit, a(d));
 	# return tid;
 
+	# preserve call-saved register used as syscall arg
+	stg  %r6, 48(%r15)
+
 	# create initial stack frame for new thread
 	nill %r3, 0xfff8
 	aghi %r3, -160
@@ -35,6 +38,9 @@ __clone:
 	lg   %r6, 160(%r15)
 	svc  120
 
+	# restore call-saved register
+	lg   %r6, 48(%r15)
+
 	# if error or if we're the parent, return
 	ltgr %r2, %r2
 	bnzr %r14
diff --git a/src/thread/s390x/syscall_cp.s b/src/thread/s390x/syscall_cp.s
index c1da40de..d094cbf5 100644
--- a/src/thread/s390x/syscall_cp.s
+++ b/src/thread/s390x/syscall_cp.s
@@ -14,6 +14,7 @@ __cp_begin:
 	icm %r2, 15, 0(%r2)
 	jne __cp_cancel
 
+	stg %r6, 48(%r15)
 	stg %r7, 56(%r15)
 	lgr %r1, %r3
 	lgr %r2, %r4
@@ -26,6 +27,7 @@ __cp_begin:
 
 __cp_end:
 	lg  %r7, 56(%r15)
+	lg  %r6, 48(%r15)
 	br  %r14
 
 __cp_cancel:
diff --git a/src/thread/sem_getvalue.c b/src/thread/sem_getvalue.c
index d9d83071..c0b7762d 100644
--- a/src/thread/sem_getvalue.c
+++ b/src/thread/sem_getvalue.c
@@ -1,8 +1,9 @@
 #include <semaphore.h>
+#include <limits.h>
 
 int sem_getvalue(sem_t *restrict sem, int *restrict valp)
 {
 	int val = sem->__val[0];
-	*valp = val < 0 ? 0 : val;
+	*valp = val & SEM_VALUE_MAX;
 	return 0;
 }
diff --git a/src/thread/sem_open.c b/src/thread/sem_open.c
index de8555c5..0ad29de9 100644
--- a/src/thread/sem_open.c
+++ b/src/thread/sem_open.c
@@ -12,6 +12,12 @@
 #include <stdlib.h>
 #include <pthread.h>
 #include "lock.h"
+#include "fork_impl.h"
+
+#define malloc __libc_malloc
+#define calloc __libc_calloc
+#define realloc undef
+#define free undef
 
 static struct {
 	ino_t ino;
@@ -19,6 +25,7 @@ static struct {
 	int refcnt;
 } *semtab;
 static volatile int lock[1];
+volatile int *const __sem_open_lockptr = lock;
 
 #define FLAGS (O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK)
 
@@ -163,10 +170,12 @@ int sem_close(sem_t *sem)
 	int i;
 	LOCK(lock);
 	for (i=0; i<SEM_NSEMS_MAX && semtab[i].sem != sem; i++);
-	if (!--semtab[i].refcnt) {
-		semtab[i].sem = 0;
-		semtab[i].ino = 0;
+	if (--semtab[i].refcnt) {
+		UNLOCK(lock);
+		return 0;
 	}
+	semtab[i].sem = 0;
+	semtab[i].ino = 0;
 	UNLOCK(lock);
 	munmap(sem, sizeof *sem);
 	return 0;
diff --git a/src/thread/sem_post.c b/src/thread/sem_post.c
index 31e3293d..1c8f763c 100644
--- a/src/thread/sem_post.c
+++ b/src/thread/sem_post.c
@@ -1,17 +1,21 @@
 #include <semaphore.h>
+#include <limits.h>
 #include "pthread_impl.h"
 
 int sem_post(sem_t *sem)
 {
-	int val, waiters, priv = sem->__val[2];
+	int val, new, waiters, priv = sem->__val[2];
 	do {
 		val = sem->__val[0];
 		waiters = sem->__val[1];
-		if (val == SEM_VALUE_MAX) {
+		if ((val & SEM_VALUE_MAX) == SEM_VALUE_MAX) {
 			errno = EOVERFLOW;
 			return -1;
 		}
-	} while (a_cas(sem->__val, val, val+1+(val<0)) != val);
-	if (val<0 || waiters) __wake(sem->__val, 1, priv);
+		new = val + 1;
+		if (waiters <= 1)
+			new &= ~0x80000000;
+	} while (a_cas(sem->__val, val, new) != val);
+	if (val<0 || waiters) __wake(sem->__val, waiters>1 ? 1 : -1, priv);
 	return 0;
 }
diff --git a/src/thread/sem_timedwait.c b/src/thread/sem_timedwait.c
index 8132eb1b..aa67376c 100644
--- a/src/thread/sem_timedwait.c
+++ b/src/thread/sem_timedwait.c
@@ -1,4 +1,5 @@
 #include <semaphore.h>
+#include <limits.h>
 #include "pthread_impl.h"
 
 static void cleanup(void *p)
@@ -13,16 +14,17 @@ int sem_timedwait(sem_t *restrict sem, const struct timespec *restrict at)
 	if (!sem_trywait(sem)) return 0;
 
 	int spins = 100;
-	while (spins-- && sem->__val[0] <= 0 && !sem->__val[1]) a_spin();
+	while (spins-- && !(sem->__val[0] & SEM_VALUE_MAX) && !sem->__val[1])
+		a_spin();
 
 	while (sem_trywait(sem)) {
-		int r;
+		int r, priv = sem->__val[2];
 		a_inc(sem->__val+1);
-		a_cas(sem->__val, 0, -1);
+		a_cas(sem->__val, 0, 0x80000000);
 		pthread_cleanup_push(cleanup, (void *)(sem->__val+1));
-		r = __timedwait_cp(sem->__val, -1, CLOCK_REALTIME, at, sem->__val[2]);
+		r = __timedwait_cp(sem->__val, 0x80000000, CLOCK_REALTIME, at, priv);
 		pthread_cleanup_pop(1);
-		if (r && r != EINTR) {
+		if (r) {
 			errno = r;
 			return -1;
 		}
diff --git a/src/thread/sem_trywait.c b/src/thread/sem_trywait.c
index 04edf46b..beb435da 100644
--- a/src/thread/sem_trywait.c
+++ b/src/thread/sem_trywait.c
@@ -1,12 +1,12 @@
 #include <semaphore.h>
+#include <limits.h>
 #include "pthread_impl.h"
 
 int sem_trywait(sem_t *sem)
 {
 	int val;
-	while ((val=sem->__val[0]) > 0) {
-		int new = val-1-(val==1 && sem->__val[1]);
-		if (a_cas(sem->__val, val, new)==val) return 0;
+	while ((val=sem->__val[0]) & SEM_VALUE_MAX) {
+		if (a_cas(sem->__val, val, val-1)==val) return 0;
 	}
 	errno = EAGAIN;
 	return -1;
diff --git a/src/thread/synccall.c b/src/thread/synccall.c
index cc66bd24..38597254 100644
--- a/src/thread/synccall.c
+++ b/src/thread/synccall.c
@@ -1,46 +1,42 @@
 #include "pthread_impl.h"
 #include <semaphore.h>
-#include <unistd.h>
-#include <dirent.h>
 #include <string.h>
-#include <ctype.h>
-#include "futex.h"
-#include "atomic.h"
-#include "../dirent/__dirent.h"
-#include "lock.h"
-
-static struct chain {
-	struct chain *next;
-	int tid;
-	sem_t target_sem, caller_sem;
-} *volatile head;
-
-static volatile int synccall_lock[1];
-static volatile int target_tid;
+
+static void dummy_0(void)
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+
+static int target_tid;
 static void (*callback)(void *), *context;
-static volatile int dummy = 0;
-weak_alias(dummy, __block_new_threads);
+static sem_t target_sem, caller_sem, exit_sem;
+
+static void dummy(void *p)
+{
+}
 
 static void handler(int sig)
 {
-	struct chain ch;
-	int old_errno = errno;
+	if (__pthread_self()->tid != target_tid) return;
 
-	sem_init(&ch.target_sem, 0, 0);
-	sem_init(&ch.caller_sem, 0, 0);
+	int old_errno = errno;
 
-	ch.tid = __syscall(SYS_gettid);
+	/* Inform caller we have received signal and wait for
+	 * the caller to let us make the callback. */
+	sem_post(&caller_sem);
+	sem_wait(&target_sem);
 
-	do ch.next = head;
-	while (a_cas_p(&head, ch.next, &ch) != ch.next);
+	callback(context);
 
-	if (a_cas(&target_tid, ch.tid, 0) == (ch.tid | 0x80000000))
-		__syscall(SYS_futex, &target_tid, FUTEX_UNLOCK_PI|FUTEX_PRIVATE);
+	/* Inform caller we've complered the callback and wait
+	 * for the caller to release us to return. */
+	sem_post(&caller_sem);
+	sem_wait(&exit_sem);
 
-	sem_wait(&ch.target_sem);
-	callback(context);
-	sem_post(&ch.caller_sem);
-	sem_wait(&ch.target_sem);
+	/* Inform caller we are returning and state is destroyable. */
+	sem_post(&caller_sem);
 
 	errno = old_errno;
 }
@@ -48,12 +44,10 @@ static void handler(int sig)
 void __synccall(void (*func)(void *), void *ctx)
 {
 	sigset_t oldmask;
-	int cs, i, r, pid, self;;
-	DIR dir = {0};
-	struct dirent *de;
-	struct sigaction sa = { .sa_flags = SA_RESTART, .sa_handler = handler };
-	struct chain *cp, *next;
-	struct timespec ts;
+	int cs, i, r;
+	struct sigaction sa = { .sa_flags = SA_RESTART | SA_ONSTACK, .sa_handler = handler };
+	pthread_t self = __pthread_self(), td;
+	int count = 0;
 
 	/* Blocking signals in two steps, first only app-level signals
 	 * before taking the lock, then all signals after taking the lock,
@@ -62,98 +56,47 @@ void __synccall(void (*func)(void *), void *ctx)
 	 * any until after the lock would allow re-entry in the same thread
 	 * with the lock already held. */
 	__block_app_sigs(&oldmask);
-	LOCK(synccall_lock);
+	__tl_lock();
 	__block_all_sigs(0);
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 
-	head = 0;
+	sem_init(&target_sem, 0, 0);
+	sem_init(&caller_sem, 0, 0);
+	sem_init(&exit_sem, 0, 0);
 
-	if (!libc.threaded) goto single_threaded;
+	if (!libc.threads_minus_1 || __syscall(SYS_gettid) != self->tid)
+		goto single_threaded;
 
 	callback = func;
 	context = ctx;
 
-	/* This atomic store ensures that any signaled threads will see the
-	 * above stores, and prevents more than a bounded number of threads,
-	 * those already in pthread_create, from creating new threads until
-	 * the value is cleared to zero again. */
-	a_store(&__block_new_threads, 1);
-
 	/* Block even implementation-internal signals, so that nothing
 	 * interrupts the SIGSYNCCALL handlers. The main possible source
 	 * of trouble is asynchronous cancellation. */
 	memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
 	__libc_sigaction(SIGSYNCCALL, &sa, 0);
 
-	pid = __syscall(SYS_getpid);
-	self = __syscall(SYS_gettid);
-
-	/* Since opendir is not AS-safe, the DIR needs to be setup manually
-	 * in automatic storage. Thankfully this is easy. */
-	dir.fd = open("/proc/self/task", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
-	if (dir.fd < 0) goto out;
-
-	/* Initially send one signal per counted thread. But since we can't
-	 * synchronize with thread creation/exit here, there could be too
-	 * few signals. This initial signaling is just an optimization, not
-	 * part of the logic. */
-	for (i=libc.threads_minus_1; i; i--)
-		__syscall(SYS_kill, pid, SIGSYNCCALL);
-
-	/* Loop scanning the kernel-provided thread list until it shows no
-	 * threads that have not already replied to the signal. */
-	for (;;) {
-		int miss_cnt = 0;
-		while ((de = readdir(&dir))) {
-			if (!isdigit(de->d_name[0])) continue;
-			int tid = atoi(de->d_name);
-			if (tid == self || !tid) continue;
-
-			/* Set the target thread as the PI futex owner before
-			 * checking if it's in the list of caught threads. If it
-			 * adds itself to the list after we check for it, then
-			 * it will see its own tid in the PI futex and perform
-			 * the unlock operation. */
-			a_store(&target_tid, tid);
-
-			/* Thread-already-caught is a success condition. */
-			for (cp = head; cp && cp->tid != tid; cp=cp->next);
-			if (cp) continue;
-
-			r = -__syscall(SYS_tgkill, pid, tid, SIGSYNCCALL);
-
-			/* Target thread exit is a success condition. */
-			if (r == ESRCH) continue;
-
-			/* The FUTEX_LOCK_PI operation is used to loan priority
-			 * to the target thread, which otherwise may be unable
-			 * to run. Timeout is necessary because there is a race
-			 * condition where the tid may be reused by a different
-			 * process. */
-			clock_gettime(CLOCK_REALTIME, &ts);
-			ts.tv_nsec += 10000000;
-			if (ts.tv_nsec >= 1000000000) {
-				ts.tv_sec++;
-				ts.tv_nsec -= 1000000000;
-			}
-			r = -__syscall(SYS_futex, &target_tid,
-				FUTEX_LOCK_PI|FUTEX_PRIVATE, 0, &ts);
-
-			/* Obtaining the lock means the thread responded. ESRCH
-			 * means the target thread exited, which is okay too. */
-			if (!r || r == ESRCH) continue;
-
-			miss_cnt++;
+
+	for (td=self->next; td!=self; td=td->next) {
+		target_tid = td->tid;
+		while ((r = -__syscall(SYS_tkill, td->tid, SIGSYNCCALL)) == EAGAIN);
+		if (r) {
+			/* If we failed to signal any thread, nop out the
+			 * callback to abort the synccall and just release
+			 * any threads already caught. */
+			callback = func = dummy;
+			break;
 		}
-		if (!miss_cnt) break;
-		rewinddir(&dir);
+		sem_wait(&caller_sem);
+		count++;
 	}
-	close(dir.fd);
+	target_tid = 0;
 
-	/* Serialize execution of callback in caught threads. */
-	for (cp=head; cp; cp=cp->next) {
-		sem_post(&cp->target_sem);
-		sem_wait(&cp->caller_sem);
+	/* Serialize execution of callback in caught threads, or just
+	 * release them all if synccall is being aborted. */
+	for (i=0; i<count; i++) {
+		sem_post(&target_sem);
+		sem_wait(&caller_sem);
 	}
 
 	sa.sa_handler = SIG_IGN;
@@ -164,16 +107,16 @@ single_threaded:
 
 	/* Only release the caught threads once all threads, including the
 	 * caller, have returned from the callback function. */
-	for (cp=head; cp; cp=next) {
-		next = cp->next;
-		sem_post(&cp->target_sem);
-	}
+	for (i=0; i<count; i++)
+		sem_post(&exit_sem);
+	for (i=0; i<count; i++)
+		sem_wait(&caller_sem);
 
-out:
-	a_store(&__block_new_threads, 0);
-	__wake(&__block_new_threads, -1, 1);
+	sem_destroy(&caller_sem);
+	sem_destroy(&target_sem);
+	sem_destroy(&exit_sem);
 
 	pthread_setcancelstate(cs, 0);
-	UNLOCK(synccall_lock);
+	__tl_unlock();
 	__restore_sigs(&oldmask);
 }
diff --git a/src/thread/thrd_sleep.c b/src/thread/thrd_sleep.c
index e8dfe400..97de5345 100644
--- a/src/thread/thrd_sleep.c
+++ b/src/thread/thrd_sleep.c
@@ -1,10 +1,11 @@
 #include <threads.h>
+#include <time.h>
 #include <errno.h>
 #include "syscall.h"
 
 int thrd_sleep(const struct timespec *req, struct timespec *rem)
 {
-	int ret = __syscall(SYS_nanosleep, req, rem);
+	int ret = -__clock_nanosleep(CLOCK_REALTIME, 0, req, rem);
 	switch (ret) {
 	case 0:      return 0;
 	case -EINTR: return -1; /* value specified by C11 */
diff --git a/src/thread/vmlock.c b/src/thread/vmlock.c
index 75f3cb76..fa0a8e3c 100644
--- a/src/thread/vmlock.c
+++ b/src/thread/vmlock.c
@@ -1,6 +1,8 @@
 #include "pthread_impl.h"
+#include "fork_impl.h"
 
 static volatile int vmlock[2];
+volatile int *const __vmlock_lockptr = vmlock;
 
 void __vm_wait()
 {
diff --git a/src/thread/x32/syscall_cp.s b/src/thread/x32/syscall_cp.s
index 9805af0a..4f101716 100644
--- a/src/thread/x32/syscall_cp.s
+++ b/src/thread/x32/syscall_cp.s
@@ -6,10 +6,10 @@
 .global __cp_cancel
 .hidden __cp_cancel
 .hidden __cancel
-.global __syscall_cp_internal
-.hidden __syscall_cp_internal
-.type   __syscall_cp_internal,@function
-__syscall_cp_internal:
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+__syscall_cp_asm:
 
 __cp_begin:
 	mov (%rdi),%eax
diff --git a/src/thread/x32/syscall_cp_fixup.c b/src/thread/x32/syscall_cp_fixup.c
deleted file mode 100644
index 4956610f..00000000
--- a/src/thread/x32/syscall_cp_fixup.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <sys/syscall.h>
-#include <features.h>
-
-hidden long __syscall_cp_internal(volatile void*, long long, long long,
-                                  long long, long long, long long,
-                                  long long, long long);
-
-struct __timespec { long long tv_sec; long tv_nsec; };
-struct __timespec_kernel { long long tv_sec; long long tv_nsec; };
-#define __tsc(X) ((struct __timespec*)(unsigned long)(X))
-#define __fixup(X) do { if(X) { \
-	ts->tv_sec = __tsc(X)->tv_sec; \
-	ts->tv_nsec = __tsc(X)->tv_nsec; \
-	(X) = (unsigned long)ts; } } while(0)
-
-hidden long __syscall_cp_asm (volatile void * foo, long long n, long long a1,
-                              long long a2, long long a3, long long a4,
-                              long long a5, long long a6)
-{
-	struct __timespec_kernel ts[1];
-	switch (n) {
-	case SYS_mq_timedsend: case SYS_mq_timedreceive: case SYS_pselect6:
-		__fixup(a5);
-		break;
-	case SYS_futex:
-		if((a2 & (~128 /* FUTEX_PRIVATE_FLAG */)) == 0 /* FUTEX_WAIT */)
-			__fixup(a4);
-		break;
-	case SYS_clock_nanosleep:
-	case SYS_rt_sigtimedwait: case SYS_ppoll:
-		__fixup(a3);
-		break;
-	case SYS_nanosleep:
-		__fixup(a1);
-		break;
-	}
-	return __syscall_cp_internal(foo, n, a1, a2, a3, a4, a5, a6);
-}
-
diff --git a/src/time/__map_file.c b/src/time/__map_file.c
index 9d376222..c2b29fe8 100644
--- a/src/time/__map_file.c
+++ b/src/time/__map_file.c
@@ -9,7 +9,7 @@ const char unsigned *__map_file(const char *pathname, size_t *size)
 	const unsigned char *map = MAP_FAILED;
 	int fd = sys_open(pathname, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
 	if (fd < 0) return 0;
-	if (!syscall(SYS_fstat, fd, &st)) {
+	if (!__fstat(fd, &st)) {
 		map = __mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
 		*size = st.st_size;
 	}
diff --git a/src/time/__tz.c b/src/time/__tz.c
index 185642e8..54ed4cf6 100644
--- a/src/time/__tz.c
+++ b/src/time/__tz.c
@@ -4,8 +4,15 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <ctype.h>
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
+
+#define malloc __libc_malloc
+#define calloc undef
+#define realloc undef
+#define free undef
 
 long  __timezone = 0;
 int   __daylight = 0;
@@ -17,7 +24,6 @@ weak_alias(__tzname, tzname);
 
 static char std_name[TZNAME_MAX+1];
 static char dst_name[TZNAME_MAX+1];
-const char __utc[] = "UTC";
 
 static int dst_off;
 static int r0[5], r1[5];
@@ -30,6 +36,7 @@ static char *old_tz = old_tz_buf;
 static size_t old_tz_size = sizeof old_tz_buf;
 
 static volatile int lock[1];
+volatile int *const __timezone_lockptr = lock;
 
 static int getint(const char **p)
 {
@@ -86,15 +93,15 @@ static void getname(char *d, const char **p)
 	int i;
 	if (**p == '<') {
 		++*p;
-		for (i=0; (*p)[i]!='>' && i<TZNAME_MAX; i++)
-			d[i] = (*p)[i];
-		++*p;
+		for (i=0; (*p)[i] && (*p)[i]!='>'; i++)
+			if (i<TZNAME_MAX) d[i] = (*p)[i];
+		if ((*p)[i]) ++*p;
 	} else {
-		for (i=0; ((*p)[i]|32)-'a'<26U && i<TZNAME_MAX; i++)
-			d[i] = (*p)[i];
+		for (i=0; ((*p)[i]|32)-'a'<26U; i++)
+			if (i<TZNAME_MAX) d[i] = (*p)[i];
 	}
 	*p += i;
-	d[i] = 0;
+	d[i<TZNAME_MAX?i:TZNAME_MAX] = 0;
 }
 
 #define VEC(...) ((const unsigned char[]){__VA_ARGS__})
@@ -147,10 +154,21 @@ static void do_tzset()
 	}
 	if (old_tz) memcpy(old_tz, s, i+1);
 
+	int posix_form = 0;
+	if (*s != ':') {
+		p = s;
+		char dummy_name[TZNAME_MAX+1];
+		getname(dummy_name, &p);
+		if (p!=s && (*p == '+' || *p == '-' || isdigit(*p)
+		             || !strcmp(dummy_name, "UTC")
+		             || !strcmp(dummy_name, "GMT")))
+			posix_form = 1;
+	}	
+
 	/* Non-suid can use an absolute tzfile pathname or a relative
 	 * pathame beginning with "."; in secure mode, only the
 	 * standard path will be searched. */
-	if (*s == ':' || ((p=strchr(s, '/')) && !memchr(s, ',', p-s))) {
+	if (!posix_form) {
 		if (*s == ':') s++;
 		if (*s == '/' || *s == '.') {
 			if (!libc.secure || !strcmp(s, "/etc/localtime"))
@@ -178,7 +196,7 @@ static void do_tzset()
 	zi = map;
 	if (map) {
 		int scale = 2;
-		if (sizeof(time_t) > 4 && map[4]=='2') {
+		if (map[4]!='1') {
 			size_t skip = zi_dotprod(zi+20, VEC(1,1,8,5,6,1), 6);
 			trans = zi+skip+44+44;
 			scale++;
@@ -274,22 +292,20 @@ static size_t scan_trans(long long t, int local, size_t *alt)
 	n = (index-trans)>>scale;
 	if (a == n-1) return -1;
 	if (a == 0) {
-		x = zi_read32(trans + (a<<scale));
-		if (scale == 3) x = x<<32 | zi_read32(trans + (a<<scale) + 4);
+		x = zi_read32(trans);
+		if (scale == 3) x = x<<32 | zi_read32(trans + 4);
 		else x = (int32_t)x;
-		if (local) off = (int32_t)zi_read32(types + 6 * index[a-1]);
+		/* Find the lowest non-DST type, or 0 if none. */
+		size_t j = 0;
+		for (size_t i=abbrevs-types; i; i-=6) {
+			if (!types[i-6+4]) j = i-6;
+		}
+		if (local) off = (int32_t)zi_read32(types + j);
+		/* If t is before first transition, use the above-found type
+		 * and the index-zero (after transition) type as the alt. */
 		if (t - off < (int64_t)x) {
-			for (a=0; a<(abbrevs-types)/6; a++) {
-				if (types[6*a+4] != types[4]) break;
-			}
-			if (a == (abbrevs-types)/6) a = 0;
-			if (types[6*a+4]) {
-				*alt = a;
-				return 0;
-			} else {
-				*alt = 0;
-				return a;
-			}
+			if (alt) *alt = index[0];
+			return j/6;
 		}
 	}
 
diff --git a/src/time/__utc.c b/src/time/__utc.c
new file mode 100644
index 00000000..9e8bfc58
--- /dev/null
+++ b/src/time/__utc.c
@@ -0,0 +1,3 @@
+#include "time_impl.h"
+
+const char __utc[] = "UTC";
diff --git a/src/time/__year_to_secs.c b/src/time/__year_to_secs.c
index 2824ec6d..b42f5a6d 100644
--- a/src/time/__year_to_secs.c
+++ b/src/time/__year_to_secs.c
@@ -10,9 +10,9 @@ long long __year_to_secs(long long year, int *is_leap)
 		return 31536000*(y-70) + 86400*leaps;
 	}
 
-	int cycles, centuries, leaps, rem;
+	int cycles, centuries, leaps, rem, dummy;
 
-	if (!is_leap) is_leap = &(int){0};
+	if (!is_leap) is_leap = &dummy;
 	cycles = (year-100) / 400;
 	rem = (year-100) % 400;
 	if (rem < 0) {
diff --git a/src/time/clock_getcpuclockid.c b/src/time/clock_getcpuclockid.c
index 8a0e2d4c..bce1e8ab 100644
--- a/src/time/clock_getcpuclockid.c
+++ b/src/time/clock_getcpuclockid.c
@@ -8,6 +8,7 @@ int clock_getcpuclockid(pid_t pid, clockid_t *clk)
 	struct timespec ts;
 	clockid_t id = (-pid-1)*8U + 2;
 	int ret = __syscall(SYS_clock_getres, id, &ts);
+	if (ret == -EINVAL) ret = -ESRCH;
 	if (ret) return -ret;
 	*clk = id;
 	return 0;
diff --git a/src/time/clock_getres.c b/src/time/clock_getres.c
index 36a0d695..81c67037 100644
--- a/src/time/clock_getres.c
+++ b/src/time/clock_getres.c
@@ -3,5 +3,19 @@
 
 int clock_getres(clockid_t clk, struct timespec *ts)
 {
+#ifdef SYS_clock_getres_time64
+	/* On a 32-bit arch, use the old syscall if it exists. */
+	if (SYS_clock_getres != SYS_clock_getres_time64) {
+		long ts32[2];
+		int r = __syscall(SYS_clock_getres, clk, ts32);
+		if (!r && ts) {
+			ts->tv_sec = ts32[0];
+			ts->tv_nsec = ts32[1];
+		}
+		return __syscall_ret(r);
+	}
+#endif
+	/* If reaching this point, it's a 64-bit arch or time64-only
+	 * 32-bit arch and we can get result directly into timespec. */
 	return syscall(SYS_clock_getres, clk, ts);
 }
diff --git a/src/time/clock_gettime.c b/src/time/clock_gettime.c
index 8fd1b8f5..4d2ec22f 100644
--- a/src/time/clock_gettime.c
+++ b/src/time/clock_gettime.c
@@ -8,9 +8,44 @@
 
 static void *volatile vdso_func;
 
+#ifdef VDSO_CGT32_SYM
+static void *volatile vdso_func_32;
+static int cgt_time32_wrap(clockid_t clk, struct timespec *ts)
+{
+	long ts32[2];
+	int (*f)(clockid_t, long[2]) =
+		(int (*)(clockid_t, long[2]))vdso_func_32;
+	int r = f(clk, ts32);
+	if (!r) {
+		/* Fallback to syscalls if time32 overflowed. Maybe
+		 * we lucked out and somehow migrated to a kernel with
+		 * time64 syscalls available. */
+		if (ts32[0] < 0) {
+			a_cas_p(&vdso_func, (void *)cgt_time32_wrap, 0);
+			return -ENOSYS;
+		}
+		ts->tv_sec = ts32[0];
+		ts->tv_nsec = ts32[1];
+	}
+	return r;
+}
+#endif
+
 static int cgt_init(clockid_t clk, struct timespec *ts)
 {
 	void *p = __vdsosym(VDSO_CGT_VER, VDSO_CGT_SYM);
+#ifdef VDSO_CGT32_SYM
+	if (!p) {
+		void *q = __vdsosym(VDSO_CGT32_VER, VDSO_CGT32_SYM);
+		if (q) {
+			a_cas_p(&vdso_func_32, 0, q);
+			p = cgt_time32_wrap;
+		}
+	}
+#ifdef VDSO_CGT_WORKAROUND
+	if (!__vdsosym(VDSO_CGT32_VER, VDSO_CGT32_SYM)) p = 0;
+#endif
+#endif
 	int (*f)(clockid_t, struct timespec *) =
 		(int (*)(clockid_t, struct timespec *))p;
 	a_cas_p(&vdso_func, (void *)cgt_init, p);
@@ -40,7 +75,29 @@ int __clock_gettime(clockid_t clk, struct timespec *ts)
 	}
 #endif
 
+#ifdef SYS_clock_gettime64
+	r = -ENOSYS;
+	if (sizeof(time_t) > 4)
+		r = __syscall(SYS_clock_gettime64, clk, ts);
+	if (SYS_clock_gettime == SYS_clock_gettime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	long ts32[2];
+	r = __syscall(SYS_clock_gettime, clk, ts32);
+#ifdef SYS_gettimeofday
+	if (r==-ENOSYS && clk==CLOCK_REALTIME) {
+		r = __syscall(SYS_gettimeofday, ts32, 0);
+		ts32[1] *= 1000;
+	}
+#endif
+	if (!r) {
+		ts->tv_sec = ts32[0];
+		ts->tv_nsec = ts32[1];
+		return r;
+	}
+	return __syscall_ret(r);
+#else
 	r = __syscall(SYS_clock_gettime, clk, ts);
+#ifdef SYS_gettimeofday
 	if (r == -ENOSYS) {
 		if (clk == CLOCK_REALTIME) {
 			__syscall(SYS_gettimeofday, ts, 0);
@@ -49,7 +106,9 @@ int __clock_gettime(clockid_t clk, struct timespec *ts)
 		}
 		r = -EINVAL;
 	}
+#endif
 	return __syscall_ret(r);
+#endif
 }
 
 weak_alias(__clock_gettime, clock_gettime);
diff --git a/src/time/clock_nanosleep.c b/src/time/clock_nanosleep.c
index 32f0c07e..e195499c 100644
--- a/src/time/clock_nanosleep.c
+++ b/src/time/clock_nanosleep.c
@@ -2,8 +2,37 @@
 #include <errno.h>
 #include "syscall.h"
 
-int clock_nanosleep(clockid_t clk, int flags, const struct timespec *req, struct timespec *rem)
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63))
+
+int __clock_nanosleep(clockid_t clk, int flags, const struct timespec *req, struct timespec *rem)
 {
-	int r = -__syscall_cp(SYS_clock_nanosleep, clk, flags, req, rem);
-	return clk == CLOCK_THREAD_CPUTIME_ID ? EINVAL : r;
+	if (clk == CLOCK_THREAD_CPUTIME_ID) return EINVAL;
+#ifdef SYS_clock_nanosleep_time64
+	time_t s = req->tv_sec;
+	long ns = req->tv_nsec;
+	int r = -ENOSYS;
+	if (SYS_clock_nanosleep == SYS_clock_nanosleep_time64 || !IS32BIT(s))
+		r = __syscall_cp(SYS_clock_nanosleep_time64, clk, flags,
+			((long long[]){s, ns}), rem);
+	if (SYS_clock_nanosleep == SYS_clock_nanosleep_time64 || r!=-ENOSYS)
+		return -r;
+	long long extra = s - CLAMP(s);
+	long ts32[2] = { CLAMP(s), ns };
+	if (clk == CLOCK_REALTIME && !flags)
+		r = __syscall_cp(SYS_nanosleep, &ts32, &ts32);
+	else
+		r = __syscall_cp(SYS_clock_nanosleep, clk, flags, &ts32, &ts32);
+	if (r==-EINTR && rem && !(flags & TIMER_ABSTIME)) {
+		rem->tv_sec = ts32[0] + extra;
+		rem->tv_nsec = ts32[1];
+	}
+	return -r;
+#else
+	if (clk == CLOCK_REALTIME && !flags)
+		return -__syscall_cp(SYS_nanosleep, req, rem);
+	return -__syscall_cp(SYS_clock_nanosleep, clk, flags, req, rem);
+#endif
 }
+
+weak_alias(__clock_nanosleep, clock_nanosleep);
diff --git a/src/time/clock_settime.c b/src/time/clock_settime.c
index 66b8162d..1004ed15 100644
--- a/src/time/clock_settime.c
+++ b/src/time/clock_settime.c
@@ -1,7 +1,24 @@
 #include <time.h>
+#include <errno.h>
 #include "syscall.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+
 int clock_settime(clockid_t clk, const struct timespec *ts)
 {
+#ifdef SYS_clock_settime64
+	time_t s = ts->tv_sec;
+	long ns = ts->tv_nsec;
+	int r = -ENOSYS;
+	if (SYS_clock_settime == SYS_clock_settime64 || !IS32BIT(s))
+		r = __syscall(SYS_clock_settime64, clk,
+			((long long[]){s, ns}));
+	if (SYS_clock_settime == SYS_clock_settime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	if (!IS32BIT(s))
+		return __syscall_ret(-ENOTSUP);
+	return syscall(SYS_clock_settime, clk, ((long[]){s, ns}));
+#else
 	return syscall(SYS_clock_settime, clk, ts);
+#endif
 }
diff --git a/src/time/nanosleep.c b/src/time/nanosleep.c
index 1e6f3922..bc9f7895 100644
--- a/src/time/nanosleep.c
+++ b/src/time/nanosleep.c
@@ -3,5 +3,5 @@
 
 int nanosleep(const struct timespec *req, struct timespec *rem)
 {
-	return syscall_cp(SYS_nanosleep, req, rem);
+	return __syscall_ret(-__clock_nanosleep(CLOCK_REALTIME, 0, req, rem));
 }
diff --git a/src/time/strftime.c b/src/time/strftime.c
index cc53d536..c40246db 100644
--- a/src/time/strftime.c
+++ b/src/time/strftime.c
@@ -3,6 +3,7 @@
 #include <string.h>
 #include <langinfo.h>
 #include <locale.h>
+#include <ctype.h>
 #include <time.h>
 #include <limits.h>
 #include "locale_impl.h"
@@ -233,7 +234,12 @@ size_t __strftime_l(char *restrict s, size_t n, const char *restrict f, const st
 		pad = 0;
 		if (*f == '-' || *f == '_' || *f == '0') pad = *f++;
 		if ((plus = (*f == '+'))) f++;
-		width = strtoul(f, &p, 10);
+		if (isdigit(*f)) {
+			width = strtoul(f, &p, 10);
+		} else {
+			width = 0;
+			p = (void *)f;
+		}
 		if (*p == 'C' || *p == 'F' || *p == 'G' || *p == 'Y') {
 			if (!width && p!=f) width = 1;
 		} else {
diff --git a/src/time/strptime.c b/src/time/strptime.c
index c54a0d8c..b1147242 100644
--- a/src/time/strptime.c
+++ b/src/time/strptime.c
@@ -59,6 +59,22 @@ char *strptime(const char *restrict s, const char *restrict f, struct tm *restri
 			s = strptime(s, "%m/%d/%y", tm);
 			if (!s) return 0;
 			break;
+		case 'F':
+			/* Use temp buffer to implement the odd requirement
+			 * that entire field be width-limited but the year
+			 * subfield not itself be limited. */
+			i = 0;
+			char tmp[20];
+			if (*s == '-' || *s == '+') tmp[i++] = *s++;
+			while (*s=='0' && isdigit(s[1])) s++;
+			for (; *s && i<(size_t)w && i+1<sizeof tmp; i++) {
+				tmp[i] = *s++;
+			}
+			tmp[i] = 0;
+			char *p = strptime(tmp, "%12Y-%m-%d", tm);
+			if (!p) return 0;
+			s -= tmp+i-p;
+			break;
 		case 'H':
 			dest = &tm->tm_hour;
 			min = 0;
@@ -114,6 +130,13 @@ char *strptime(const char *restrict s, const char *restrict f, struct tm *restri
 			s = strptime(s, "%H:%M", tm);
 			if (!s) return 0;
 			break;
+		case 's':
+			/* Parse only. Effect on tm is unspecified
+			 * and presently no effect is implemented.. */
+			if (*s == '-') s++;
+			if (!isdigit(*s)) return 0;
+			while (isdigit(*s)) s++;
+			break;
 		case 'S':
 			dest = &tm->tm_sec;
 			min = 0;
@@ -125,11 +148,30 @@ char *strptime(const char *restrict s, const char *restrict f, struct tm *restri
 			break;
 		case 'U':
 		case 'W':
-			/* Throw away result, for now. (FIXME?) */
+			/* Throw away result of %U, %V, %W, %g, and %G. Effect
+			 * is unspecified and there is no clear right choice. */
 			dest = &dummy;
 			min = 0;
 			range = 54;
 			goto numeric_range;
+		case 'V':
+			dest = &dummy;
+			min = 1;
+			range = 53;
+			goto numeric_range;
+		case 'g':
+			dest = &dummy;
+			w = 2;
+			goto numeric_digits;
+		case 'G':
+			dest = &dummy;
+			if (w<0) w=4;
+			goto numeric_digits;
+		case 'u':
+			dest = &tm->tm_wday;
+			min = 1;
+			range = 7;
+			goto numeric_range;
 		case 'w':
 			dest = &tm->tm_wday;
 			min = 0;
@@ -154,6 +196,28 @@ char *strptime(const char *restrict s, const char *restrict f, struct tm *restri
 			adj = 1900;
 			want_century = 0;
 			goto numeric_digits;
+		case 'z':
+			if (*s == '+') neg = 0;
+			else if (*s == '-') neg = 1;
+			else return 0;
+			for (i=0; i<4; i++) if (!isdigit(s[1+i])) return 0;
+			tm->__tm_gmtoff = (s[1]-'0')*36000+(s[2]-'0')*3600
+				+ (s[3]-'0')*600 + (s[4]-'0')*60;
+			if (neg) tm->__tm_gmtoff = -tm->__tm_gmtoff;
+			s += 5;
+			break;
+		case 'Z':
+			if (!strncmp(s, tzname[0], len = strlen(tzname[0]))) {
+				tm->tm_isdst = 0;
+				s += len;
+			} else if (!strncmp(s, tzname[1], len=strlen(tzname[1]))) {
+				tm->tm_isdst = 1;
+				s += len;
+			} else {
+				/* FIXME: is this supposed to be an error? */
+				while ((*s|32)-'a' <= 'z'-'a') s++;
+			}
+			break;
 		case '%':
 			if (*s++ != '%') return 0;
 			break;
diff --git a/src/time/timer_create.c b/src/time/timer_create.c
index ad7a2646..cc6c2236 100644
--- a/src/time/timer_create.c
+++ b/src/time/timer_create.c
@@ -1,6 +1,9 @@
 #include <time.h>
 #include <setjmp.h>
+#include <limits.h>
+#include <semaphore.h>
 #include "pthread_impl.h"
+#include "atomic.h"
 
 struct ksigevent {
 	union sigval sigev_value;
@@ -10,7 +13,7 @@ struct ksigevent {
 };
 
 struct start_args {
-	pthread_barrier_t b;
+	sem_t sem1, sem2;
 	struct sigevent *sev;
 };
 
@@ -19,66 +22,60 @@ static void dummy_0()
 }
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 
+static void timer_handler(int sig, siginfo_t *si, void *ctx)
+{
+}
+
 static void cleanup_fromsig(void *p)
 {
 	pthread_t self = __pthread_self();
-	__pthread_tsd_run_dtors(self);
+	__pthread_tsd_run_dtors();
+	__block_app_sigs(0);
+	__syscall(SYS_rt_sigprocmask, SIG_BLOCK, SIGTIMER_SET, 0, _NSIG/8);
 	self->cancel = 0;
 	self->cancelbuf = 0;
 	self->canceldisable = 0;
 	self->cancelasync = 0;
-	self->unblock_cancel = 0;
 	__reset_tls();
 	longjmp(p, 1);
 }
 
-static void timer_handler(int sig, siginfo_t *si, void *ctx)
-{
-	pthread_t self = __pthread_self();
-	jmp_buf jb;
-	void (*notify)(union sigval) = (void (*)(union sigval))self->start;
-	union sigval val = { .sival_ptr = self->start_arg };
-
-	if (!setjmp(jb) && si->si_code == SI_TIMER) {
-		pthread_cleanup_push(cleanup_fromsig, jb);
-		notify(val);
-		pthread_cleanup_pop(1);
-	}
-}
-
-static void install_handler()
-{
-	struct sigaction sa = {
-		.sa_sigaction = timer_handler,
-		.sa_flags = SA_SIGINFO | SA_RESTART
-	};
-	__libc_sigaction(SIGTIMER, &sa, 0);
-}
-
 static void *start(void *arg)
 {
 	pthread_t self = __pthread_self();
 	struct start_args *args = arg;
-	int id;
+	jmp_buf jb;
+
+	void (*notify)(union sigval) = args->sev->sigev_notify_function;
+	union sigval val = args->sev->sigev_value;
 
-	/* Reuse no-longer-needed thread structure fields to avoid
-	 * needing the timer address in the signal handler. */
-	self->start = (void *(*)(void *))args->sev->sigev_notify_function;
-	self->start_arg = args->sev->sigev_value.sival_ptr;
+	/* The two-way semaphore synchronization ensures that we see
+	 * self->cancel set by the parent if timer creation failed or
+	 * self->timer_id if it succeeded, and informs the parent that
+	 * we are done accessing the arguments so that the parent can
+	 * proceed past their block lifetime. */
+	while (sem_wait(&args->sem1));
+	sem_post(&args->sem2);
 
-	pthread_barrier_wait(&args->b);
-	if ((id = self->timer_id) >= 0) {
-		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK,
-			SIGTIMER_SET, 0, _NSIG/8);
-		__wait(&self->timer_id, 0, id, 1);
-		__syscall(SYS_timer_delete, id);
+	if (self->cancel)
+		return 0;
+	for (;;) {
+		siginfo_t si;
+		while (sigwaitinfo(SIGTIMER_SET, &si) < 0);
+		if (si.si_code == SI_TIMER && !setjmp(jb)) {
+			pthread_cleanup_push(cleanup_fromsig, jb);
+			notify(val);
+			pthread_cleanup_pop(1);
+		}
+		if (self->timer_id < 0) break;
 	}
+	__syscall(SYS_timer_delete, self->timer_id & INT_MAX);
 	return 0;
 }
 
 int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict res)
 {
-	static pthread_once_t once = PTHREAD_ONCE_INIT;
+	static volatile int init = 0;
 	pthread_t td;
 	pthread_attr_t attr;
 	int r;
@@ -90,11 +87,15 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict
 	switch (evp ? evp->sigev_notify : SIGEV_SIGNAL) {
 	case SIGEV_NONE:
 	case SIGEV_SIGNAL:
+	case SIGEV_THREAD_ID:
 		if (evp) {
 			ksev.sigev_value = evp->sigev_value;
 			ksev.sigev_signo = evp->sigev_signo;
 			ksev.sigev_notify = evp->sigev_notify;
-			ksev.sigev_tid = 0;
+			if (evp->sigev_notify == SIGEV_THREAD_ID)
+				ksev.sigev_tid = evp->sigev_notify_thread_id;
+			else
+				ksev.sigev_tid = 0;
 			ksevp = &ksev;
 		}
 		if (syscall(SYS_timer_create, clk, ksevp, &timerid) < 0)
@@ -102,16 +103,25 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict
 		*res = (void *)(intptr_t)timerid;
 		break;
 	case SIGEV_THREAD:
-		pthread_once(&once, install_handler);
+		if (!init) {
+			struct sigaction sa = {
+				.sa_sigaction = timer_handler,
+				.sa_flags = SA_SIGINFO | SA_RESTART
+			};
+			__libc_sigaction(SIGTIMER, &sa, 0);
+			a_store(&init, 1);
+		}
 		if (evp->sigev_notify_attributes)
 			attr = *evp->sigev_notify_attributes;
 		else
 			pthread_attr_init(&attr);
 		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-		pthread_barrier_init(&args.b, 0, 2);
+		sem_init(&args.sem1, 0, 0);
+		sem_init(&args.sem2, 0, 0);
 		args.sev = evp;
 
 		__block_app_sigs(&set);
+		__syscall(SYS_rt_sigprocmask, SIG_BLOCK, SIGTIMER_SET, 0, _NSIG/8);
 		r = pthread_create(&td, &attr, start, &args);
 		__restore_sigs(&set);
 		if (r) {
@@ -121,12 +131,15 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict
 
 		ksev.sigev_value.sival_ptr = 0;
 		ksev.sigev_signo = SIGTIMER;
-		ksev.sigev_notify = 4; /* SIGEV_THREAD_ID */
+		ksev.sigev_notify = SIGEV_THREAD_ID;
 		ksev.sigev_tid = td->tid;
-		if (syscall(SYS_timer_create, clk, &ksev, &timerid) < 0)
+		if (syscall(SYS_timer_create, clk, &ksev, &timerid) < 0) {
 			timerid = -1;
+			td->cancel = 1;
+		}
 		td->timer_id = timerid;
-		pthread_barrier_wait(&args.b);
+		sem_post(&args.sem1);
+		while (sem_wait(&args.sem2));
 		if (timerid < 0) return -1;
 		*res = (void *)(INTPTR_MIN | (uintptr_t)td>>1);
 		break;
diff --git a/src/time/timer_delete.c b/src/time/timer_delete.c
index 7c97eeb1..b0bfac09 100644
--- a/src/time/timer_delete.c
+++ b/src/time/timer_delete.c
@@ -7,7 +7,7 @@ int timer_delete(timer_t t)
 	if ((intptr_t)t < 0) {
 		pthread_t td = (void *)((uintptr_t)t << 1);
 		a_store(&td->timer_id, td->timer_id | INT_MIN);
-		__wake(&td->timer_id, 1, 1);
+		__syscall(SYS_tkill, td->tid, SIGTIMER);
 		return 0;
 	}
 	return __syscall(SYS_timer_delete, t);
diff --git a/src/time/timer_gettime.c b/src/time/timer_gettime.c
index ed6d8d65..21c9d32c 100644
--- a/src/time/timer_gettime.c
+++ b/src/time/timer_gettime.c
@@ -8,5 +8,21 @@ int timer_gettime(timer_t t, struct itimerspec *val)
 		pthread_t td = (void *)((uintptr_t)t << 1);
 		t = (void *)(uintptr_t)(td->timer_id & INT_MAX);
 	}
+#ifdef SYS_timer_gettime64
+	int r = -ENOSYS;
+	if (sizeof(time_t) > 4)
+		r = __syscall(SYS_timer_gettime64, t, val);
+	if (SYS_timer_gettime == SYS_timer_gettime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	long val32[4];
+	r = __syscall(SYS_timer_gettime, t, val32);
+	if (!r) {
+		val->it_interval.tv_sec = val32[0];
+		val->it_interval.tv_nsec = val32[1];
+		val->it_value.tv_sec = val32[2];
+		val->it_value.tv_nsec = val32[3];
+	}
+	return __syscall_ret(r);
+#endif
 	return syscall(SYS_timer_gettime, t, val);
 }
diff --git a/src/time/timer_settime.c b/src/time/timer_settime.c
index 62631aa4..373f00ce 100644
--- a/src/time/timer_settime.c
+++ b/src/time/timer_settime.c
@@ -2,11 +2,36 @@
 #include <limits.h>
 #include "pthread_impl.h"
 
+#define IS32BIT(x) !((x)+0x80000000ULL>>32)
+
 int timer_settime(timer_t t, int flags, const struct itimerspec *restrict val, struct itimerspec *restrict old)
 {
 	if ((intptr_t)t < 0) {
 		pthread_t td = (void *)((uintptr_t)t << 1);
 		t = (void *)(uintptr_t)(td->timer_id & INT_MAX);
 	}
+#ifdef SYS_timer_settime64
+	time_t is = val->it_interval.tv_sec, vs = val->it_value.tv_sec;
+	long ins = val->it_interval.tv_nsec, vns = val->it_value.tv_nsec;
+	int r = -ENOSYS;
+	if (SYS_timer_settime == SYS_timer_settime64
+	    || !IS32BIT(is) || !IS32BIT(vs) || (sizeof(time_t)>4 && old))
+		r = __syscall(SYS_timer_settime64, t, flags,
+			((long long[]){is, ins, vs, vns}), old);
+	if (SYS_timer_settime == SYS_timer_settime64 || r!=-ENOSYS)
+		return __syscall_ret(r);
+	if (!IS32BIT(is) || !IS32BIT(vs))
+		return __syscall_ret(-ENOTSUP);
+	long old32[4];
+	r = __syscall(SYS_timer_settime, t, flags,
+		((long[]){is, ins, vs, vns}), old32);
+	if (!r && old) {
+		old->it_interval.tv_sec = old32[0];
+		old->it_interval.tv_nsec = old32[1];
+		old->it_value.tv_sec = old32[2];
+		old->it_value.tv_nsec = old32[3];
+	}
+	return __syscall_ret(r);
+#endif
 	return syscall(SYS_timer_settime, t, flags, val, old);
 }
diff --git a/src/unistd/alarm.c b/src/unistd/alarm.c
index 2e3263ac..a5e0c822 100644
--- a/src/unistd/alarm.c
+++ b/src/unistd/alarm.c
@@ -4,7 +4,7 @@
 
 unsigned alarm(unsigned seconds)
 {
-	struct itimerval it = { .it_value.tv_sec = seconds };
-	__syscall(SYS_setitimer, ITIMER_REAL, &it, &it);
-	return it.it_value.tv_sec + !!it.it_value.tv_usec;
+	struct itimerval it = { .it_value.tv_sec = seconds }, old = { 0 };
+	setitimer(ITIMER_REAL, &it, &old);
+	return old.it_value.tv_sec + !!old.it_value.tv_usec;
 }
diff --git a/src/unistd/close.c b/src/unistd/close.c
index 5b38e019..a2105f50 100644
--- a/src/unistd/close.c
+++ b/src/unistd/close.c
@@ -1,5 +1,6 @@
 #include <unistd.h>
 #include <errno.h>
+#include "aio_impl.h"
 #include "syscall.h"
 
 static int dummy(int fd)
diff --git a/src/unistd/dup3.c b/src/unistd/dup3.c
index f919f791..40798bde 100644
--- a/src/unistd/dup3.c
+++ b/src/unistd/dup3.c
@@ -9,12 +9,14 @@ int __dup3(int old, int new, int flags)
 	int r;
 #ifdef SYS_dup2
 	if (old==new) return __syscall_ret(-EINVAL);
-	if (flags & O_CLOEXEC) {
+	if (flags) {
 		while ((r=__syscall(SYS_dup3, old, new, flags))==-EBUSY);
 		if (r!=-ENOSYS) return __syscall_ret(r);
+		if (flags & ~O_CLOEXEC) return __syscall_ret(-EINVAL);
 	}
 	while ((r=__syscall(SYS_dup2, old, new))==-EBUSY);
-	if (flags & O_CLOEXEC) __syscall(SYS_fcntl, new, F_SETFD, FD_CLOEXEC);
+	if (r >= 0 && (flags & O_CLOEXEC))
+		__syscall(SYS_fcntl, new, F_SETFD, FD_CLOEXEC);
 #else
 	while ((r=__syscall(SYS_dup3, old, new, flags))==-EBUSY);
 #endif
diff --git a/src/unistd/faccessat.c b/src/unistd/faccessat.c
index 76bbd4c7..43052dd7 100644
--- a/src/unistd/faccessat.c
+++ b/src/unistd/faccessat.c
@@ -25,12 +25,17 @@ static int checker(void *p)
 
 int faccessat(int fd, const char *filename, int amode, int flag)
 {
-	if (!flag || (flag==AT_EACCESS && getuid()==geteuid() && getgid()==getegid()))
-		return syscall(SYS_faccessat, fd, filename, amode, flag);
+	if (flag) {
+		int ret = __syscall(SYS_faccessat2, fd, filename, amode, flag);
+		if (ret != -ENOSYS) return __syscall_ret(ret);
+	}
 
-	if (flag != AT_EACCESS)
+	if (flag & ~AT_EACCESS)
 		return __syscall_ret(-EINVAL);
 
+	if (!flag || (getuid()==geteuid() && getgid()==getegid()))
+		return syscall(SYS_faccessat, fd, filename, amode);
+
 	char stack[1024];
 	sigset_t set;
 	pid_t pid;
@@ -48,7 +53,7 @@ int faccessat(int fd, const char *filename, int amode, int flag)
 	if (pid<0 || __syscall(SYS_read, p[0], &ret, sizeof ret) != sizeof(ret))
 		ret = -EBUSY;
 	__syscall(SYS_close, p[0]);
-	__syscall(SYS_wait4, pid, &status, __WCLONE, 0);
+	__sys_wait4(pid, &status, __WCLONE, 0);
 
 	__restore_sigs(&set);
 
diff --git a/src/unistd/ftruncate.c b/src/unistd/ftruncate.c
index b41be0fa..54ff34bc 100644
--- a/src/unistd/ftruncate.c
+++ b/src/unistd/ftruncate.c
@@ -5,5 +5,3 @@ int ftruncate(int fd, off_t length)
 {
 	return syscall(SYS_ftruncate, fd, __SYSCALL_LL_O(length));
 }
-
-weak_alias(ftruncate, ftruncate64);
diff --git a/src/unistd/isatty.c b/src/unistd/isatty.c
index 75a9c186..21222eda 100644
--- a/src/unistd/isatty.c
+++ b/src/unistd/isatty.c
@@ -6,8 +6,6 @@
 int isatty(int fd)
 {
 	struct winsize wsz;
-	unsigned long r = syscall(SYS_ioctl, fd, TIOCGWINSZ, &wsz);
-	if (r == 0) return 1;
-	if (errno != EBADF) errno = ENOTTY;
-	return 0;
+	/* +1 converts from error status (0/-1) to boolean (1/0) */
+	return syscall(SYS_ioctl, fd, TIOCGWINSZ, &wsz) + 1;
 }
diff --git a/src/unistd/lseek.c b/src/unistd/lseek.c
index bf8cd852..f5b66682 100644
--- a/src/unistd/lseek.c
+++ b/src/unistd/lseek.c
@@ -1,7 +1,7 @@
 #include <unistd.h>
 #include "syscall.h"
 
-off_t lseek(int fd, off_t offset, int whence)
+off_t __lseek(int fd, off_t offset, int whence)
 {
 #ifdef SYS__llseek
 	off_t result;
@@ -11,4 +11,4 @@ off_t lseek(int fd, off_t offset, int whence)
 #endif
 }
 
-weak_alias(lseek, lseek64);
+weak_alias(__lseek, lseek);
diff --git a/src/unistd/mipsn32/lseek.c b/src/unistd/mipsn32/lseek.c
new file mode 100644
index 00000000..0f6cbcaa
--- /dev/null
+++ b/src/unistd/mipsn32/lseek.c
@@ -0,0 +1,19 @@
+#include <unistd.h>
+#include "syscall.h"
+
+off_t __lseek(int fd, off_t offset, int whence)
+{
+	register long long r4 __asm__("$4") = fd;
+	register long long r5 __asm__("$5") = offset;
+	register long long r6 __asm__("$6") = whence;
+	register long long r7 __asm__("$7");
+	register long long r2 __asm__("$2") = SYS_lseek;
+	__asm__ __volatile__ (
+		"syscall"
+		: "+&r"(r2), "=r"(r7)
+		: "r"(r4), "r"(r5), "r"(r6)
+		: SYSCALL_CLOBBERLIST);
+	return r7 ? __syscall_ret(-r2) : r2;
+}
+
+weak_alias(__lseek, lseek);
diff --git a/src/unistd/nice.c b/src/unistd/nice.c
index 6c25c8c3..1c2295ff 100644
--- a/src/unistd/nice.c
+++ b/src/unistd/nice.c
@@ -1,4 +1,5 @@
 #include <unistd.h>
+#include <errno.h>
 #include <sys/resource.h>
 #include <limits.h>
 #include "syscall.h"
@@ -12,5 +13,11 @@ int nice(int inc)
 		prio += getpriority(PRIO_PROCESS, 0);
 	if (prio > NZERO-1) prio = NZERO-1;
 	if (prio < -NZERO) prio = -NZERO;
-	return setpriority(PRIO_PROCESS, 0, prio) ? -1 : prio;
+	if (setpriority(PRIO_PROCESS, 0, prio)) {
+		if (errno == EACCES)
+			errno = EPERM;
+		return -1;
+	} else {
+		return prio;
+	}
 }
diff --git a/src/unistd/pause.c b/src/unistd/pause.c
index 90bbf4ca..90cc8db5 100644
--- a/src/unistd/pause.c
+++ b/src/unistd/pause.c
@@ -3,9 +3,5 @@
 
 int pause(void)
 {
-#ifdef SYS_pause
-	return syscall_cp(SYS_pause);
-#else
-	return syscall_cp(SYS_ppoll, 0, 0, 0, 0);
-#endif
+	return sys_pause_cp();
 }
diff --git a/src/unistd/pipe2.c b/src/unistd/pipe2.c
index f24f74fb..a096990b 100644
--- a/src/unistd/pipe2.c
+++ b/src/unistd/pipe2.c
@@ -8,6 +8,7 @@ int pipe2(int fd[2], int flag)
 	if (!flag) return pipe(fd);
 	int ret = __syscall(SYS_pipe2, fd, flag);
 	if (ret != -ENOSYS) return __syscall_ret(ret);
+	if (flag & ~(O_CLOEXEC|O_NONBLOCK)) return __syscall_ret(-EINVAL);
 	ret = pipe(fd);
 	if (ret) return ret;
 	if (flag & O_CLOEXEC) {
diff --git a/src/unistd/pread.c b/src/unistd/pread.c
index 5681b045..b03fb0ad 100644
--- a/src/unistd/pread.c
+++ b/src/unistd/pread.c
@@ -5,5 +5,3 @@ ssize_t pread(int fd, void *buf, size_t size, off_t ofs)
 {
 	return syscall_cp(SYS_pread, fd, buf, size, __SYSCALL_LL_PRW(ofs));
 }
-
-weak_alias(pread, pread64);
diff --git a/src/unistd/preadv.c b/src/unistd/preadv.c
index 8376d60f..890ab403 100644
--- a/src/unistd/preadv.c
+++ b/src/unistd/preadv.c
@@ -8,5 +8,3 @@ ssize_t preadv(int fd, const struct iovec *iov, int count, off_t ofs)
 	return syscall_cp(SYS_preadv, fd, iov, count,
 		(long)(ofs), (long)(ofs>>32));
 }
-
-weak_alias(preadv, preadv64);
diff --git a/src/unistd/pwrite.c b/src/unistd/pwrite.c
index ca376576..a008b3ec 100644
--- a/src/unistd/pwrite.c
+++ b/src/unistd/pwrite.c
@@ -1,9 +1,18 @@
+#define _GNU_SOURCE
 #include <unistd.h>
+#include <sys/uio.h>
+#include <fcntl.h>
 #include "syscall.h"
 
 ssize_t pwrite(int fd, const void *buf, size_t size, off_t ofs)
 {
+	if (ofs == -1) ofs--;
+	int r = __syscall_cp(SYS_pwritev2, fd,
+		(&(struct iovec){ .iov_base = (void *)buf, .iov_len = size }),
+		1, (long)(ofs), (long)(ofs>>32), RWF_NOAPPEND);
+	if (r != -EOPNOTSUPP && r != -ENOSYS)
+		return __syscall_ret(r);
+	if (fcntl(fd, F_GETFL) & O_APPEND)
+		return __syscall_ret(-EOPNOTSUPP);
 	return syscall_cp(SYS_pwrite, fd, buf, size, __SYSCALL_LL_PRW(ofs));
 }
-
-weak_alias(pwrite, pwrite64);
diff --git a/src/unistd/pwritev.c b/src/unistd/pwritev.c
index f5a612c4..44a53d85 100644
--- a/src/unistd/pwritev.c
+++ b/src/unistd/pwritev.c
@@ -1,12 +1,18 @@
-#define _BSD_SOURCE
+#define _GNU_SOURCE
 #include <sys/uio.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include "syscall.h"
 
 ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
 {
+	if (ofs == -1) ofs--;
+	int r = __syscall_cp(SYS_pwritev2, fd, iov, count,
+		(long)(ofs), (long)(ofs>>32), RWF_NOAPPEND);
+	if (r != -EOPNOTSUPP && r != -ENOSYS)
+		return __syscall_ret(r);
+	if (fcntl(fd, F_GETFL) & O_APPEND)
+		return __syscall_ret(-EOPNOTSUPP);
 	return syscall_cp(SYS_pwritev, fd, iov, count,
 		(long)(ofs), (long)(ofs>>32));
 }
-
-weak_alias(pwritev, pwritev64);
diff --git a/src/unistd/readlink.c b/src/unistd/readlink.c
index a152d524..32f4537f 100644
--- a/src/unistd/readlink.c
+++ b/src/unistd/readlink.c
@@ -4,9 +4,16 @@
 
 ssize_t readlink(const char *restrict path, char *restrict buf, size_t bufsize)
 {
+	char dummy[1];
+	if (!bufsize) {
+		buf = dummy;
+		bufsize = 1;
+	}
 #ifdef SYS_readlink
-	return syscall(SYS_readlink, path, buf, bufsize);
+	int r = __syscall(SYS_readlink, path, buf, bufsize);
 #else
-	return syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize);
+	int r = __syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize);
 #endif
+	if (buf == dummy && r > 0) r = 0;
+	return __syscall_ret(r);
 }
diff --git a/src/unistd/readlinkat.c b/src/unistd/readlinkat.c
index 9af45cd5..f79d3d14 100644
--- a/src/unistd/readlinkat.c
+++ b/src/unistd/readlinkat.c
@@ -3,5 +3,12 @@
 
 ssize_t readlinkat(int fd, const char *restrict path, char *restrict buf, size_t bufsize)
 {
-	return syscall(SYS_readlinkat, fd, path, buf, bufsize);
+	char dummy[1];
+	if (!bufsize) {
+		buf = dummy;
+		bufsize = 1;
+	}
+	int r = __syscall(SYS_readlinkat, fd, path, buf, bufsize);
+	if (buf == dummy && r > 0) r = 0;
+	return __syscall_ret(r);
 }
diff --git a/src/unistd/renameat.c b/src/unistd/renameat.c
index 12574822..c3b40a25 100644
--- a/src/unistd/renameat.c
+++ b/src/unistd/renameat.c
@@ -3,5 +3,9 @@
 
 int renameat(int oldfd, const char *old, int newfd, const char *new)
 {
+#ifdef SYS_renameat
 	return syscall(SYS_renameat, oldfd, old, newfd, new);
+#else
+	return syscall(SYS_renameat2, oldfd, old, newfd, new, 0);
+#endif
 }
diff --git a/src/unistd/setxid.c b/src/unistd/setxid.c
index 0239f8af..a629ed4b 100644
--- a/src/unistd/setxid.c
+++ b/src/unistd/setxid.c
@@ -1,20 +1,19 @@
 #include <unistd.h>
-#include <errno.h>
+#include <signal.h>
 #include "syscall.h"
 #include "libc.h"
-#include "pthread_impl.h"
 
 struct ctx {
 	int id, eid, sid;
-	int nr, err;
+	int nr, ret;
 };
 
 static void do_setxid(void *p)
 {
 	struct ctx *c = p;
-	if (c->err>0) return;
-	int ret = -__syscall(c->nr, c->id, c->eid, c->sid);
-	if (ret && !c->err) {
+	if (c->ret<0) return;
+	int ret = __syscall(c->nr, c->id, c->eid, c->sid);
+	if (ret && !c->ret) {
 		/* If one thread fails to set ids after another has already
 		 * succeeded, forcibly killing the process is the only safe
 		 * thing to do. State is inconsistent and dangerous. Use
@@ -22,18 +21,14 @@ static void do_setxid(void *p)
 		__block_all_sigs(0);
 		__syscall(SYS_kill, __syscall(SYS_getpid), SIGKILL);
 	}
-	c->err = ret;
+	c->ret = ret;
 }
 
 int __setxid(int nr, int id, int eid, int sid)
 {
-	/* err is initially nonzero so that failure of the first thread does not
+	/* ret is initially nonzero so that failure of the first thread does not
 	 * trigger the safety kill above. */
-	struct ctx c = { .nr = nr, .id = id, .eid = eid, .sid = sid, .err = -1 };
+	struct ctx c = { .nr = nr, .id = id, .eid = eid, .sid = sid, .ret = 1 };
 	__synccall(do_setxid, &c);
-	if (c.err) {
-		if (c.err>0) errno = c.err;
-		return -1;
-	}
-	return 0;
+	return __syscall_ret(c.ret > 0 ? -EAGAIN : c.ret);
 }
diff --git a/src/unistd/truncate.c b/src/unistd/truncate.c
index 97296800..077351e1 100644
--- a/src/unistd/truncate.c
+++ b/src/unistd/truncate.c
@@ -5,5 +5,3 @@ int truncate(const char *path, off_t length)
 {
 	return syscall(SYS_truncate, path, __SYSCALL_LL_O(length));
 }
-
-weak_alias(truncate, truncate64);
diff --git a/src/unistd/ualarm.c b/src/unistd/ualarm.c
index 855504bc..2985855c 100644
--- a/src/unistd/ualarm.c
+++ b/src/unistd/ualarm.c
@@ -7,7 +7,7 @@ unsigned ualarm(unsigned value, unsigned interval)
 	struct itimerval it = {
 		.it_interval.tv_usec = interval,
 		.it_value.tv_usec = value
-	};
-	setitimer(ITIMER_REAL, &it, &it);
-	return it.it_value.tv_sec*1000000 + it.it_value.tv_usec;
+	}, it_old;
+	setitimer(ITIMER_REAL, &it, &it_old);
+	return it_old.it_value.tv_sec*1000000 + it_old.it_value.tv_usec;
 }
diff --git a/src/unistd/x32/lseek.c b/src/unistd/x32/lseek.c
new file mode 100644
index 00000000..5f93292f
--- /dev/null
+++ b/src/unistd/x32/lseek.c
@@ -0,0 +1,14 @@
+#include <unistd.h>
+#include "syscall.h"
+
+off_t __lseek(int fd, off_t offset, int whence)
+{
+	off_t ret;
+	__asm__ __volatile__ ("syscall"
+		: "=a"(ret)
+		: "a"(SYS_lseek), "D"(fd), "S"(offset), "d"(whence)
+		: "rcx", "r11", "memory");
+	return ret < 0 ? __syscall_ret(ret) : ret;
+}
+
+weak_alias(__lseek, lseek);
diff --git a/tools/add-cfi.i386.awk b/tools/add-cfi.i386.awk
index 9162e309..d05037de 100644
--- a/tools/add-cfi.i386.awk
+++ b/tools/add-cfi.i386.awk
@@ -81,7 +81,7 @@ function adjust_sp_offset(delta) {
     in_function = 0
   }
 }
-/^\.type [a-zA-Z0-9_]+,\@function/ {
+/^\.type [a-zA-Z0-9_]+,@function/ {
   functions[substr($2, 1, length($2)-10)] = 1
 }
 # not interested in assembler directives beyond this, just pass them through
diff --git a/tools/add-cfi.x86_64.awk b/tools/add-cfi.x86_64.awk
index bbc90daa..7e1513d6 100644
--- a/tools/add-cfi.x86_64.awk
+++ b/tools/add-cfi.x86_64.awk
@@ -76,7 +76,7 @@ function adjust_sp_offset(delta) {
     in_function = 0
   }
 }
-/^\.type [a-zA-Z0-9_]+,\@function/ {
+/^\.type [a-zA-Z0-9_]+,@function/ {
   functions[substr($2, 1, length($2)-10)] = 1
 }
 # not interested in assembler directives beyond this, just pass them through
diff --git a/tools/install.sh b/tools/install.sh
index d913b60b..855a8ca2 100755
--- a/tools/install.sh
+++ b/tools/install.sh
@@ -48,7 +48,9 @@ trap 'rm -f "$tmp"' EXIT INT QUIT TERM HUP
 umask 077
 
 if test "$symlink" ; then
+umask 000
 ln -s "$1" "$tmp"
+umask 077
 else
 cat < "$1" > "$tmp"
 chmod "$mode" "$tmp"