diff options
Diffstat (limited to 'src/linux')
-rw-r--r-- | src/linux/cache.c | 3 | ||||
-rw-r--r-- | src/linux/clock_adjtime.c | 84 | ||||
-rw-r--r-- | src/linux/clone.c | 56 | ||||
-rw-r--r-- | src/linux/epoll.c | 5 | ||||
-rw-r--r-- | src/linux/fallocate.c | 3 | ||||
-rw-r--r-- | src/linux/getdents.c | 2 | ||||
-rw-r--r-- | src/linux/gettid.c | 8 | ||||
-rw-r--r-- | src/linux/membarrier.c | 7 | ||||
-rw-r--r-- | src/linux/ppoll.c | 26 | ||||
-rw-r--r-- | src/linux/preadv2.c | 17 | ||||
-rw-r--r-- | src/linux/prlimit.c | 3 | ||||
-rw-r--r-- | src/linux/pwritev2.c | 17 | ||||
-rw-r--r-- | src/linux/sendfile.c | 2 | ||||
-rw-r--r-- | src/linux/setgroups.c | 30 | ||||
-rw-r--r-- | src/linux/statx.c | 42 | ||||
-rw-r--r-- | src/linux/wait4.c | 34 |
16 files changed, 259 insertions, 80 deletions
diff --git a/src/linux/cache.c b/src/linux/cache.c index 0eb051c2..e76f7812 100644 --- a/src/linux/cache.c +++ b/src/linux/cache.c @@ -21,7 +21,7 @@ weak_alias(__cachectl, cachectl); #ifdef SYS_riscv_flush_icache #define VDSO_FLUSH_ICACHE_SYM "__vdso_flush_icache" -#define VDSO_FLUSH_ICACHE_VER "LINUX_4.5" +#define VDSO_FLUSH_ICACHE_VER "LINUX_4.15" static void *volatile vdso_func; @@ -45,6 +45,7 @@ int __riscv_flush_icache(void *start, void *end, unsigned long int flags) if (!r) return r; if (r != -ENOSYS) return __syscall_ret(r); } + return syscall(SYS_riscv_flush_icache, start, end, flags); } weak_alias(__riscv_flush_icache, riscv_flush_icache); #endif diff --git a/src/linux/clock_adjtime.c b/src/linux/clock_adjtime.c index 2f531397..d4d03d24 100644 --- a/src/linux/clock_adjtime.c +++ b/src/linux/clock_adjtime.c @@ -38,9 +38,60 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx) { int r = -ENOSYS; #ifdef SYS_clock_adjtime64 - if (SYS_clock_adjtime == SYS_clock_adjtime64 || - (utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec)) { - struct ktimex64 ktx = { + struct ktimex64 ktx = { + .modes = utx->modes, + .offset = utx->offset, + .freq = utx->freq, + .maxerror = utx->maxerror, + .esterror = utx->esterror, + .status = utx->status, + .constant = utx->constant, + .precision = utx->precision, + .tolerance = utx->tolerance, + .time_sec = utx->time.tv_sec, + .time_usec = utx->time.tv_usec, + .tick = utx->tick, + .ppsfreq = utx->ppsfreq, + .jitter = utx->jitter, + .shift = utx->shift, + .stabil = utx->stabil, + .jitcnt = utx->jitcnt, + .calcnt = utx->calcnt, + .errcnt = utx->errcnt, + .stbcnt = utx->stbcnt, + .tai = utx->tai, + }; + r = __syscall(SYS_clock_adjtime64, clock_id, &ktx); + if (r>=0) { + utx->modes = ktx.modes; + utx->offset = ktx.offset; + utx->freq = ktx.freq; + utx->maxerror = ktx.maxerror; + utx->esterror = ktx.esterror; + utx->status = ktx.status; + utx->constant = ktx.constant; + utx->precision = ktx.precision; + utx->tolerance = ktx.tolerance; + utx->time.tv_sec = ktx.time_sec; + utx->time.tv_usec = ktx.time_usec; + utx->tick = ktx.tick; + utx->ppsfreq = ktx.ppsfreq; + utx->jitter = ktx.jitter; + utx->shift = ktx.shift; + utx->stabil = ktx.stabil; + utx->jitcnt = ktx.jitcnt; + utx->calcnt = ktx.calcnt; + utx->errcnt = ktx.errcnt; + utx->stbcnt = ktx.stbcnt; + utx->tai = ktx.tai; + } + if (SYS_clock_adjtime == SYS_clock_adjtime64 || r!=-ENOSYS) + return __syscall_ret(r); + if ((utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec)) + return __syscall_ret(-ENOTSUP); +#endif + if (sizeof(time_t) > sizeof(long)) { + struct ktimex ktx = { .modes = utx->modes, .offset = utx->offset, .freq = utx->freq, @@ -63,6 +114,10 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx) .stbcnt = utx->stbcnt, .tai = utx->tai, }; +#ifdef SYS_adjtimex + if (clock_id==CLOCK_REALTIME) r = __syscall(SYS_adjtimex, &ktx); + else +#endif r = __syscall(SYS_clock_adjtime, clock_id, &ktx); if (r>=0) { utx->modes = ktx.modes; @@ -87,29 +142,6 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx) utx->stbcnt = ktx.stbcnt; utx->tai = ktx.tai; } - } - if (SYS_clock_adjtime == SYS_clock_adjtime64 || r!=-ENOSYS) - return __syscall_ret(r); - if ((utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec)) - return __syscall_ret(-ENOTSUP); -#endif - if (sizeof(time_t) > sizeof(long)) { - union { - struct timex utx; - struct ktimex ktx; - } u = { *utx }; - u.ktx.time_sec = utx->time.tv_sec; - u.ktx.time_usec = utx->time.tv_usec; -#ifdef SYS_adjtimex - if (clock_id==CLOCK_REALTIME) r = __syscall(SYS_adjtimex, &u); - else -#endif - r = __syscall(SYS_clock_adjtime, clock_id, &u); - if (r>=0) { - *utx = u.utx; - utx->time.tv_sec = u.ktx.time_sec; - utx->time.tv_usec = u.ktx.time_usec; - } return __syscall_ret(r); } #ifdef SYS_adjtimex diff --git a/src/linux/clone.c b/src/linux/clone.c index 8c1af7d3..257c1cec 100644 --- a/src/linux/clone.c +++ b/src/linux/clone.c @@ -4,18 +4,62 @@ #include <sched.h> #include "pthread_impl.h" #include "syscall.h" +#include "lock.h" +#include "fork_impl.h" + +struct clone_start_args { + int (*func)(void *); + void *arg; + sigset_t sigmask; +}; + +static int clone_start(void *arg) +{ + struct clone_start_args *csa = arg; + __post_Fork(0); + __restore_sigs(&csa->sigmask); + return csa->func(csa->arg); +} int clone(int (*func)(void *), void *stack, int flags, void *arg, ...) { + struct clone_start_args csa; va_list ap; - pid_t *ptid, *ctid; - void *tls; + pid_t *ptid = 0, *ctid = 0; + void *tls = 0; + + /* Flags that produce an invalid thread/TLS state are disallowed. */ + int badflags = CLONE_THREAD | CLONE_SETTLS | CLONE_CHILD_CLEARTID; + + if ((flags & badflags) || !stack) + return __syscall_ret(-EINVAL); va_start(ap, arg); - ptid = va_arg(ap, pid_t *); - tls = va_arg(ap, void *); - ctid = va_arg(ap, pid_t *); + if (flags & (CLONE_PIDFD | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID)) + ptid = va_arg(ap, pid_t *); + if (flags & CLONE_CHILD_SETTID) { + tls = va_arg(ap, void *); + ctid = va_arg(ap, pid_t *); + } va_end(ap); - return __syscall_ret(__clone(func, stack, flags, arg, ptid, tls, ctid)); + /* If CLONE_VM is used, it's impossible to give the child a consistent + * thread structure. In this case, the best we can do is assume the + * caller is content with an extremely restrictive execution context + * like the one vfork() would provide. */ + if (flags & CLONE_VM) return __syscall_ret( + __clone(func, stack, flags, arg, ptid, tls, ctid)); + + __block_all_sigs(&csa.sigmask); + LOCK(__abort_lock); + + /* Setup the a wrapper start function for the child process to do + * mimic _Fork in producing a consistent execution state. */ + csa.func = func; + csa.arg = arg; + int ret = __clone(clone_start, stack, flags, &csa, ptid, tls, ctid); + + __post_Fork(ret); + __restore_sigs(&csa.sigmask); + return __syscall_ret(ret); } diff --git a/src/linux/epoll.c b/src/linux/epoll.c index deff5b10..e56e8f4c 100644 --- a/src/linux/epoll.c +++ b/src/linux/epoll.c @@ -5,6 +5,7 @@ int epoll_create(int size) { + if (size<=0) return __syscall_ret(-EINVAL); return epoll_create1(0); } @@ -24,9 +25,9 @@ int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev) int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs) { - int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8); + int r = __syscall_cp(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8); #ifdef SYS_epoll_wait - if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to); + if (r==-ENOSYS && !sigs) r = __syscall_cp(SYS_epoll_wait, fd, ev, cnt, to); #endif return __syscall_ret(r); } diff --git a/src/linux/fallocate.c b/src/linux/fallocate.c index 7d68bc8f..9146350e 100644 --- a/src/linux/fallocate.c +++ b/src/linux/fallocate.c @@ -7,6 +7,3 @@ int fallocate(int fd, int mode, off_t base, off_t len) return syscall(SYS_fallocate, fd, mode, __SYSCALL_LL_E(base), __SYSCALL_LL_E(len)); } - -#undef fallocate64 -weak_alias(fallocate, fallocate64); diff --git a/src/linux/getdents.c b/src/linux/getdents.c index 796c1e5c..97f76e14 100644 --- a/src/linux/getdents.c +++ b/src/linux/getdents.c @@ -8,5 +8,3 @@ int getdents(int fd, struct dirent *buf, size_t len) if (len>INT_MAX) len = INT_MAX; return syscall(SYS_getdents, fd, buf, len); } - -weak_alias(getdents, getdents64); diff --git a/src/linux/gettid.c b/src/linux/gettid.c new file mode 100644 index 00000000..70767137 --- /dev/null +++ b/src/linux/gettid.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include <unistd.h> +#include "pthread_impl.h" + +pid_t gettid(void) +{ + return __pthread_self()->tid; +} diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c index 9ebe906e..f64fe7e1 100644 --- a/src/linux/membarrier.c +++ b/src/linux/membarrier.c @@ -9,13 +9,8 @@ static void dummy_0(void) { } -static void dummy_1(pthread_t t) -{ -} - weak_alias(dummy_0, __tl_lock); weak_alias(dummy_0, __tl_unlock); -weak_alias(dummy_1, __tl_sync); static sem_t barrier_sem; @@ -40,7 +35,7 @@ int __membarrier(int cmd, int flags) __tl_lock(); sem_init(&barrier_sem, 0, 0); struct sigaction sa = { - .sa_flags = SA_RESTART, + .sa_flags = SA_RESTART | SA_ONSTACK, .sa_handler = bcast_barrier }; memset(&sa.sa_mask, -1, sizeof sa.sa_mask); diff --git a/src/linux/ppoll.c b/src/linux/ppoll.c deleted file mode 100644 index e614600a..00000000 --- a/src/linux/ppoll.c +++ /dev/null @@ -1,26 +0,0 @@ -#define _GNU_SOURCE -#include <poll.h> -#include <signal.h> -#include <errno.h> -#include "syscall.h" - -#define IS32BIT(x) !((x)+0x80000000ULL>>32) -#define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63)) - -int ppoll(struct pollfd *fds, nfds_t n, const struct timespec *to, const sigset_t *mask) -{ - time_t s = to ? to->tv_sec : 0; - long ns = to ? to->tv_nsec : 0; -#ifdef SYS_ppoll_time64 - int r = -ENOSYS; - if (SYS_ppoll == SYS_ppoll_time64 || !IS32BIT(s)) - r = __syscall_cp(SYS_ppoll_time64, fds, n, - to ? ((long long[]){s, ns}) : 0, - mask, _NSIG/8); - if (SYS_ppoll == SYS_ppoll_time64 || r != -ENOSYS) - return __syscall_ret(r); - s = CLAMP(s); -#endif - return syscall_cp(SYS_ppoll, fds, n, - to ? ((long[]){s, ns}) : 0, mask, _NSIG/8); -} diff --git a/src/linux/preadv2.c b/src/linux/preadv2.c new file mode 100644 index 00000000..5e7ab70f --- /dev/null +++ b/src/linux/preadv2.c @@ -0,0 +1,17 @@ +#define _GNU_SOURCE +#include <sys/uio.h> +#include <unistd.h> +#include "syscall.h" + +ssize_t preadv2(int fd, const struct iovec *iov, int count, off_t ofs, int flags) +{ +#ifdef SYS_preadv + if (!flags) { + if (ofs==-1) return readv(fd, iov, count); + return syscall_cp(SYS_preadv, fd, iov, count, + (long)(ofs), (long)(ofs>>32)); + } +#endif + return syscall_cp(SYS_preadv2, fd, iov, count, + (long)(ofs), (long)(ofs>>32), flags); +} diff --git a/src/linux/prlimit.c b/src/linux/prlimit.c index 3df9ffba..fcf45aab 100644 --- a/src/linux/prlimit.c +++ b/src/linux/prlimit.c @@ -21,6 +21,3 @@ int prlimit(pid_t pid, int resource, const struct rlimit *new_limit, struct rlim } return r; } - -#undef prlimit64 -weak_alias(prlimit, prlimit64); diff --git a/src/linux/pwritev2.c b/src/linux/pwritev2.c new file mode 100644 index 00000000..ece90d7c --- /dev/null +++ b/src/linux/pwritev2.c @@ -0,0 +1,17 @@ +#define _GNU_SOURCE +#include <sys/uio.h> +#include <unistd.h> +#include "syscall.h" + +ssize_t pwritev2(int fd, const struct iovec *iov, int count, off_t ofs, int flags) +{ +#ifdef SYS_pwritev + if (!flags) { + if (ofs==-1) return writev(fd, iov, count); + return syscall_cp(SYS_pwritev, fd, iov, count, + (long)(ofs), (long)(ofs>>32)); + } +#endif + return syscall_cp(SYS_pwritev2, fd, iov, count, + (long)(ofs), (long)(ofs>>32), flags); +} diff --git a/src/linux/sendfile.c b/src/linux/sendfile.c index 9afe6dd6..fc1577d3 100644 --- a/src/linux/sendfile.c +++ b/src/linux/sendfile.c @@ -5,5 +5,3 @@ ssize_t sendfile(int out_fd, int in_fd, off_t *ofs, size_t count) { return syscall(SYS_sendfile, out_fd, in_fd, ofs, count); } - -weak_alias(sendfile, sendfile64); diff --git a/src/linux/setgroups.c b/src/linux/setgroups.c index 1248fdbf..47142f14 100644 --- a/src/linux/setgroups.c +++ b/src/linux/setgroups.c @@ -1,8 +1,36 @@ #define _GNU_SOURCE #include <unistd.h> +#include <signal.h> #include "syscall.h" +#include "libc.h" + +struct ctx { + size_t count; + const gid_t *list; + int ret; +}; + +static void do_setgroups(void *p) +{ + struct ctx *c = p; + if (c->ret<0) return; + int ret = __syscall(SYS_setgroups, c->count, c->list); + if (ret && !c->ret) { + /* If one thread fails to set groups after another has already + * succeeded, forcibly killing the process is the only safe + * thing to do. State is inconsistent and dangerous. Use + * SIGKILL because it is uncatchable. */ + __block_all_sigs(0); + __syscall(SYS_kill, __syscall(SYS_getpid), SIGKILL); + } + c->ret = ret; +} int setgroups(size_t count, const gid_t list[]) { - return syscall(SYS_setgroups, count, list); + /* ret is initially nonzero so that failure of the first thread does not + * trigger the safety kill above. */ + struct ctx c = { .count = count, .list = list, .ret = 1 }; + __synccall(do_setgroups, &c); + return __syscall_ret(c.ret); } diff --git a/src/linux/statx.c b/src/linux/statx.c new file mode 100644 index 00000000..4616bff4 --- /dev/null +++ b/src/linux/statx.c @@ -0,0 +1,42 @@ +#define _GNU_SOURCE +#include <sys/stat.h> +#include <string.h> +#include <syscall.h> +#include <sys/sysmacros.h> +#include <errno.h> + +int statx(int dirfd, const char *restrict path, int flags, unsigned mask, struct statx *restrict stx) +{ + int ret = __syscall(SYS_statx, dirfd, path, flags, mask, stx); + +#ifndef SYS_fstatat + return __syscall_ret(ret); +#endif + + if (ret != -ENOSYS) return __syscall_ret(ret); + + struct stat st; + ret = fstatat(dirfd, path, &st, flags); + if (ret) return ret; + + stx->stx_dev_major = major(st.st_dev); + stx->stx_dev_minor = minor(st.st_dev); + stx->stx_ino = st.st_ino; + stx->stx_mode = st.st_mode; + stx->stx_nlink = st.st_nlink; + stx->stx_uid = st.st_uid; + stx->stx_gid = st.st_gid; + stx->stx_size = st.st_size; + stx->stx_blksize = st.st_blksize; + stx->stx_blocks = st.st_blocks; + stx->stx_atime.tv_sec = st.st_atim.tv_sec; + stx->stx_atime.tv_nsec = st.st_atim.tv_nsec; + stx->stx_mtime.tv_sec = st.st_mtim.tv_sec; + stx->stx_mtime.tv_nsec = st.st_mtim.tv_nsec; + stx->stx_ctime.tv_sec = st.st_ctim.tv_sec; + stx->stx_ctime.tv_nsec = st.st_ctim.tv_nsec; + stx->stx_btime = (struct statx_timestamp){.tv_sec=0, .tv_nsec=0}; + stx->stx_mask = STATX_BASIC_STATS; + + return 0; +} diff --git a/src/linux/wait4.c b/src/linux/wait4.c index 97f12cc5..fb08c0d0 100644 --- a/src/linux/wait4.c +++ b/src/linux/wait4.c @@ -1,9 +1,39 @@ #define _GNU_SOURCE #include <sys/wait.h> #include <sys/resource.h> +#include <string.h> +#include <errno.h> #include "syscall.h" -pid_t wait4(pid_t pid, int *status, int options, struct rusage *usage) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *ru) { - return syscall(SYS_wait4, pid, status, options, usage); + int r; +#ifdef SYS_wait4_time64 + if (ru) { + long long kru64[18]; + r = __syscall(SYS_wait4_time64, pid, status, options, kru64); + if (r > 0) { + ru->ru_utime = (struct timeval) + { .tv_sec = kru64[0], .tv_usec = kru64[1] }; + ru->ru_stime = (struct timeval) + { .tv_sec = kru64[2], .tv_usec = kru64[3] }; + char *slots = (char *)&ru->ru_maxrss; + for (int i=0; i<14; i++) + *(long *)(slots + i*sizeof(long)) = kru64[4+i]; + } + if (SYS_wait4_time64 == SYS_wait4 || r != -ENOSYS) + return __syscall_ret(r); + } +#endif + char *dest = ru ? (char *)&ru->ru_maxrss - 4*sizeof(long) : 0; + r = __sys_wait4(pid, status, options, dest); + if (r>0 && ru && sizeof(time_t) > sizeof(long)) { + long kru[4]; + memcpy(kru, dest, 4*sizeof(long)); + ru->ru_utime = (struct timeval) + { .tv_sec = kru[0], .tv_usec = kru[1] }; + ru->ru_stime = (struct timeval) + { .tv_sec = kru[2], .tv_usec = kru[3] }; + } + return __syscall_ret(r); } |