diff options
author | Rich Felker <dalias@aerifal.cx> | 2020-11-30 12:14:47 -0500 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2020-11-30 13:46:52 -0500 |
commit | 29ff7599a448232f2527841c2362643d246cee36 (patch) | |
tree | 668d86c24719f2acd784767b662810a7e443e294 | |
parent | 5d464f524ba5447e2a8fc77560b98c1ad9a83570 (diff) | |
download | musl-29ff7599a448232f2527841c2362643d246cee36.tar.gz |
implement realpath directly instead of using procfs readlink
inability to use realpath in chroot/container without procfs access
and at early boot prior to mount of /proc has been an ongoing issue,
and it turns out realpath was one of the last remaining interfaces
that needed procfs for its core functionality. during investigation
while reimplementing, it was determined that there were also serious
problems with the procfs-based implementation. most seriously it was
unsafe on pre-O_PATH kernels, and unlike other places where O_PATH was
used, the unsafety was hard or impossible to fix because O_NOFOLLOW
can't be used (since the whole purpose was to follow symlinks).
the new implementation is a direct one, performing readlink on each
path component to resolve it. an explicit stack, as opposed to
recursion, is used to represent the remaining components to be
processed. the stack starts out holding just the input string, and
reading a link pushes the link contents onto the stack.
unlike many other implementations, this one does not call getcwd
initially for relative pathnames. instead it accumulates initial ..
components to be applied to the working directory if the result is
still a relative path. this avoids calling getcwd (which may fail) at
all when symlink traversal will eventually yield an absolute path. it
also doesn't use any form of stat operation; instead it arranges for
readlink to tell it when a non-directory is used in a context where a
directory is needed. this minimizes the number of syscalls needed,
avoids accessing inodes when the directory table suffices, and reduces
the amount of code pulled in for static linking.
-rw-r--r-- | src/misc/realpath.c | 159 |
1 files changed, 136 insertions, 23 deletions
diff --git a/src/misc/realpath.c b/src/misc/realpath.c index d2708e59..db8b74dc 100644 --- a/src/misc/realpath.c +++ b/src/misc/realpath.c @@ -1,43 +1,156 @@ #include <stdlib.h> #include <limits.h> -#include <sys/stat.h> -#include <fcntl.h> #include <errno.h> #include <unistd.h> #include <string.h> -#include "syscall.h" + +static size_t slash_len(const char *s) +{ + const char *s0 = s; + while (*s == '/') s++; + return s-s0; +} char *realpath(const char *restrict filename, char *restrict resolved) { - int fd; - ssize_t r; - struct stat st1, st2; - char buf[15+3*sizeof(int)]; - char tmp[PATH_MAX]; + char stack[PATH_MAX+1]; + char output[PATH_MAX]; + size_t p, q, l, l0, cnt=0, nup=0; + int check_dir=0; if (!filename) { errno = EINVAL; return 0; } + l = strnlen(filename, sizeof stack); + if (!l) { + errno = ENOENT; + return 0; + } + if (l >= PATH_MAX) goto toolong; + p = sizeof stack - l - 1; + q = 0; + memcpy(stack+p, filename, l+1); + + /* Main loop. Each iteration pops the next part from stack of + * remaining path components and consumes any slashes that follow. + * If not a link, it's moved to output; if a link, contents are + * pushed to the stack. */ +restart: + for (; ; p+=slash_len(stack+p)) { + /* If stack starts with /, the whole component is / or // + * and the output state must be reset. */ + if (stack[p] == '/') { + check_dir=0; + nup=0; + q=0; + output[q++] = '/'; + p++; + /* Initial // is special. */ + if (stack[p] == '/' && stack[p+1] != '/') + output[q++] = '/'; + continue; + } + + char *z = __strchrnul(stack+p, '/'); + l0 = l = z-(stack+p); - fd = sys_open(filename, O_PATH|O_NONBLOCK|O_CLOEXEC); - if (fd < 0) return 0; - __procfdname(buf, fd); + if (!l && !check_dir) break; - r = readlink(buf, tmp, sizeof tmp - 1); - if (r < 0) goto err; - tmp[r] = 0; + /* Skip any . component but preserve check_dir status. */ + if (l==1 && stack[p]=='.') { + p += l; + continue; + } - fstat(fd, &st1); - r = stat(tmp, &st2); - if (r<0 || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) { - if (!r) errno = ELOOP; - goto err; + /* Copy next component onto output at least temporarily, to + * call readlink, but wait to advance output position until + * determining it's not a link. */ + if (q && output[q-1] != '/') { + if (!p) goto toolong; + stack[--p] = '/'; + l++; + } + if (q+l >= PATH_MAX) goto toolong; + memcpy(output+q, stack+p, l); + output[q+l] = 0; + p += l; + + int up = 0; + if (l0==2 && stack[p-2]=='.' && stack[p-1]=='.') { + up = 1; + /* Any non-.. path components we could cancel start + * after nup repetitions of the 3-byte string "../"; + * if there are none, accumulate .. components to + * later apply to cwd, if needed. */ + if (q <= 3*nup) { + nup++; + q += l; + continue; + } + /* When previous components are already known to be + * directories, processing .. can skip readlink. */ + if (!check_dir) goto skip_readlink; + } + ssize_t k = readlink(output, stack, p); + if (k==p) goto toolong; + if (!k) { + errno = ENOENT; + return 0; + } + if (k<0) { + if (errno != EINVAL) return 0; +skip_readlink: + check_dir = 0; + if (up) { + while(q && output[q-1]!='/') q--; + if (q>1 && (q>2 || output[0]!='/')) q--; + continue; + } + if (l0) q += l; + check_dir = stack[p]; + continue; + } + if (++cnt == SYMLOOP_MAX) { + errno = ELOOP; + return 0; + } + + /* If link contents end in /, strip any slashes already on + * stack to avoid /->// or //->/// or spurious toolong. */ + if (stack[k-1]=='/') while (stack[p]=='/') p++; + p -= k; + memmove(stack+p, stack, k); + + /* Skip the stack advancement in case we have a new + * absolute base path. */ + goto restart; } - __syscall(SYS_close, fd); - return resolved ? strcpy(resolved, tmp) : strdup(tmp); -err: - __syscall(SYS_close, fd); + output[q] = 0; + + if (output[0] != '/') { + if (!getcwd(stack, sizeof stack)) return 0; + l = strlen(stack); + /* Cancel any initial .. components. */ + p = 0; + while (nup--) { + while(l>1 && stack[l-1]!='/') l--; + if (l>1) l--; + p += 2; + if (p<q) p++; + } + if (q-p && stack[l-1]!='/') stack[l++] = '/'; + if (l + (q-p) + 1 >= PATH_MAX) goto toolong; + memmove(output + l, output + p, q - p + 1); + memcpy(output, stack, l); + q = l + q-p; + } + + if (resolved) return memcpy(resolved, output, q+1); + else return strdup(output); + +toolong: + errno = ENAMETOOLONG; return 0; } |