diff options
| author | Rich Felker <dalias@aerifal.cx> | 2012-04-16 16:03:45 -0400 | 
|---|---|---|
| committer | Rich Felker <dalias@aerifal.cx> | 2012-04-16 16:03:45 -0400 | 
| commit | 18efeb320b763e541a7dbf61a7da1cbe13ab2be9 (patch) | |
| tree | f8eb42a87b9c07ad8d9de0380356af3541b425aa /src | |
| parent | cc762434d91a2f441a1d2f44962ab1d4854b607b (diff) | |
| download | musl-18efeb320b763e541a7dbf61a7da1cbe13ab2be9.tar.gz | |
new scanf implementation and corresponding integer parser/converter
advantages over the old code:
- correct results for floating point (old code was bogus)
- wide/regular scanf separated so scanf does not pull in wide code
- well-defined behavior on integers that overflow dest type
- support for %[a-b] ranges with %[ (impl-defined but widely used)
- no intermediate conversion of fmt string to wide string
- cleaner, easier to share code with strto* functions
- better standards conformance for corner cases
the old code remains in the source tree, as the wide versions of the
scanf-family functions are still using it. it will be removed when no
longer needed.
Diffstat (limited to 'src')
| -rw-r--r-- | src/internal/intscan.c | 97 | ||||
| -rw-r--r-- | src/internal/intscan.h | 8 | ||||
| -rw-r--r-- | src/internal/stdio_impl.h | 2 | ||||
| -rw-r--r-- | src/stdio/__string_read.c | 13 | ||||
| -rw-r--r-- | src/stdio/vfscanf.c | 338 | ||||
| -rw-r--r-- | src/stdio/vsscanf.c | 22 | 
6 files changed, 450 insertions, 30 deletions
| diff --git a/src/internal/intscan.c b/src/internal/intscan.c new file mode 100644 index 00000000..a00f2ccc --- /dev/null +++ b/src/internal/intscan.c @@ -0,0 +1,97 @@ +#include <limits.h> +#include <errno.h> +#include "shgetc.h" + +/* Lookup table for digit values. -1==255>=36 -> invalid */ +static const unsigned char table[] = { -1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, +-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, +25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, +-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, +25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +}; + +unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long lim) +{ +	const unsigned char *val = table+1; +	int c, neg=0; +	unsigned x; +	unsigned long long y; +	if (base > 36) { +		errno = EINVAL; +		return 0; +	} +	c = shgetc(f); +	if (c=='+' || c=='-') { +		neg = -(c=='-'); +		c = shgetc(f); +	} +	if ((base == 0 || base == 16) && c=='0') { +		c = shgetc(f); +		if ((c|32)=='x') { +			c = shgetc(f); +			if (val[c]>=16) { +				shunget(f); +				if (pok) shunget(f); +				else shlim(f, 0); +				return 0; +			} +			base = 16; +		} else if (base == 0) { +			base = 8; +		} +	} else { +		if (base == 0) base = 10; +		if (val[c] >= base) { +			shlim(f, 0); +			errno = EINVAL; +			return 0; +		} +	} +	if (base == 10) { +		for (x=0; c-'0'<10U && x<=UINT_MAX/10-1; c=shgetc(f)) +			x = x*10 + (c-'0'); +		for (y=x; c-'0'<10U && 
y<=ULLONG_MAX/10 && 10*y<=ULLONG_MAX-(c-'0'); c=shgetc(f)) +			y = y*10 + (c-'0'); +		if (c-'0'>=10U) goto done; +	} else if (!(base & base-1)) { +		int bs = "\0\1\2\4\7\3\6\5"[(0x17*base)>>5&7]; +		for (x=0; val[c]<base && x<=UINT_MAX/32; c=shgetc(f)) +			x = x<<bs | val[c]; +		for (y=x; val[c]<base && y<=ULLONG_MAX>>bs; c=shgetc(f)) +			y = y<<bs | val[c]; +	} else { +		for (x=0; val[c]<base && x<=UINT_MAX/36-1; c=shgetc(f)) +			x = x*base + val[c]; +		for (y=x; val[c]<base && y<=ULLONG_MAX/base && base*y<=ULLONG_MAX-val[c]; c=shgetc(f)) +			y = y*base + val[c]; +	} +	if (val[c]<base) { +		for (; val[c]<base; c=shgetc(f)); +		errno = ERANGE; +		y = lim; +	} +done: +	shunget(f); +	if (y>=lim) { +		if (!(lim&1) && !neg) { +			errno = ERANGE; +			return lim-1; +		} else if (y>lim) { +			errno = ERANGE; +			return lim; +		} +	} +	return (y^neg)-neg; +} diff --git a/src/internal/intscan.h b/src/internal/intscan.h new file mode 100644 index 00000000..994c5e7d --- /dev/null +++ b/src/internal/intscan.h @@ -0,0 +1,8 @@ +#ifndef INTSCAN_H +#define INTSCAN_H + +#include <stdio.h> + +unsigned long long __intscan(FILE *, unsigned, int, unsigned long long); + +#endif diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h index 5ec296f3..af7aacc8 100644 --- a/src/internal/stdio_impl.h +++ b/src/internal/stdio_impl.h @@ -69,6 +69,8 @@ size_t __stdout_write(FILE *, const unsigned char *, size_t);  off_t __stdio_seek(FILE *, off_t, int);  int __stdio_close(FILE *); +size_t __string_read(FILE *, unsigned char *, size_t); +  int __toread(FILE *);  int __towrite(FILE *); diff --git a/src/stdio/__string_read.c b/src/stdio/__string_read.c new file mode 100644 index 00000000..5c3728d7 --- /dev/null +++ b/src/stdio/__string_read.c @@ -0,0 +1,13 @@ +#include "stdio_impl.h" + +size_t __string_read(FILE *f, unsigned char *buf, size_t len) +{ +	char *src = f->cookie; +	size_t k = strnlen(src, len+256); +	if (k < len) len = k; +	memcpy(buf, src, len); +	f->rpos = (void 
*)(src+len); +	f->rend = (void *)(src+k); +	f->cookie = src+k; +	return len; +} diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c index 414c2a3d..5c1e49b1 100644 --- a/src/stdio/vfscanf.c +++ b/src/stdio/vfscanf.c @@ -1,36 +1,342 @@  #include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <ctype.h> +#include <wchar.h> +#include <wctype.h> +#include <limits.h>  #include <string.h>  #include <errno.h> -#include <ctype.h> +#include <math.h> +#include <float.h>  #include "stdio_impl.h" -#include "__scanf.h" +#include "shgetc.h" +#include "intscan.h" +#include "floatscan.h" -static void f_read(rctx_t *r) +#define SIZE_hh -2 +#define SIZE_h  -1 +#define SIZE_def 0 +#define SIZE_l   1 +#define SIZE_L   2 +#define SIZE_ll  3 + +static void store_int(void *dest, int size, unsigned long long i)  { -	FILE *f = r->opaque; -	if ((r->c = getc_unlocked(f)) >= 0) r->l++; +	if (!dest) return; +	switch (size) { +	case SIZE_hh: +		*(char *)dest = i; +		break; +	case SIZE_h: +		*(short *)dest = i; +		break; +	case SIZE_def: +		*(int *)dest = i; +		break; +	case SIZE_l: +		*(long *)dest = i; +		break; +	case SIZE_ll: +		*(long long *)dest = i; +		break; +	}  } -int vfscanf(FILE *f, const char *fmt, va_list ap) +static void *arg_n(va_list ap, unsigned int n)  { -	size_t l = strlen(fmt), i, result; -	rctx_t r = { f_read, (void *)f, 0, isspace }; -	wchar_t fmt2[l+1]; +	void *p; +	unsigned int i; +	va_list ap2; +	va_copy(ap2, ap); +	for (i=n; i>1; i--) va_arg(ap2, void *); +	p = va_arg(ap2, void *); +	va_end(ap2); +	return p; +} -	if (l > 0x100000) { -		errno = ENOMEM; +static int readwc(int c, wchar_t **wcs, mbstate_t *st) +{ +	char ch = c; +	wchar_t wc; +	switch (mbrtowc(&wc, &ch, 1, st)) { +	case -1:  		return -1; +	case -2: +		break; +	default: +		if (*wcs) *(*wcs)++ = wc;  	} -	for (i=0; i<=l; i++) fmt2[i] = (unsigned char)fmt[i]; +	return 0; +} + +int vfscanf(FILE *f, const char *fmt, va_list ap) +{ +	int width; +	int size; +	int alloc; +	int base; +	const 
unsigned char *p; +	int c, t; +	char *s; +	wchar_t *wcs; +	mbstate_t st; +	void *dest=NULL; +	int invert; +	int matches=0; +	unsigned long long x; +	long double y; +	off_t pos = 0;  	FLOCK(f); -	result = __scanf(&r, fmt2, ap); +	for (p=(const unsigned char *)fmt; *p; p++) { + +		if (isspace(*p)) { +			while (isspace(p[1])) p++; +			shlim(f, 0); +			while (isspace(shgetc(f))); +			shunget(f); +			pos += shcnt(f); +			continue; +		} +		if (*p != '%' || p[1] == '%') { +			p += *p=='%'; +			c = shgetc(f); +			if (c!=*p) { +				shunget(f); +				if (c<0) goto input_fail; +				goto match_fail; +			} +			pos++; +			continue; +		} + +		p++; +		if (*p=='*') { +			dest = 0; p++; +		} else if (isdigit(*p) && p[1]=='$') { +			dest = arg_n(ap, *p-'0'); p+=2; +		} else { +			dest = va_arg(ap, void *); +		} + +		for (width=0; isdigit(*p); p++) { +			width = 10*width + *p - '0'; +		} -	if (r.u && r.c >= 0) -		ungetc(r.c, f); +		if (*p=='m') { +			alloc = 1; +			p++; +		} else { +			alloc = 0; +		} +		size = SIZE_def; +		switch (*p++) { +		case 'h': +			if (*p == 'h') p++, size = SIZE_hh; +			else size = SIZE_h; +			break; +		case 'l': +			if (*p == 'l') p++, size = SIZE_ll; +			else size = SIZE_l; +			break; +		case 'j': +			size = SIZE_ll; +			break; +		case 'z': +		case 't': +			size = SIZE_l; +			break; +		case 'L': +			size = SIZE_L; +			break; +		case 'd': case 'i': case 'o': case 'u': case 'x': +		case 'a': case 'e': case 'f': case 'g': +		case 'A': case 'E': case 'F': case 'G': case 'X': +		case 's': case 'c': case '[': +		case 'S': case 'C': +		case 'p': case 'n': +			p--; +			break; +		default: +			goto fmt_fail; +		} + +		t = *p; + +		switch (t) { +		case 'C': +		case 'c': +			if (width < 1) width = 1; +		case 's': +			if (size == SIZE_l) t &= ~0x20; +		case 'd': case 'i': case 'o': case 'u': case 'x': +		case 'a': case 'e': case 'f': case 'g': +		case 'A': case 'E': case 'F': case 'G': case 'X': +		case '[': case 'S': +		case 'p': case 'n': +			if (width < 1) width = 0; 
+			break; +		default: +			goto fmt_fail; +		} + +		shlim(f, width); + +		if (t != 'n') { +			if (shgetc(f) < 0) goto input_fail; +			shunget(f); +		} + +		switch (t) { +		case 'n': +			store_int(dest, size, pos); +			/* do not increment match count, etc! */ +			continue; +		case 'C': +			wcs = dest; +			st = (mbstate_t){ 0 }; +			while ((c=shgetc(f)) >= 0) { +				if (readwc(c, &wcs, &st) < 0) +					goto input_fail; +			} +			if (!mbsinit(&st)) goto input_fail; +			if (shcnt(f) != width) goto match_fail; +			break; +		case 'c': +			if (dest) { +				s = dest; +				while ((c=shgetc(f)) >= 0) *s++ = c; +			} else { +				while (shgetc(f)>=0); +			} +			if (shcnt(f) < width) goto match_fail; +			break; +		case '[': +			s = dest; +			wcs = dest; + +			if (*++p == '^') p++, invert = 1; +			else invert = 0; + +			unsigned char scanset[257]; +			memset(scanset, invert, sizeof scanset); + +			scanset[0] = 0; +			if (*p == '-') p++, scanset[1+'-'] = 1-invert; +			if (*p == ']') p++, scanset[1+']'] = 1-invert; +			for (; *p && *p != ']'; p++) { +				if (*p=='-' && p[1] != ']') +					for (c=p++[-1]; c<*p; c++) +						scanset[1+c] = 1-invert; +				scanset[1+*p] = 1-invert; +			} +			if (!*p) goto fmt_fail; + +			if (size == SIZE_l) { +				st = (mbstate_t){0}; +				while (scanset[(c=shgetc(f))+1]) { +					if (readwc(c, &wcs, &st) < 0) +						goto input_fail; +				} +				if (!mbsinit(&st)) goto input_fail; +				s = 0; +			} else if (s) { +				while (scanset[(c=shgetc(f))+1]) +					*s++ = c; +				wcs = 0; +			} else { +				while (scanset[(c=shgetc(f))+1]); +			} +			shunget(f); +			if (!shcnt(f)) goto match_fail; +			if (s) *s = 0; +			if (wcs) *wcs = 0; +			break; +		default: +			shlim(f, 0); +			while (isspace(shgetc(f))); +			shunget(f); +			pos += shcnt(f); +			shlim(f, width); +			if (shgetc(f) < 0) goto input_fail; +			shunget(f); +		} + +		switch (t) { +		case 'p': +		case 'X': +		case 'x': +			base = 16; +			goto int_common; +		case 'o': +			base = 8; +			goto int_common; +	
	case 'd': +		case 'u': +			base = 10; +			goto int_common; +		case 'i': +			base = 0; +		int_common: +			x = __intscan(f, base, 0, ULLONG_MAX); +			if (!shcnt(f)) goto match_fail; +			if (t=='p') *(void **)dest = (void *)(uintptr_t)x; +			else store_int(dest, size, x); +			break; +		case 'a': case 'A': +		case 'e': case 'E': +		case 'f': case 'F': +		case 'g': case 'G': +			y = __floatscan(f, -1, size, 0); +			if (!shcnt(f)) goto match_fail; +			if (dest) switch (size) { +			case SIZE_def: +				*(float *)dest = y; +				break; +			case SIZE_l: +				*(double *)dest = y; +				break; +			case SIZE_L: +				*(long double *)dest = y; +				break; +			} +			break; +		case 'S': +			wcs = dest; +			st = (mbstate_t){ 0 }; +			while (!isspace(c=shgetc(f)) && c!=EOF) { +				if (readwc(c, &wcs, &st) < 0) +					goto input_fail; +			} +			if (!mbsinit(&st)) goto input_fail; +			if (dest) *wcs++ = 0; +			break; +		case 's': +			if (dest) { +				s = dest; +				while (!isspace(c=shgetc(f)) && c!=EOF) +					*s++ = c; +				*s = 0; +			} else { +				while (!isspace(c=shgetc(f)) && c!=EOF); +			} +			shunget(f); +			break; +		} + +		pos += shcnt(f); +		if (dest) matches++; +	} +	if (0) { +fmt_fail: +input_fail: +		if (!matches) matches--; +	} +match_fail:  	FUNLOCK(f); -	return result; +	return matches;  } diff --git a/src/stdio/vsscanf.c b/src/stdio/vsscanf.c index fd48f709..fbc15e69 100644 --- a/src/stdio/vsscanf.c +++ b/src/stdio/vsscanf.c @@ -1,21 +1,15 @@ -#include <stdio.h> -#include <string.h> -#include <ctype.h> +#include "stdio_impl.h" -#include "__scanf.h" - -static void s_read(rctx_t *r) +static size_t do_read(FILE *f, unsigned char *buf, size_t len)  { -	unsigned char *s = r->opaque; -	if (!s[r->l]) r->c = -1; -	else r->c = s[r->l++]; +	return __string_read(f, buf, len);  }  int vsscanf(const char *s, const char *fmt, va_list ap)  { -	size_t l = strlen(fmt), i; -	wchar_t fmt2[l+1]; -	rctx_t r = { s_read, (void *)s, 0, isspace }; -	for (i=0; i<=l; i++) fmt2[i] = (unsigned 
char)fmt[i]; -	return __scanf(&r, fmt2, ap); +	FILE f = { +		.buf = (void *)s, .cookie = (void *)s, +		.read = do_read, .lock = -1 +	}; +	return vfscanf(&f, fmt, ap);  } | 
