From ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 14 Jul 2011 00:51:45 -0400 Subject: new restartable integer parsing framework. this fixes a number of bugs in integer parsing due to lazy haphazard wrapping, as well as some misinterpretations of the standard. the new parser is able to work character-at-a-time or on whole strings, making it easy to support the wide functions without unbounded space for conversion. it will also be possible to update scanf to use the new parser. --- src/stdlib/strtoimax.c | 37 ++++++++++----- src/stdlib/strtoumax.c | 123 +++++++------------------------------------------ src/stdlib/wcstoimax.c | 36 ++++++++++----- src/stdlib/wcstoumax.c | 41 ++++++----------- 4 files changed, 81 insertions(+), 156 deletions(-) (limited to 'src/stdlib') diff --git a/src/stdlib/strtoimax.c b/src/stdlib/strtoimax.c index aeb0397f..247f91d4 100644 --- a/src/stdlib/strtoimax.c +++ b/src/stdlib/strtoimax.c @@ -1,25 +1,38 @@ #include #include #include +#include "intparse.h" intmax_t strtoimax(const char *s1, char **p, int base) { - const unsigned char *s = (const void *)s1; - int sign = 0; - uintmax_t x; + const unsigned char *s = (void *)s1; + struct intparse ip = {0}; + + if (p) *p = (char *)s1; + + if (base && base-2U > 34) { + errno = EINVAL; + return 0; + } - /* Initial whitespace */ for (; isspace(*s); s++); - /* Optional sign */ - if (*s == '-') sign = *s++; - else if (*s == '+') s++; + ip.base = base; + __intparse(&ip, s, SIZE_MAX); + + if (p && ip.err != EINVAL) + *p = (char *)s + ip.cnt; + + if (ip.err) { + errno = ip.err; + if (ip.err == EINVAL) return 0; + return ip.neg ? INTMAX_MIN : INTMAX_MAX; + } - x = strtoumax((const void *)s, p, base); - if (x > INTMAX_MAX) { - if (!sign || -x != INTMAX_MIN) + if (ip.val > INTMAX_MAX) { + if (!ip.neg || -ip.val != INTMAX_MIN) errno = ERANGE; - return sign ? INTMAX_MIN : INTMAX_MAX; + return ip.neg ? INTMAX_MIN : INTMAX_MAX; } - return sign ? 
-x : x; + return ip.neg ? -ip.val : ip.val; } diff --git a/src/stdlib/strtoumax.c b/src/stdlib/strtoumax.c index f1902476..a2bb4d7d 100644 --- a/src/stdlib/strtoumax.c +++ b/src/stdlib/strtoumax.c @@ -2,122 +2,33 @@ #include #include #include -#include - -/* Lookup table for digit values. -1==255>=36 -> invalid */ -static const unsigned char digits[] = { --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, --1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, -25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, --1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, -25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, --1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -}; +#include "intparse.h" uintmax_t strtoumax(const char *s1, char **p, int base) { const unsigned char *s = (void *)s1; - size_t x1, z1; - uintmax_t x, z=0; - int sign = 0; - int shift; - - if (!p) p = (char **)&s1; - - /* Initial whitespace */ - for (; isspace(*s); s++); - - /* Optional sign */ - if (*s == '-') sign = *s++; - else if (*s == '+') s++; + struct intparse ip = {0}; - /* Default base 8, 10, or 16 depending on prefix */ - if (base == 0) { - if (s[0] == '0') { - if ((s[1]|32) == 'x') base = 16; - else base = 8; - } else { - base = 10; - } - } + if (p) *p = (char *)s1; - if ((unsigned)base-2 > 36-2 || digits[*s]>=base) { - *p = (char *)s1; + if (base && base-2U > 34) { errno = EINVAL; return 0; } - /* Main loops. Only use big types if we have to. 
*/ - if (base == 10) { - for (x1=0; isdigit(*s) && x1<=SIZE_MAX/10-10; s++) - x1 = 10*x1 + *s-'0'; - for (x=x1; isdigit(*s) && x<=UINTMAX_MAX/10-10; s++) - x = 10*x + *s-'0'; - if (isdigit(*s)) { - if (isdigit(s[1]) || 10*x>UINTMAX_MAX-(*s-'0')) - goto overflow; - x = 10*x + *s-'0'; - } - } else if (!(base & base/2)) { - if (base == 16) { - if (s[0]=='0' && (s[1]|32)=='x' && digits[s[2]]<16) - s+=2; - shift=4; - z1 = SIZE_MAX/16; - z = UINTMAX_MAX/16; - } else if (base == 8) { - shift=3; - z1 = SIZE_MAX/8; - z = UINTMAX_MAX/8; - } else if (base == 2) { - shift=1; - z1 = SIZE_MAX/2; - z = UINTMAX_MAX/2; - } else if (base == 4) { - shift=2; - z1 = SIZE_MAX/4; - z = UINTMAX_MAX/4; - } else /* if (base == 32) */ { - shift=5; - z1 = SIZE_MAX/32; - z = UINTMAX_MAX/32; - } - for (x1=0; digits[*s]UINTMAX_MAX-digits[*s]) - goto overflow; - x = x*base + digits[*s]; - } - } + for (; isspace(*s); s++); + + ip.base = base; + __intparse(&ip, s, SIZE_MAX); - *p = (char *)s; - return sign ? -x : x; + if (p && ip.err != EINVAL) + *p = (char *)s + ip.cnt; + + if (ip.err) { + errno = ip.err; + if (ip.err == EINVAL) return 0; + return UINTMAX_MAX; + } -overflow: - for (; digits[*s] < base; s++); - *p = (char *)s; - errno = ERANGE; - return UINTMAX_MAX; + return ip.neg ? 
-ip.val : ip.val; } diff --git a/src/stdlib/wcstoimax.c b/src/stdlib/wcstoimax.c index 59894f60..b83206b7 100644 --- a/src/stdlib/wcstoimax.c +++ b/src/stdlib/wcstoimax.c @@ -2,24 +2,38 @@ #include #include #include +#include "intparse.h" intmax_t wcstoimax(const wchar_t *s, wchar_t **p, int base) { - int sign = 0; - uintmax_t x; + struct intparse ip = {0}; + unsigned char tmp; + + if (p) *p = (wchar_t *)s; + + if (base && base-2U > 34) { + errno = EINVAL; + return 0; + } - /* Initial whitespace */ for (; iswspace(*s); s++); - /* Optional sign */ - if (*s == '-') sign = *s++; - else if (*s == '+') s++; + ip.base = base; + for (; *s<256 && (tmp=*s, __intparse(&ip, &tmp, 1)); s++); + + if (p && ip.err != EINVAL) + *p = (wchar_t *)s; + + if (ip.err) { + errno = ip.err; + if (ip.err == EINVAL) return 0; + return ip.neg ? INTMAX_MIN : INTMAX_MAX; + } - x = wcstoumax(s, p, base); - if (x > INTMAX_MAX) { - if (!sign || -x != INTMAX_MIN) + if (ip.val > INTMAX_MAX) { + if (!ip.neg || -ip.val != INTMAX_MIN) errno = ERANGE; - return sign ? INTMAX_MIN : INTMAX_MAX; + return ip.neg ? INTMAX_MIN : INTMAX_MAX; } - return sign ? -x : x; + return ip.neg ? -ip.val : ip.val; } diff --git a/src/stdlib/wcstoumax.c b/src/stdlib/wcstoumax.c index 86528ef1..e30b0638 100644 --- a/src/stdlib/wcstoumax.c +++ b/src/stdlib/wcstoumax.c @@ -3,46 +3,33 @@ #include #include #include +#include "intparse.h" uintmax_t wcstoumax(const wchar_t *s, wchar_t **p, int base) { - /* Large enough for largest value in binary */ - char buf[sizeof(uintmax_t)*8+2]; - int sign = 0, skipped=0; + struct intparse ip = {0}; + unsigned char tmp; - if (!p) p = (wchar_t **)&s; + if (p) *p = (wchar_t *)s; - if (base && (unsigned)base-2 > 36-2) { - *p = (wchar_t *)s; + if (base && base-2U > 34) { errno = EINVAL; return 0; } - /* Initial whitespace */ for (; iswspace(*s); s++); - /* Optional sign */ - if (*s == '-') sign = *s++; - else if (*s == '+') s++; - - /* Skip leading zeros but don't allow leading zeros before "0x". 
*/ - for (; s[0]=='0' && s[1]=='0'; s++) skipped=1; - if (skipped && (base==0 || base==16) && (s[1]|32)=='x') { - *p = (wchar_t *)(s+1); - return 0; - } - - /* Convert to normal char string so we can use strtoumax */ - buf[0] = sign; - if (wcstombs(buf+!!sign, s, sizeof buf-1) == -1) return 0; - buf[sizeof buf-1]=0; + ip.base = base; + for (; *s<256 && (tmp=*s, __intparse(&ip, &tmp, 1)); s++); - /* Compute final position */ - if (p) { - if ((base==0 || base==16) && s[0]=='0' && (s[1]|32)=='x' && iswxdigit(s[2])) s+=2; - for(;*s&&((unsigned)*s-'0'