summaryrefslogtreecommitdiff
path: root/src/stdio/vfscanf.c
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2012-04-16 16:03:45 -0400
committer Rich Felker <dalias@aerifal.cx> 2012-04-16 16:03:45 -0400
commit 18efeb320b763e541a7dbf61a7da1cbe13ab2be9 (patch)
tree f8eb42a87b9c07ad8d9de0380356af3541b425aa /src/stdio/vfscanf.c
parent cc762434d91a2f441a1d2f44962ab1d4854b607b (diff)
download musl-18efeb320b763e541a7dbf61a7da1cbe13ab2be9.tar.gz
new scanf implementation and corresponding integer parser/converter
advantages over the old code:
- correct results for floating point (old code was bogus)
- wide/regular scanf separated so scanf does not pull in wide code
- well-defined behavior on integers that overflow dest type
- support for %[a-b] ranges with %[ (impl-defined but widely used)
- no intermediate conversion of fmt string to wide string
- cleaner, easier to share code with strto* functions
- better standards conformance for corner cases

the old code remains in the source tree, as the wide versions of the scanf-family functions are still using it. it will be removed when no longer needed.
Diffstat (limited to 'src/stdio/vfscanf.c')
-rw-r--r-- src/stdio/vfscanf.c 338
1 files changed, 322 insertions, 16 deletions
diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c
index 414c2a3d..5c1e49b1 100644
--- a/src/stdio/vfscanf.c
+++ b/src/stdio/vfscanf.c
@@ -1,36 +1,342 @@
#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <limits.h>
#include <string.h>
#include <errno.h>
-#include <ctype.h>
+#include <math.h>
+#include <float.h>
#include "stdio_impl.h"
-#include "__scanf.h"
+#include "shgetc.h"
+#include "intscan.h"
+#include "floatscan.h"
-static void f_read(rctx_t *r)
+/* Codes for the length modifier of a conversion specification, as set by
+ * the format parser in vfscanf: "hh", "h", none, "l", "L" and "ll".
+ * ("j" is mapped to SIZE_ll, and "z"/"t" to SIZE_l.) */
+#define SIZE_hh -2
+#define SIZE_h -1
+#define SIZE_def 0
+#define SIZE_l 1
+#define SIZE_L 2
+#define SIZE_ll 3
+
+/*
+ * Store i through dest, converted by assignment to the integer type that
+ * size selects: SIZE_hh -> char, SIZE_h -> short, SIZE_def -> int,
+ * SIZE_l -> long, SIZE_ll -> long long.
+ * Does nothing when dest is null, or when size is any other value
+ * (SIZE_L has no case here).
+ */
+static void store_int(void *dest, int size, unsigned long long i)
{
- FILE *f = r->opaque;
- if ((r->c = getc_unlocked(f)) >= 0) r->l++;
+ /* Null dest: nothing to store into. */
+ if (!dest) return;
+ switch (size) {
+ case SIZE_hh:
+ *(char *)dest = i;
+ break;
+ case SIZE_h:
+ *(short *)dest = i;
+ break;
+ case SIZE_def:
+ *(int *)dest = i;
+ break;
+ case SIZE_l:
+ *(long *)dest = i;
+ break;
+ case SIZE_ll:
+ *(long long *)dest = i;
+ break;
+ }
}
-int vfscanf(FILE *f, const char *fmt, va_list ap)
+/*
+ * Return the n-th pointer argument of ap, counting from 1, without
+ * advancing ap itself: the walk is done on a va_copy. Every argument
+ * skipped over is fetched as a void *. n == 0 behaves like n == 1.
+ */
+static void *arg_n(va_list ap, unsigned int n)
{
- size_t l = strlen(fmt), i, result;
- rctx_t r = { f_read, (void *)f, 0, isspace };
- wchar_t fmt2[l+1];
+ void *p;
+ unsigned int i;
+ va_list ap2;
+ va_copy(ap2, ap);
+ /* Discard the n-1 arguments that precede the wanted one. */
+ for (i=n; i>1; i--) va_arg(ap2, void *);
+ p = va_arg(ap2, void *);
+ va_end(ap2);
+ return p;
+}
- if (l > 0x100000) {
- errno = ENOMEM;
+/*
+ * Feed the single byte c to mbrtowc() under conversion state *st.
+ * Returns -1 if mbrtowc() reports an invalid sequence, 0 otherwise.
+ * When the byte completes a character and *wcs is non-null, the wide
+ * character is stored at **wcs and *wcs is advanced by one; an incomplete
+ * sequence (mbrtowc() result -2) stores nothing.
+ */
+static int readwc(int c, wchar_t **wcs, mbstate_t *st)
+{
+ char ch = c;
+ wchar_t wc;
+ switch (mbrtowc(&wc, &ch, 1, st)) {
+ case -1:
return -1;
+ case -2:
+ /* More bytes needed; the partial state is kept in *st. */
+ break;
+ default:
+ if (*wcs) *(*wcs)++ = wc;
}
- for (i=0; i<=l; i++) fmt2[i] = (unsigned char)fmt[i];
+ return 0;
+}
+
+/*
+ * Read input from f as directed by fmt, storing each converted value
+ * through the matching pointer argument taken from ap.
+ *
+ * Returns the number of conversions that were stored. If an input
+ * failure or a malformed format is met before anything was stored, the
+ * result is -1; a matching failure simply stops the scan and returns the
+ * count reached so far. The stream is locked with FLOCK for the whole
+ * call.
+ */
+int vfscanf(FILE *f, const char *fmt, va_list ap)
+{
+	int width;
+	int size;
+	int alloc;
+	int base;
+	const unsigned char *p;
+	int c, t;
+	char *s;
+	wchar_t *wcs;
+	mbstate_t st;
+	void *dest=NULL;
+	int invert;
+	int matches=0;
+	unsigned long long x;
+	long double y;
+	off_t pos = 0;	/* input bytes consumed so far, reported by %n */
 	FLOCK(f);
-	result = __scanf(&r, fmt2, ap);
+	for (p=(const unsigned char *)fmt; *p; p++) {
+
+		/* A run of whitespace in the format skips any run of
+		 * whitespace in the input. */
+		if (isspace(*p)) {
+			while (isspace(p[1])) p++;
+			shlim(f, 0);
+			while (isspace(shgetc(f)));
+			shunget(f);
+			pos += shcnt(f);
+			continue;
+		}
+		/* Literal character, or "%%" standing for a literal '%'. */
+		if (*p != '%' || p[1] == '%') {
+			p += *p=='%';
+			/* Clear the limit installed by shlim(f, width) for
+			 * the previous conversion. Left in force, it would
+			 * also bound this read, so a literal following a
+			 * field that used its whole width could not be
+			 * matched. */
+			shlim(f, 0);
+			c = shgetc(f);
+			if (c!=*p) {
+				shunget(f);
+				if (c<0) goto input_fail;
+				goto match_fail;
+			}
+			pos++;
+			continue;
+		}
+
+		/* Conversion specification: pick the destination first.
+		 * '*' suppresses assignment (null dest); "N$" with a
+		 * single digit N selects the N-th argument by position. */
+		p++;
+		if (*p=='*') {
+			dest = 0; p++;
+		} else if (isdigit(*p) && p[1]=='$') {
+			dest = arg_n(ap, *p-'0'); p+=2;
+		} else {
+			dest = va_arg(ap, void *);
+		}
+
+		/* Optional decimal field width; 0 means none was given. */
+		for (width=0; isdigit(*p); p++) {
+			width = 10*width + *p - '0';
+		}
-	if (r.u && r.c >= 0)
-		ungetc(r.c, f);
+		/* The 'm' flag is consumed and recorded, but alloc is not
+		 * read anywhere else in this function. */
+		if (*p=='m') {
+			alloc = 1;
+			p++;
+		} else {
+			alloc = 0;
+		}
+		/* Length modifier. When the character is already the
+		 * conversion specifier, step back so *p names it. */
+		size = SIZE_def;
+		switch (*p++) {
+		case 'h':
+			if (*p == 'h') p++, size = SIZE_hh;
+			else size = SIZE_h;
+			break;
+		case 'l':
+			if (*p == 'l') p++, size = SIZE_ll;
+			else size = SIZE_l;
+			break;
+		case 'j':
+			size = SIZE_ll;
+			break;
+		case 'z':
+		case 't':
+			size = SIZE_l;
+			break;
+		case 'L':
+			size = SIZE_L;
+			break;
+		case 'd': case 'i': case 'o': case 'u': case 'x':
+		case 'a': case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G': case 'X':
+		case 's': case 'c': case '[':
+		case 'S': case 'C':
+		case 'p': case 'n':
+			p--;
+			break;
+		default:
+			goto fmt_fail;
+		}
+
+		t = *p;
+
+		/* Validate the specifier and normalize it: %c defaults to
+		 * width 1, and "%lc"/"%ls" become 'C'/'S' by clearing the
+		 * lowercase bit. */
+		switch (t) {
+		case 'C':
+		case 'c':
+			if (width < 1) width = 1;
+			/* fallthrough */
+		case 's':
+			if (size == SIZE_l) t &= ~0x20;
+			/* fallthrough */
+		case 'd': case 'i': case 'o': case 'u': case 'x':
+		case 'a': case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G': case 'X':
+		case '[': case 'S':
+		case 'p': case 'n':
+			if (width < 1) width = 0;
+			break;
+		default:
+			goto fmt_fail;
+		}
+
+		shlim(f, width);
+
+		/* Every conversion except %n needs at least one byte of
+		 * input to be available. */
+		if (t != 'n') {
+			if (shgetc(f) < 0) goto input_fail;
+			shunget(f);
+		}
+
+		/* First stage: conversions that do not skip leading
+		 * whitespace are handled completely here; for all others
+		 * the default case skips whitespace and re-arms the width
+		 * limit. */
+		switch (t) {
+		case 'n':
+			store_int(dest, size, pos);
+			/* do not increment match count, etc! */
+			continue;
+		case 'C':
+			/* Read until shgetc returns a negative value. */
+			wcs = dest;
+			st = (mbstate_t){ 0 };
+			while ((c=shgetc(f)) >= 0) {
+				if (readwc(c, &wcs, &st) < 0)
+					goto input_fail;
+			}
+			if (!mbsinit(&st)) goto input_fail;
+			if (shcnt(f) != width) goto match_fail;
+			break;
+		case 'c':
+			if (dest) {
+				s = dest;
+				while ((c=shgetc(f)) >= 0) *s++ = c;
+			} else {
+				while (shgetc(f)>=0);
+			}
+			if (shcnt(f) < width) goto match_fail;
+			break;
+		case '[':
+			s = dest;
+			wcs = dest;
+
+			if (*++p == '^') p++, invert = 1;
+			else invert = 0;
+
+			/* scanset[1+ch] is nonzero when byte ch is accepted.
+			 * Slot 0 corresponds to a shgetc result of -1 and is
+			 * always rejected, which ends the read loops. */
+			unsigned char scanset[257];
+			memset(scanset, invert, sizeof scanset);
+
+			scanset[0] = 0;
+			/* A leading '-' or ']' is an ordinary set member. */
+			if (*p == '-') p++, scanset[1+'-'] = 1-invert;
+			if (*p == ']') p++, scanset[1+']'] = 1-invert;
+			for (; *p && *p != ']'; p++) {
+				/* "a-b": mark everything from the character
+				 * before the '-' up to the one after it. */
+				if (*p=='-' && p[1] != ']')
+					for (c=p++[-1]; c<*p; c++)
+						scanset[1+c] = 1-invert;
+				scanset[1+*p] = 1-invert;
+			}
+			if (!*p) goto fmt_fail;
+
+			if (size == SIZE_l) {
+				st = (mbstate_t){0};
+				while (scanset[(c=shgetc(f))+1]) {
+					if (readwc(c, &wcs, &st) < 0)
+						goto input_fail;
+				}
+				if (!mbsinit(&st)) goto input_fail;
+				s = 0;
+			} else if (s) {
+				while (scanset[(c=shgetc(f))+1])
+					*s++ = c;
+				wcs = 0;
+			} else {
+				while (scanset[(c=shgetc(f))+1]);
+			}
+			shunget(f);
+			if (!shcnt(f)) goto match_fail;
+			/* Terminate whichever output string is in use. */
+			if (s) *s = 0;
+			if (wcs) *wcs = 0;
+			break;
+		default:
+			shlim(f, 0);
+			while (isspace(shgetc(f)));
+			shunget(f);
+			pos += shcnt(f);
+			shlim(f, width);
+			if (shgetc(f) < 0) goto input_fail;
+			shunget(f);
+		}
+
+		/* Second stage: the whitespace-skipping conversions. The
+		 * specifiers finished above match no case here. */
+		switch (t) {
+		case 'p':
+		case 'X':
+		case 'x':
+			base = 16;
+			goto int_common;
+		case 'o':
+			base = 8;
+			goto int_common;
+		case 'd':
+		case 'u':
+			base = 10;
+			goto int_common;
+		case 'i':
+			base = 0;
+		int_common:
+			x = __intscan(f, base, 0, ULLONG_MAX);
+			if (!shcnt(f)) goto match_fail;
+			if (t=='p') {
+				/* dest is null for "%*p": parse, don't store. */
+				if (dest) *(void **)dest = (void *)(uintptr_t)x;
+			} else {
+				store_int(dest, size, x);
+			}
+			break;
+		case 'a': case 'A':
+		case 'e': case 'E':
+		case 'f': case 'F':
+		case 'g': case 'G':
+			y = __floatscan(f, -1, size, 0);
+			if (!shcnt(f)) goto match_fail;
+			if (dest) switch (size) {
+			case SIZE_def:
+				*(float *)dest = y;
+				break;
+			case SIZE_l:
+				*(double *)dest = y;
+				break;
+			case SIZE_L:
+				*(long double *)dest = y;
+				break;
+			}
+			break;
+		case 'S':
+			wcs = dest;
+			st = (mbstate_t){ 0 };
+			while (!isspace(c=shgetc(f)) && c!=EOF) {
+				if (readwc(c, &wcs, &st) < 0)
+					goto input_fail;
+			}
+			if (!mbsinit(&st)) goto input_fail;
+			if (dest) *wcs++ = 0;
+			/* Give back the delimiter that ended the loop, as the
+			 * 's' case does, so it is neither consumed nor
+			 * counted in pos. */
+			shunget(f);
+			break;
+		case 's':
+			if (dest) {
+				s = dest;
+				while (!isspace(c=shgetc(f)) && c!=EOF)
+					*s++ = c;
+				*s = 0;
+			} else {
+				while (!isspace(c=shgetc(f)) && c!=EOF);
+			}
+			shunget(f);
+			break;
+		}
+
+		pos += shcnt(f);
+		if (dest) matches++;
+	}
+	/* Entered only through the labels: format and input failures
+	 * turn a zero match count into -1. */
+	if (0) {
+fmt_fail:
+input_fail:
+		if (!matches) matches--;
+	}
+match_fail:
 	FUNLOCK(f);
-	return result;
+	return matches;
 }