diff options
Diffstat (limited to 'src/stdio')
| -rw-r--r-- | src/stdio/__scanf.c | 483 | ||||
| -rw-r--r-- | src/stdio/__scanf.h | 16 | ||||
| -rw-r--r-- | src/stdio/vfwscanf.c | 297 | ||||
| -rw-r--r-- | src/stdio/vswscanf.c | 40 | 
4 files changed, 312 insertions, 524 deletions
| diff --git a/src/stdio/__scanf.c b/src/stdio/__scanf.c deleted file mode 100644 index 7c82cca4..00000000 --- a/src/stdio/__scanf.c +++ /dev/null @@ -1,483 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <ctype.h> -#include <wchar.h> -#include <wctype.h> -#include <limits.h> -#include <string.h> -#include <errno.h> -#include <math.h> -#include <float.h> - -#include "__scanf.h" - -static int read(rctx_t *r) -{ -	if (--r->w < 0) return r->w = -1; -	if (r->u) r->u = 0; -	else r->read(r); -	return r->c; -} - -static void unread(rctx_t *r) -{ -	if (r->c < 0 || r->w < 0) return; -	r->w++; -	r->u = 1; -} - -#define SIZE_hh -2 -#define SIZE_h  -1 -#define SIZE_def 0 -#define SIZE_l   1 -#define SIZE_ll  2 -#define SIZE_L   3 - -static void store_int(void *dest, int size, int neg, unsigned long long i) -{ -	if (!dest) return; -	if (neg) i = -i; -	switch (size) { -	case SIZE_hh: -		*(char *)dest = i; -		break; -	case SIZE_h: -		*(short *)dest = i; -		break; -	case SIZE_def: -		*(int *)dest = i; -		break; -	case SIZE_l: -		*(long *)dest = i; -		break; -	case SIZE_ll: -		*(long long *)dest = i; -		break; -	} -} - -static void *arg_n(va_list ap, unsigned int n) -{ -	void *p; -	unsigned int i; -	va_list ap2; -	va_copy(ap2, ap); -	for (i=n; i>1; i--) va_arg(ap2, void *); -	p = va_arg(ap2, void *); -	va_end(ap2); -	return p; -} - -int __scanf(rctx_t *r, const wchar_t *fmt, va_list ap) -{ -	int mode=0; -	int width; -	int size; -	const wchar_t *p, *z; -	int c, l, t, m; -	long long dummy; -	char *s; -	wchar_t *wcs; -	mbstate_t st; -	int wide = r->wide; -	void *dest=NULL; -	int invert; -	unsigned long long i=0; -	int neg=0; -	int matches=0; -	long double f; -	int (*is_space)(int) = r->is_space; - -	for (p=fmt; *p; ) { -		if (is_space(*p)) { -			do p++; while (is_space(*p)); -			do r->w=1; while (is_space(read(r))); -			unread(r); -			continue; -		} else if (*p != '%' || p[1] == '%') { -			if (*p == '%') p++; -			r->w = 1; -			if ((c = read(r)) < 0) -				goto input_fail; -			if (*p++ != c) -				goto match_fail; -			continue; -		} -		p++; -		if (mode != 1) { -			for (z=p; isdigit(*z); z++); -			if (*z != '$' && *z != '*') { -				if (mode == 0) mode = 1; -				else goto fmt_fail; -			} else if (*z != '*') { -				int pos = 0; -				mode = 2; -				for (; p<z; p++) { -					pos = 10*pos + *p - '0'; -				} -				p++; -				if (!pos) goto fmt_fail; -				dest = arg_n(ap, pos); -			} -		} -		if (*p == '*') { -			dest = NULL; -			p++; -		} else if (mode == 1) { -			dest = va_arg(ap, void *); -		} -		 -		if (!*p) goto fmt_fail; - -		width = 0; -		for (; isdigit(*p); p++) { -			width = 10*width + *p - '0'; -		} - -		size = 0; -		switch (*p++) { -		case 0: -			goto fmt_fail; -		case 'h': -			if (*p == 'h') p++, size = SIZE_hh; -			else size = SIZE_h; -			break; -		case 'l': -			if (*p == 'l') p++, size = SIZE_ll; -			else size = SIZE_l; -			break; -		case 'j': -			size = SIZE_ll; -			break; -		case 'z': -		case 't': -			size = SIZE_l; -			break; -		case 'L': -			size = SIZE_L; -			break; -		case 'd': case 'i': case 'o': case 'u': case 'x': -		case 'a': case 'e': case 'f': case 'g': -		case 'A': case 'E': case 'F': case 'G': case 'X': -		case 's': case 'c': case '[': -		case 'S': case 'C': -		case 'p': case 'n': -			p--; -			break; -		default: -			goto fmt_fail; -		} - -		t = *p++; - -		switch (t) { -		case 'C': -		case 'c': -			if (width < 1) width = 1; -		case 's': -			if (size == SIZE_l) t &= ~0x20; -		case 'd': case 'i': case 'o': case 'u': case 'x': -		case 'a': case 'e': case 'f': case 'g': -		case 'A': case 'E': case 'F': case 'G': case 'X': -		case '[': case 'S': -		case 'p': case 'n': -			if (width < 1) width = INT_MAX; -			break; -		default: -			goto fmt_fail; -		} - -		r->w = width; - -		if (t != 'n') { -			if (read(r) < 0) goto input_fail; -			unread(r); -		} - -		switch (t) { -		case 'n': -			store_int(dest, size, 0, r->l - r->u); -			/* do not increment match count, etc! */ -			continue; -		case 'C': -			wcs = dest ? dest : (void *)&dummy; -			st = (mbstate_t){ 0 }; -			while ((c=read(r)) >= 0) { -				if (wide) { -					if (dest) *wcs++ = c; -				} else { -					char ch = c; -					switch (mbrtowc(wcs, &ch, 1, &st)) { -					case -1: -						goto enc_fail; -					case -2: -						break; -					default: -						if (dest) wcs++; -					} -				} -			} -			if (r->w > 0) goto match_fail; -			break; -		case 'c': -			s = dest ? dest : (void *)&dummy; -			while ((c=read(r)) >= 0) { -				if (wide) { -					if ((l=wctomb(s, c)) < 0) -						goto enc_fail; -					if (dest) s += l; -				} else { -					if (dest) *s++ = c; -				} -			} -			if (r->w > 0) goto match_fail; -			break; -		case '[': -			wcs = dest ? dest : (void *)&dummy; -			s = dest ? dest : (void *)&dummy; -			if (!wide && size == SIZE_l) st = (mbstate_t){ 0 }; - -			if (*p == '^') p++, invert = 1; -			else invert = 0; - -			if (wide) { -				for (m=0; (c=read(r)) >= 0; m=1) { -					for (z=p; *z && *z != c && (*z != ']' || z==p); z++); -					if (!*z) goto fmt_fail; -					if (*z == c && (*z != ']' || z==p)) { -						if (invert) break; -					} else { -						if (!invert) break; -					} -					if (size == SIZE_l) { -						if (dest) *wcs++ = c; -					} else { -						if ((l=wctomb(s, c)) < 0) -							goto enc_fail; -						if (dest) s += l; -					} -				} -				for (p++; *p && *p != ']'; p++); -				p++; -			} else { -				unsigned char scanset[257]; -				memset(scanset, invert, sizeof scanset); -				scanset[0] = 0; -				for (z=p; *z && (*z != ']' || z==p); z++) -					scanset[1+*z] = 1-invert; -				if (!*z) goto fmt_fail; -				p=z+1; -				c=0; -				for (m=0; scanset[(c=read(r))+1]; m=1) { -					if (size == SIZE_l) { -						char ch = c; -						switch (mbrtowc(wcs, &ch, 1, &st)) { -						case -1: -							goto enc_fail; -						case -2: -							break; -						default: -							if (dest) wcs++; -						} -					} else { -						if (dest) *s++ = c; -					} -				} -			} -			if (!m) goto match_fail; -			if (dest) { -				if (size == SIZE_l) *wcs++ = 0; -				else *s++ = 0; -			} -			break; -		default: -			/* read unlimited number of spaces, then reset width */ -			do r->w = 1; while (is_space(c = read(r))); -			if (c < 0) goto input_fail; -			unread(r); -			r->w = width; -		} - -		switch (t) { -		case 'p': -		case 'X': -			t = 'x'; -		case 'd': -		case 'i': -		case 'o': -		case 'u': -		case 'x': -			i = m = neg = 0; -			if ((c=read(r)) == '-') neg=1; -			else if (c != '+') unread(r); -			switch (t) { -			case 'i': -			case 'x': -				if ((c=read(r)) != '0') { -					if (t == 'i') t = 'd'; -					unread(r); -					break; -				} -				m = 1; -				if (((c=read(r))|0x20) != 'x') { -					if (t == 'i') t = 'o'; -					unread(r); -					break; -				} -				t = 'x'; -				m = 0; -			} -		} -		 -		switch (t) { -		case 'd': -		case 'u': -			for (; isdigit(c=read(r)); m=1) -				i = 10*i + c-'0'; -			goto int_finish; -		case 'o': -			for (; (unsigned)(c=read(r))-'0' < 8; m=1) -				i = (i<<3) + c-'0'; -			goto int_finish; -		case 'x': -			for (; ; m=1) { -				if (isdigit(c=read(r))) { -					i = (i<<4) + c-'0'; -				} else if ((unsigned)(c|0x20)-'a' < 6) { -					i = (i<<4) + (c|0x20)-'a'+10; -				} else break; -			} -		int_finish: -			if (!m) goto match_fail; -			store_int(dest, size, neg, i); -			break; -		case 'a': -		case 'e': -		case 'f': -		case 'g': -			f = 0.0; -			neg = m = 0; -			if ((c=read(r)) == '-') neg=1; -			else if (c != '+') unread(r); -			/* FIXME: check for INF/NAN strings here */ -			if (read(r)=='0' && (m=1, (read(r)|0x20) == 'x')) -				goto hexfloat; -			else unread(r); -			for (; isdigit(c=read(r)); m=1) -				f = 10.0 * f + (c-'0'); -			if (c=='.') { -				double mag = 10.0; -				for (; isdigit(c=read(r)); mag*=10.0) -					f += (c-'0')/mag; -			} -			if ((c|0x20)=='e') { -				int ex=0, en=0; -				m = 0; -				if ((c=read(r))=='-') en=1; -				else if (c!='+') unread(r); -				for (; isdigit(c=read(r)); m=1) -					if (ex < LDBL_MAX_10_EXP) -						ex = 10 * ex + (c-'0'); -				if (ex > LDBL_MAX_10_EXP) -					f = en ? 0 : INFINITY; -				else { -					if (en) while (ex--) f/=10.0; -					else while (ex--) f*=10.0; -				} -			} -			goto writefloat; -hexfloat: -			m = 0; -			for (; isxdigit(c=read(r)); m=1) -				if (isdigit(c)) f = 16.0*f + (c-'0'); -				else f = 16.0*f + ((c|32)-'a'+10); -			if (c=='.') { -				double mag = 1/16.0; -				for (; isxdigit(c=read(r)); mag*=1/16.0) -					if (isdigit(c)) f += (c-'0')*mag; -					else f += ((c|32)-'a'+10)*mag; -			} -			if ((c|0x20)=='p') { -				int ex=0, en=0; -				m = 0; -				if ((c=read(r))=='-') en=1; -				else if (c!='+') unread(r); -				for (; isdigit(c=read(r)); m=1) -					if (ex < LDBL_MAX_EXP) -						ex = 10 * ex + (c-'0'); -				if (ex > LDBL_MAX_EXP) -					f = en ? 0 : INFINITY; -				else { -					if (en) while (ex--) f*=0.5; -					else while (ex--) f*=2.0; -				} -			} -writefloat: -			if (!m) goto match_fail; -			if (neg) f *= -1.0; -			if (dest) switch (size) { -			case SIZE_def: -				*(float *)dest = f; -				break; -			case SIZE_l: -				*(double *)dest = f; -				break; -			case SIZE_L: -				*(long double *)dest = f; -				break; -			} -			break; -		case 'S': -			wcs = dest ? dest : (void *)&dummy; -			st = (mbstate_t){ 0 }; -			while((c=read(r)) >= 0) { -				if (wide) { -					if (is_space(c)) break; -					if (dest) *wcs++ = c; -				} else { -					char ch = c; -					if (is_space(c)) break; -					switch (mbrtowc(wcs, &ch, 1, &st)) { -					case -1: -						goto enc_fail; -					case -2: -						break; -					default: -						if (dest) wcs++; -					} -				} -			} -			if (dest) *wcs++ = 0; -			break; -		case 's': -			s = dest ? dest : (void *)&dummy; -			while((c=read(r)) >= 0) { -				if (wide) { -					if (is_space(c)) break; -					if ((l=wctomb(s, c)) < 0) -						goto enc_fail; -					if (dest) s += l; -				} else { -					if (is_space(c)) break; -					if (dest) *s++ = c; -				} -			} -			if (dest) *s++ = 0; -			break; -		} - -		/* unread will do nothing if field width was exhausted */ -		unread(r); -		if (dest) matches++; -	} -	return matches; -enc_fail: -	errno = EILSEQ; -fmt_fail: -input_fail: -	if (!matches) matches--; -match_fail: -	unread(r); -	return matches; -} diff --git a/src/stdio/__scanf.h b/src/stdio/__scanf.h deleted file mode 100644 index e549b979..00000000 --- a/src/stdio/__scanf.h +++ /dev/null @@ -1,16 +0,0 @@ -#include <wchar.h> - -typedef struct rctx -{ -	void (*read)(struct rctx *); -	void *opaque; -	int wide; -	int (*is_space)(); -	int l; -	int e; -	int c; -	int u; -	int w; -} rctx_t; - -int __scanf(rctx_t *, const wchar_t *, va_list); diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c index 491c1403..4426a129 100644 --- a/src/stdio/vfwscanf.c +++ b/src/stdio/vfwscanf.c @@ -1,28 +1,299 @@  #include <stdio.h> -#include <string.h> -#include <errno.h> +#include <stdlib.h> +#include <stdarg.h> +#include <ctype.h>  #include <wchar.h>  #include <wctype.h> +#include <limits.h> +#include <string.h> +#include <errno.h> +#include <math.h> +#include <float.h>  #include "stdio_impl.h" -#include "__scanf.h" +#include "shgetc.h" +#include "intscan.h" +#include "floatscan.h" + +#define SIZE_hh -2 +#define SIZE_h  -1 +#define SIZE_def 0 +#define SIZE_l   1 +#define SIZE_L   2 +#define SIZE_ll  3 + +static void store_int(void *dest, int size, unsigned long long i) +{ +	if (!dest) return; +	switch (size) { +	case SIZE_hh: +		*(char *)dest = i; +		break; +	case SIZE_h: +		*(short *)dest = i; +		break; +	case SIZE_def: +		*(int *)dest = i; +		break; +	case SIZE_l: +		*(long *)dest = i; +		break; +	case SIZE_ll: +		*(long long *)dest = i; +		break; +	} +} + +static void *arg_n(va_list ap, unsigned int n) +{ +	void *p; +	unsigned int i; +	va_list ap2; +	va_copy(ap2, ap); +	for (i=n; i>1; i--) va_arg(ap2, void *); +	p = va_arg(ap2, void *); +	va_end(ap2); +	return p; +} -static void f_read(rctx_t *r) +static int in_set(const wchar_t *set, int c)  { -	FILE *f = r->opaque; -	if ((r->c = fgetwc(f)) >= 0) r->l++; +	int j; +	const wchar_t *p = set; +	if (*p == '-') { +		if (c=='-') return 1; +		p++; +	} else if (*p == ']') { +		if (c==']') return 1; +		p++; +	} +	for (; *p && *p != ']'; p++) { +		if (*p=='-' && p[1] && p[1] != ']') +			for (j=p++[-1]; j<*p; j++) +				if (c==j) return 1; +		if (c==*p) return 1; +	} +	return 0;  } +#if 1 +#undef getwc +#define getwc(f) \ +	((f)->rpos < (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f)) + +#undef ungetwc +#define ungetwc(c,f) \ +	((f)->rend && (c)<128 ? *--(f)->rpos : ungetwc((c),(f))) +#endif +  int vfwscanf(FILE *f, const wchar_t *fmt, va_list ap)  { -	rctx_t r = { f_read, (void *)f, 1, iswspace }; -	int result; +	int width; +	int size; +	int alloc; +	const wchar_t *p; +	int c, t; +	char *s; +	wchar_t *wcs; +	void *dest=NULL; +	int invert; +	int matches=0; +	off_t pos = 0, cnt; +	static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" }; +	char tmp[3*sizeof(int)+10]; -	result = __scanf(&r, fmt, ap); +	FLOCK(f); -	if (r.u && r.c >= 0) { -		ungetwc(r.c, f); -	} +	for (p=fmt; *p; p++) { + +		if (iswspace(*p)) { +			while (iswspace(p[1])) p++; +			while (iswspace((c=getwc(f)))) pos++; +			ungetwc(c, f); +			continue; +		} +		if (*p != '%' || p[1] == '%') { +			p += *p=='%'; +			c = getwc(f); +			if (c!=*p) { +				ungetwc(c, f); +				if (c<0) goto input_fail; +				goto match_fail; +			} +			pos++; +			continue; +		} + +		p++; +		if (*p=='*') { +			dest = 0; p++; +		} else if (iswdigit(*p) && p[1]=='$') { +			dest = arg_n(ap, *p-'0'); p+=2; +		} else { +			dest = va_arg(ap, void *); +		} + +		for (width=0; iswdigit(*p); p++) { +			width = 10*width + *p - '0'; +		} -	return result; +		if (*p=='m') { +			alloc = 1; +			p++; +		} else { +			alloc = 0; +		} + +		size = SIZE_def; +		switch (*p++) { +		case 'h': +			if (*p == 'h') p++, size = SIZE_hh; +			else size = SIZE_h; +			break; +		case 'l': +			if (*p == 'l') p++, size = SIZE_ll; +			else size = SIZE_l; +			break; +		case 'j': +			size = SIZE_ll; +			break; +		case 'z': +		case 't': +			size = SIZE_l; +			break; +		case 'L': +			size = SIZE_L; +			break; +		case 'd': case 'i': case 'o': case 'u': case 'x': +		case 'a': case 'e': case 'f': case 'g': +		case 'A': case 'E': case 'F': case 'G': case 'X': +		case 's': case 'c': case '[': +		case 'S': case 'C': +		case 'p': case 'n': +			p--; +			break; +		default: +			goto fmt_fail; +		} + +		t = *p; + +		/* Transform ls,lc -> S,C */ +		if (size==SIZE_l && (t&15)==3) t&=~32; + +		if (t != 'n' && t != '[' && (t|32) != 'c') { +			while (iswspace((c=getwc(f)))) pos++; +			if (c < 0) goto input_fail; +			ungetwc(c, f); +		} + +		switch (t) { +		case 'n': +			store_int(dest, size, pos); +			/* do not increment match count, etc! */ +			continue; + +		case 'c': +			if (width < 1) width = 1; +			s = dest; +			for (; width && (c=getwc(f)) >= 0; width--) { +				int l = wctomb(s?s:tmp, c); +				if (l<0) goto input_fail; +				if (s) s+=l; +				pos++; +			} +			if (width) goto match_fail; +			break; + +		case 'C': +			if (width < 1) width = 1; +			wcs = dest; +			for (; width && (c=getwc(f)) >= 0; width--) +				pos++, wcs && (*wcs++ = c); +			if (width) goto match_fail; +			break; + +		case 's': +			s = dest; +			while (!iswspace(c=getwc(f)) && c!=EOF) { +				int l = wctomb(s?s:tmp, c); +				if (l<0) goto input_fail; +				if (s) s+=l; +				pos++; +			} +			if (s) *s = 0; +			break; + +		case 'S': +			wcs = dest; +			while (!iswspace(c=getwc(f)) && c!=EOF) +				pos++, *wcs++ = c; +			if (wcs) *wcs = 0; +			break; + +		case '[': +			s = (size == SIZE_def) ? dest : 0; +			wcs = (size == SIZE_l) ? dest : 0; + +			if (*++p == '^') p++, invert = 1; +			else invert = 0; + +			int gotmatch = 0; + +			for (;;) { +				if ((c=getwc(f))<0) break; +				if (in_set(p, c) == invert) +					break; +				if (wcs) { +					*wcs++ = c; +				} else if (size != SIZE_l) { +					int l = wctomb(s?s:tmp, c); +					if (l<0) goto input_fail; +					if (s) s+=l; +				} +				pos++; +				gotmatch=1; +			} +			ungetwc(c, f); + +			if (!gotmatch) goto match_fail; + +			if (*p==']') p++; +			while (*p!=']') { +				if (!*p) goto fmt_fail; +				p++; +			} + +			if (wcs) *wcs++ = 0; +			if (s) *s++ = 0; +			break; + +		case 'd': case 'i': case 'o': case 'u': case 'x': +		case 'a': case 'e': case 'f': case 'g': +		case 'A': case 'E': case 'F': case 'G': case 'X': +		case 'p': +			if (width < 1) width = 0; +			snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln", +				1+!dest, "%*", width, size_pfx[size+2], t); +			cnt = 0; +			if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1) +				goto input_fail; +			else if (!cnt) +				goto match_fail; +			pos += cnt; +			break; +		default: +			goto fmt_fail; +		} + +		if (dest) matches++; +	} +	if (0) { +fmt_fail: +input_fail: +		if (!matches) matches--; +	} +match_fail: +	FUNLOCK(f); +	return matches;  } diff --git a/src/stdio/vswscanf.c b/src/stdio/vswscanf.c index 2c4ffbe0..4396d7df 100644 --- a/src/stdio/vswscanf.c +++ b/src/stdio/vswscanf.c @@ -1,19 +1,35 @@ -#include <stdio.h> -#include <string.h> -#include <wchar.h> -#include <wctype.h> +#include "stdio_impl.h" -#include "__scanf.h" - -static void s_read(rctx_t *r) +static size_t wstring_read(FILE *f, unsigned char *buf, size_t len)  { -	wchar_t *s = r->opaque; -	if (!s[r->l]) r->c = -1; -	else r->c = s[r->l++]; +	const wchar_t *src = f->cookie; +	size_t k; + +	if (!src) return 0; + +	k = wcsrtombs((void *)f->buf, &src, f->buf_size, 0); +	if (k==(size_t)-1) { +		f->rpos = f->rend = 0; +		return 0; +	} + +	f->rpos = f->buf; +	f->rend = f->buf + k; +	f->cookie = (void *)src; + +	if (!len) return 0; + +	*buf = *f->rpos++; +	return 1;  }  int vswscanf(const wchar_t *s, const wchar_t *fmt, va_list ap)  { -	rctx_t r = { s_read, (void *)s, 1, iswspace }; -	return __scanf(&r, fmt, ap); +	unsigned char buf[256]; +	FILE f = { +		.buf = buf, .buf_size = sizeof buf, +		.cookie = (void *)s, +		.read = wstring_read, .lock = -1 +	}; +	return vfwscanf(&f, fmt, ap);  } | 
