diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ctype/__ctype_get_mb_cur_max.c | 5 |
| -rw-r--r-- | src/locale/langinfo.c | 3 |
| -rw-r--r-- | src/multibyte/btowc.c | 6 |
| -rw-r--r-- | src/multibyte/internal.h | 7 |
| -rw-r--r-- | src/multibyte/mbrtowc.c | 2 |
| -rw-r--r-- | src/multibyte/mbsrtowcs.c | 19 |
| -rw-r--r-- | src/multibyte/mbtowc.c | 2 |
| -rw-r--r-- | src/multibyte/wcrtomb.c | 9 |
| -rw-r--r-- | src/multibyte/wctob.c | 4 |
| -rw-r--r-- | src/regex/fnmatch.c | 3 |
10 files changed, 53 insertions, 7 deletions
diff --git a/src/ctype/__ctype_get_mb_cur_max.c b/src/ctype/__ctype_get_mb_cur_max.c index d235f4da..8e946fc1 100644 --- a/src/ctype/__ctype_get_mb_cur_max.c +++ b/src/ctype/__ctype_get_mb_cur_max.c @@ -1,6 +1,7 @@ -#include <stddef.h> +#include <stdlib.h> +#include "locale_impl.h"  size_t __ctype_get_mb_cur_max()  { -	return 4; +	return MB_CUR_MAX;  } diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c index a1ada246..776b4478 100644 --- a/src/locale/langinfo.c +++ b/src/locale/langinfo.c @@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, locale_t loc)  	int idx = item & 65535;  	const char *str; -	if (item == CODESET) return "UTF-8"; +	if (item == CODESET) +		return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8";  	switch (cat) {  	case LC_NUMERIC: diff --git a/src/multibyte/btowc.c b/src/multibyte/btowc.c index 29cb798d..8acd0a2c 100644 --- a/src/multibyte/btowc.c +++ b/src/multibyte/btowc.c @@ -1,8 +1,10 @@  #include <stdio.h>  #include <wchar.h> +#include <stdlib.h> +#include "internal.h"  wint_t btowc(int c)  { -	c = (unsigned char)c; -	return c<128U ? c : EOF; +	int b = (unsigned char)c; +	return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF;  } diff --git a/src/multibyte/internal.h b/src/multibyte/internal.h index cc017fa2..53d62eda 100644 --- a/src/multibyte/internal.h +++ b/src/multibyte/internal.h @@ -23,3 +23,10 @@ extern const uint32_t bittab[];  #define SA 0xc2u  #define SB 0xf4u + +/* Arbitrary encoding for representing code units instead of characters. */ +#define CODEUNIT(c) (0xdfff & (signed char)(c)) +#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80) + +/* Get inline definition of MB_CUR_MAX. */ +#include "locale_impl.h" diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c index e7b36540..ca7da700 100644 --- a/src/multibyte/mbrtowc.c +++ b/src/multibyte/mbrtowc.c @@ -4,6 +4,7 @@   * unnecessary.   
*/ +#include <stdlib.h>  #include <wchar.h>  #include <errno.h>  #include "internal.h" @@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate  	if (!n) return -2;  	if (!c) {  		if (*s < 0x80) return !!(*wc = *s); +		if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;  		if (*s-SA > SB-SA) goto ilseq;  		c = bittab[*s++-SA]; n--;  	} diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c index 3c1343ae..e23083d2 100644 --- a/src/multibyte/mbsrtowcs.c +++ b/src/multibyte/mbsrtowcs.c @@ -7,6 +7,8 @@  #include <stdint.h>  #include <wchar.h>  #include <errno.h> +#include <string.h> +#include <stdlib.h>  #include "internal.h"  size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st) @@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs  		}  	} +	if (MB_CUR_MAX==1) { +		if (!ws) return strlen((const char *)s); +		for (;;) { +			if (!wn) { +				*src = (const void *)s; +				return wn0; +			} +			if (!*s) break; +			c = *s++; +			*ws++ = CODEUNIT(c); +			wn--; +		} +		*ws = 0; +		*src = 0; +		return wn0-wn; +	} +  	if (!ws) for (;;) {  		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {  			while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c index 803d2213..71a95066 100644 --- a/src/multibyte/mbtowc.c +++ b/src/multibyte/mbtowc.c @@ -4,6 +4,7 @@   * unnecessary.   
*/ +#include <stdlib.h>  #include <wchar.h>  #include <errno.h>  #include "internal.h" @@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n)  	if (!wc) wc = &dummy;  	if (*s < 0x80) return !!(*wc = *s); +	if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;  	if (*s-SA > SB-SA) goto ilseq;  	c = bittab[*s++-SA]; diff --git a/src/multibyte/wcrtomb.c b/src/multibyte/wcrtomb.c index 59f733db..ddc37a57 100644 --- a/src/multibyte/wcrtomb.c +++ b/src/multibyte/wcrtomb.c @@ -4,8 +4,10 @@   * unnecessary.   */ +#include <stdlib.h>  #include <wchar.h>  #include <errno.h> +#include "internal.h"  size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)  { @@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)  	if ((unsigned)wc < 0x80) {  		*s = wc;  		return 1; +	} else if (MB_CUR_MAX == 1) { +		if (!IS_CODEUNIT(wc)) { +			errno = EILSEQ; +			return -1; +		} +		*s = wc; +		return 1;  	} else if ((unsigned)wc < 0x800) {  		*s++ = 0xc0 | (wc>>6);  		*s = 0x80 | (wc&0x3f); diff --git a/src/multibyte/wctob.c b/src/multibyte/wctob.c index d6353ee1..4aeda6a1 100644 --- a/src/multibyte/wctob.c +++ b/src/multibyte/wctob.c @@ -1,8 +1,10 @@ -#include <stdio.h>  #include <wchar.h> +#include <stdlib.h> +#include "internal.h"  int wctob(wint_t c)  {  	if (c < 128U) return c; +	if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c;  	return EOF;  } diff --git a/src/regex/fnmatch.c b/src/regex/fnmatch.c index 7f6b65f3..978fff88 100644 --- a/src/regex/fnmatch.c +++ b/src/regex/fnmatch.c @@ -18,6 +18,7 @@  #include <stdlib.h>  #include <wchar.h>  #include <wctype.h> +#include "locale_impl.h"  #define END 0  #define UNMATCHABLE -2 @@ -229,7 +230,7 @@ static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n  	 * On illegal sequences we may get it wrong, but in that case  	 * we necessarily have a matching failure anyway. 
*/  	for (s=endstr; s>str && tailcnt; tailcnt--) { -		if (s[-1] < 128U) s--; +		if (s[-1] < 128U || MB_CUR_MAX==1) s--;  		else while ((unsigned char)*--s-0x80U<0x40 && s>str);  	}  	if (tailcnt) return FNM_NOMATCH;  | 
