diff options
Diffstat (limited to 'src/locale/iconv.c')
| -rw-r--r-- | src/locale/iconv.c | 50 |
1 file changed, 38 insertions, 12 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 3047c27b..4151411d 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -49,10 +49,10 @@ static const unsigned char charmaps[] =
 "ucs4\0utf32\0\0\313"
 "ucs2\0\0\314"
 "eucjp\0\0\320"
-"shiftjis\0sjis\0\0\321"
+"shiftjis\0sjis\0cp932\0\0\321"
 "iso2022jp\0\0\322"
 "gb18030\0\0\330"
-"gbk\0\0\331"
+"gbk\0cp936\0windows936\0\0\331"
 "gb2312\0\0\332"
 "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
 "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
@@ -74,6 +74,10 @@ static const unsigned short gb18030[126][190] = {
 #include "gb18030.h"
 };
 
+static const unsigned short gb18030utf[][2] = {
+#include "gb18030utf.h"
+};
+
 static const unsigned short big5[89][157] = {
 #include "big5.h"
 };
@@ -224,6 +228,8 @@ static unsigned uni_to_jis(unsigned c)
 	}
 }
 
+#define countof(a) (sizeof (a) / sizeof *(a))
+
 size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
 {
 	size_t x=0;
@@ -339,7 +345,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 			} else if (d-159 <= 252-159) {
 				c++;
 				d -= 159;
+			} else {
+				goto ilseq;
 			}
+			if (c>=84) goto ilseq;
 			c = jis0208[c][d];
 			if (!c) goto ilseq;
 			break;
@@ -403,6 +412,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 			if (c < 128) break;
 			if (c < 0xa1) goto ilseq;
 		case GBK:
+			if (c == 128) {
+				c = 0x20ac;
+				break;
+			}
 		case GB18030:
 			if (c < 128) break;
 			c -= 0x81;
@@ -423,15 +436,24 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 					d = *((unsigned char *)*in + 3);
 					if (d-'0'>9) goto ilseq;
 					c += d-'0';
-					c += 128;
-					for (d=0; d<=c; ) {
-						k = 0;
-						for (int i=0; i<126; i++)
-							for (int j=0; j<190; j++)
-								if (gb18030[i][j]-d <= c-d)
-									k++;
-						d = c+1;
-						c += k;
+					/* Starting at 90 30 81 30 (189000), mapping is
+					 * linear without gaps, to U+10000 and up. */
+					if (c >= 189000) {
+						c -= 189000;
+						c += 0x10000;
+						if (c >= 0x110000) goto ilseq;
+						break;
+					}
+					/* Otherwise we must process an index into set
+					 * of characters unmapped by 2-byte table. */
+					for (int i=0; ; i++) {
+						if (i==countof(gb18030utf))
+							goto ilseq;
+						if (c<gb18030utf[i][1]) {
+							c += gb18030utf[i][0];
+							break;
+						}
+						c -= gb18030utf[i][1];
 					}
 					break;
 				}
@@ -495,7 +517,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 				if (c >= 93 || d >= 94) {
 					c += (0xa1-0x81);
 					d += 0xa1;
-					if (c >= 93 || c>=0xc6-0x81 && d>0x52)
+					if (c > 0xc6-0x81 || c==0xc6-0x81 && d>0x52)
 						goto ilseq;
 					if (d-'A'<26) d = d-'A';
 					else if (d-'a'<26) d = d-'a'+26;
@@ -538,6 +560,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
 				if (*outb < k) goto toobig;
 				memcpy(*out, tmp, k);
 			} else k = wctomb_utf8(*out, c);
+			/* This failure condition should be unreachable, but
+			 * is included to prevent decoder bugs from translating
+			 * into advancement outside the output buffer range. */
+			if (k>4) goto ilseq;
 			*out += k;
 			*outb -= k;
 			break;
