summaryrefslogtreecommitdiff
path: root/src/locale/iconv.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/locale/iconv.c')
-rw-r--r--src/locale/iconv.c50
1 files changed, 38 insertions, 12 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 3047c27b..4151411d 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -49,10 +49,10 @@ static const unsigned char charmaps[] =
"ucs4\0utf32\0\0\313"
"ucs2\0\0\314"
"eucjp\0\0\320"
-"shiftjis\0sjis\0\0\321"
+"shiftjis\0sjis\0cp932\0\0\321"
"iso2022jp\0\0\322"
"gb18030\0\0\330"
-"gbk\0\0\331"
+"gbk\0cp936\0windows936\0\0\331"
"gb2312\0\0\332"
"big5\0bigfive\0cp950\0big5hkscs\0\0\340"
"euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
@@ -74,6 +74,10 @@ static const unsigned short gb18030[126][190] = {
#include "gb18030.h"
};
+static const unsigned short gb18030utf[][2] = {
+#include "gb18030utf.h"
+};
+
static const unsigned short big5[89][157] = {
#include "big5.h"
};
@@ -224,6 +228,8 @@ static unsigned uni_to_jis(unsigned c)
}
}
+#define countof(a) (sizeof (a) / sizeof *(a))
+
size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
{
size_t x=0;
@@ -339,7 +345,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
} else if (d-159 <= 252-159) {
c++;
d -= 159;
+ } else {
+ goto ilseq;
}
+ if (c>=84) goto ilseq;
c = jis0208[c][d];
if (!c) goto ilseq;
break;
@@ -403,6 +412,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
if (c < 128) break;
if (c < 0xa1) goto ilseq;
case GBK:
+ if (c == 128) {
+ c = 0x20ac;
+ break;
+ }
case GB18030:
if (c < 128) break;
c -= 0x81;
@@ -423,15 +436,24 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
d = *((unsigned char *)*in + 3);
if (d-'0'>9) goto ilseq;
c += d-'0';
- c += 128;
- for (d=0; d<=c; ) {
- k = 0;
- for (int i=0; i<126; i++)
- for (int j=0; j<190; j++)
- if (gb18030[i][j]-d <= c-d)
- k++;
- d = c+1;
- c += k;
+ /* Starting at 90 30 81 30 (189000), mapping is
+ * linear without gaps, to U+10000 and up. */
+ if (c >= 189000) {
+ c -= 189000;
+ c += 0x10000;
+ if (c >= 0x110000) goto ilseq;
+ break;
+ }
+ /* Otherwise we must process an index into set
+ * of characters unmapped by 2-byte table. */
+ for (int i=0; ; i++) {
+ if (i==countof(gb18030utf))
+ goto ilseq;
+ if (c<gb18030utf[i][1]) {
+ c += gb18030utf[i][0];
+ break;
+ }
+ c -= gb18030utf[i][1];
}
break;
}
@@ -495,7 +517,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
if (c >= 93 || d >= 94) {
c += (0xa1-0x81);
d += 0xa1;
- if (c >= 93 || c>=0xc6-0x81 && d>0x52)
+ if (c > 0xc6-0x81 || c==0xc6-0x81 && d>0x52)
goto ilseq;
if (d-'A'<26) d = d-'A';
else if (d-'a'<26) d = d-'a'+26;
@@ -538,6 +560,10 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
if (*outb < k) goto toobig;
memcpy(*out, tmp, k);
} else k = wctomb_utf8(*out, c);
+ /* This failure condition should be unreachable, but
+ * is included to prevent decoder bugs from translating
+ * into advancement outside the output buffer range. */
+ if (k>4) goto ilseq;
*out += k;
*outb -= k;
break;