summary refs log tree commit diff
path: root/src/locale/iconv.c
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2017-11-14 23:47:05 -0500
committer Rich Felker <dalias@aerifal.cx> 2017-11-14 23:54:02 -0500
commit a223dbd27ae36fe53f9f67f86caf685b729593fc (patch)
tree 68c77c0d8c0491da9b9fa573919205a4eb5c2f47 /src/locale/iconv.c
parent 105eff9dec51bc4898a74af2854ab71f927a5c3b (diff)
download musl-a223dbd27ae36fe53f9f67f86caf685b729593fc.tar.gz
add reverse iconv mappings for JIS-based encodings
these encodings are still commonly used in messaging protocols and such. the reverse mapping is implemented as a binary search of a list of the jis 0208 characters in unicode order; the existing forward table is used to perform the comparison in the search.
Diffstat (limited to 'src/locale/iconv.c')
-rw-r--r-- src/locale/iconv.c 98
1 files changed, 97 insertions, 1 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 01f17521..1784dc9d 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -80,6 +80,10 @@ static const unsigned short ksc[93][94] = {
#include "ksc.h"
};
+static const unsigned short rev_jis[] = { /* each entry is a packed jis0208 index (row*256 + column) as decoded by uni_to_jis; contents come from revjis.h */
+#include "revjis.h"
+};
+
static int fuzzycmp(const unsigned char *a, const unsigned char *b)
{
for (; *a && *b; a++, b++) {
@@ -134,7 +138,7 @@ iconv_t iconv_open(const char *to, const char *from)
if ((t = find_charmap(to))==-1
|| (f = find_charmap(from))==-1
- || (charmaps[t] >= 0320)) {
+ || (charmaps[t] >= 0330)) {
errno = EINVAL;
return (iconv_t)-1;
}
@@ -192,6 +196,25 @@ static unsigned legacy_map(const unsigned char *map, unsigned c)
return x < 256 ? x : legacy_chars[x-256];
}
+static unsigned uni_to_jis(unsigned c) /* binary-search rev_jis for the entry whose jis0208 value equals c; returns that entry + 0x2121, or 0 if there is none */
+{
+ unsigned nel = sizeof rev_jis / sizeof *rev_jis; /* number of candidates left in the search window */
+ unsigned d, j, i, b = 0; /* b: index of the first candidate in the window */
+ for (;;) {
+ i = nel/2; /* probe the middle of the window */
+ j = rev_jis[b+i]; /* packed index: row is j/256, column is j%256 */
+ d = jis0208[j/256][j%256]; /* value the forward table stores for that entry */
+ if (d==c) return j + 0x2121; /* found; 0x2121 adds 0x21 to both the row and column parts (assumes the column part does not carry - TODO confirm) */
+ else if (nel == 1) return 0; /* window is a single entry and it did not match: c has no mapping */
+ else if (c < d)
+ nel /= 2; /* keep the lower part of the window, [b, b+i) */
+ else {
+ b += i; /* keep the upper part; it starts at the entry just probed */
+ nel -= nel/2;
+ }
+ }
+}
+
size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
{
size_t x=0;
@@ -493,6 +516,79 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
}
}
goto subst;
+ case SHIFT_JIS:
+ if (c < 128) goto revout;
+ if (c == 0xa5) {
+ x++;
+ c = '\\';
+ goto revout;
+ }
+ if (c == 0x203e) {
+ x++;
+ c = '~';
+ goto revout;
+ }
+ if (c-0xff61 <= 0xdf-0xa1) {
+ c += 0xa1 - 0xff61;
+ goto revout;
+ }
+ c = uni_to_jis(c);
+ if (!c) goto subst;
+ if (*outb < 2) goto toobig;
+ d = c%256;
+ c = c/256;
+ *(*out)++ = (c+1)/2 + (c<95 ? 112 : 176);
+ *(*out)++ = c%2 ? d + 31 + d/96 : d + 126;
+ *outb -= 2;
+ break;
+ case EUC_JP:
+ if (c < 128) goto revout;
+ if (c-0xff61 <= 0xdf-0xa1) {
+ c += 0x0e00 + 0x21 - 0xff61;
+ } else {
+ c = uni_to_jis(c);
+ }
+ if (!c) goto subst;
+ if (*outb < 2) goto toobig;
+ *(*out)++ = c/256 + 0x80;
+ *(*out)++ = c%256 + 0x80;
+ *outb -= 2;
+ break;
+ case ISO2022_JP:
+ if (c < 128) goto revout;
+ if (c-0xff61 <= 0xdf-0xa1 || c==0xa5 || c==0x203e) {
+ if (*outb < 7) goto toobig;
+ *(*out)++ = '\033';
+ *(*out)++ = '(';
+ if (c==0xa5) {
+ *(*out)++ = 'J';
+ *(*out)++ = '\\';
+ } else if (c==0x203e) {
+ *(*out)++ = 'J';
+ *(*out)++ = '~';
+ } else {
+ *(*out)++ = 'I';
+ *(*out)++ = c-0xff61+0x21;
+ }
+ *(*out)++ = '\033';
+ *(*out)++ = '(';
+ *(*out)++ = 'B';
+ *outb -= 7;
+ break;
+ }
+ c = uni_to_jis(c);
+ if (!c) goto subst;
+ if (*outb < 8) goto toobig;
+ *(*out)++ = '\033';
+ *(*out)++ = '$';
+ *(*out)++ = 'B';
+ *(*out)++ = c/256;
+ *(*out)++ = c%256;
+ *(*out)++ = '\033';
+ *(*out)++ = '(';
+ *(*out)++ = 'B';
+ *outb -= 8;
+ break;
case UCS2BE:
case UCS2LE:
case UTF_16BE: