summaryrefslogtreecommitdiff
path: root/src/locale
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2017-05-27 21:36:00 -0400
committer Rich Felker <dalias@aerifal.cx> 2017-05-27 21:36:00 -0400
commit 97bd6b09dbe7478d5a90a06ecd9e5b59389d8eb9 (patch)
tree 01e743743f40c220b87fb01937418320290a7007 /src/locale
parent f9f686b7721e2cc35e20fa5c6df6da2dc4ac3f50 (diff)
download musl-97bd6b09dbe7478d5a90a06ecd9e5b59389d8eb9.tar.gz
fix iconv conversions to legacy 8bit encodings
there was missing reverse-conversion logic for the case, handled specially in the character set tables, where a byte represents a unicode codepoint with the same value. this patch adds code to handle the case, and refactors the two-level 10-bit table lookup for legacy character sets into a function to avoid repeating it yet another time as part of the fix.
Diffstat (limited to 'src/locale')
-rw-r--r-- src/locale/iconv.c 21
1 files changed, 12 insertions, 9 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 1eeea94e..4636307f 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -151,6 +151,14 @@ static void put_32(unsigned char *s, unsigned c, int e)
#define mbrtowc_utf8 mbrtowc
#define wctomb_utf8 wctomb
+/* Map byte value c of a legacy 8-bit charset to the value stored for it
+ * in legacy_chars, using the charset's packed table at map.
+ *
+ * map holds consecutive 10-bit indices into legacy_chars, packed
+ * little-endian (four indices per five bytes). map[-1] is the offset
+ * above 128 of the first byte value covered by the table, so callers
+ * must pass c >= 128+map[-1].
+ *
+ * A legacy_chars entry of 0 means the byte stands for the codepoint
+ * with the same value, so c itself is returned in that case. */
+static unsigned legacy_map(const unsigned char *map, unsigned c)
+{
+	/* Index relative to the first covered byte. This must subtract
+	 * map[-1] to match the "c -= 128+type" computation this helper
+	 * replaces in iconv() (assuming the caller's type is map[-1]);
+	 * adding it selects the wrong entry for any non-zero offset. */
+	unsigned x = c - 128 - map[-1];
+	unsigned i = x*5/4;	/* byte holding the low bits of entry x */
+	unsigned s = 2*x%8;	/* bit offset of entry x within that byte */
+	x = legacy_chars[ map[i]>>s | (map[i+1]<<(8-s) & 1023) ];
+	return x ? x : c;
+}
+
size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
{
size_t x=0;
@@ -364,10 +372,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
break;
default:
if (c < 128+type) break;
- c -= 128+type;
- c = legacy_chars[ map[c*5/4]>>2*c%8 |
- map[c*5/4+1]<<8-2*c%8 & 1023 ];
- if (!c) c = *(unsigned char *)*in;
+ c = legacy_map(map, c);
if (c==1) goto ilseq;
}
@@ -392,17 +397,15 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
if (c > 0x7f) subst: x++, c='*';
default:
if (*outb < 1) goto toobig;
- if (c < 128+totype) {
+ if (c < 128+totype || (c<256 && c==legacy_map(tomap, c))) {
revout:
*(*out)++ = c;
*outb -= 1;
break;
}
d = c;
- for (c=0; c<128-totype; c++) {
- if (d == legacy_chars[ tomap[c*5/4]>>2*c%8 |
- tomap[c*5/4+1]<<8-2*c%8 & 1023 ]) {
- c += 128;
+ for (c=128+totype; c<256; c++) {
+ if (d == legacy_map(tomap, c)) {
goto revout;
}
}