diff options
| author | Rich Felker <dalias@aerifal.cx> | 2017-05-27 21:36:00 -0400 | 
|---|---|---|
| committer | Rich Felker <dalias@aerifal.cx> | 2017-05-27 21:36:00 -0400 | 
| commit | 97bd6b09dbe7478d5a90a06ecd9e5b59389d8eb9 (patch) | |
| tree | 01e743743f40c220b87fb01937418320290a7007 /src | |
| parent | f9f686b7721e2cc35e20fa5c6df6da2dc4ac3f50 (diff) | |
| download | musl-97bd6b09dbe7478d5a90a06ecd9e5b59389d8eb9.tar.gz | |
fix iconv conversions to legacy 8bit encodings
there was missing reverse-conversion logic for the case, handled
specially in the character set tables, where a byte represents a
unicode codepoint with the same value.
this patch adds code to handle the case, and refactors the two-level
10-bit table lookup for legacy character sets into a function to avoid
repeating it yet another time as part of the fix.
Diffstat (limited to 'src')
| -rw-r--r-- | src/locale/iconv.c | 21 | 
1 files changed, 12 insertions, 9 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c index 1eeea94e..4636307f 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -151,6 +151,14 @@ static void put_32(unsigned char *s, unsigned c, int e)  #define mbrtowc_utf8 mbrtowc  #define wctomb_utf8 wctomb +static unsigned legacy_map(const unsigned char *map, unsigned c) +{ +	unsigned x = c - 128 + map[-1]; +	x = legacy_chars[ map[x*5/4]>>2*x%8 | +		map[x*5/4+1]<<8-2*x%8 & 1023 ]; +	return x ? x : c; +} +  size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)  {  	size_t x=0; @@ -364,10 +372,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr  			break;  		default:  			if (c < 128+type) break; -			c -= 128+type; -			c = legacy_chars[ map[c*5/4]>>2*c%8 | -				map[c*5/4+1]<<8-2*c%8 & 1023 ]; -			if (!c) c = *(unsigned char *)*in; +			c = legacy_map(map, c);  			if (c==1) goto ilseq;  		} @@ -392,17 +397,15 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr  			if (c > 0x7f) subst: x++, c='*';  		default:  			if (*outb < 1) goto toobig; -			if (c < 128+totype) { +			if (c < 128+totype || (c<256 && c==legacy_map(tomap, c))) {  			revout:  				*(*out)++ = c;  				*outb -= 1;  				break;  			}  			d = c; -			for (c=0; c<128-totype; c++) { -				if (d == legacy_chars[ tomap[c*5/4]>>2*c%8 | -					tomap[c*5/4+1]<<8-2*c%8 & 1023 ]) { -					c += 128; +			for (c=128+totype; c<256; c++) { +				if (d == legacy_map(tomap, c)) {  					goto revout;  				}  			}  | 
