From 0b44a0315b47dd8eced9f3b7f31580cf14bbfc01 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 12 Feb 2011 00:22:29 -0500 Subject: initial check-in, version 0.5.0 --- src/multibyte/decode.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/multibyte/decode.c (limited to 'src/multibyte/decode.c') diff --git a/src/multibyte/decode.c b/src/multibyte/decode.c new file mode 100644 index 00000000..8d3d3c0b --- /dev/null +++ b/src/multibyte/decode.c @@ -0,0 +1,47 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include +#include +#include +#include + +#include "internal.h" + +/* Decodes UTF-8 byte-by-byte. The c argument must be initialized to 0 + * to begin decoding; when finished it will contain the Unicode scalar + * value decoded. Return value is 1 if finished, 0 if in-progress, and + * -1 if an invalid sequence was encountered. After an invalid sequence, + * the state (in c) automatically resets to 0 if a continuation byte was + * expected to facilitate a calling idiom of immediately retrying a + * failed decode call after processing the invalid sequence. If the + * second try fails, the byte is invalid as a starter as well. + * + * A trivial usage idiom is: + * while (src=0) 1[dst+=n]=0, src++; + */ + +int decode(unsigned *c, unsigned b) +{ + if (!*c) { + if (b < 0x80) { + *c = b; + return 1; + } else if (b-SA >= SB-SA) { + *c = FAILSTATE; + return -1; + } + *c = bittab[b-SA]; + return 0; + } + + if (OOB(*c,b)) { + *c = 0; + return -1; + } + *c = *c<<6 | b-0x80; + return !(*c&(1U<<31)); +} -- cgit v1.2.1