summaryrefslogtreecommitdiff
path: root/src/multibyte/decode.c
blob: 8d3d3c0b91445b64b2746a7a598efd84c9a31d84 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/* 
 * This code was written by Rich Felker in 2010; no copyright is claimed.
 * This code is in the public domain. Attribution is appreciated but
 * unnecessary.
 */

#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>

#include "internal.h"

/* Decodes UTF-8 byte-by-byte. The c argument must be initialized to 0
 * to begin decoding; when finished it will contain the Unicode scalar
 * value decoded. Return value is 1 if finished, 0 if in-progress, and
 * -1 if an invalid sequence was encountered. After an invalid sequence,
 * the state (in c) automatically resets to 0 if a continuation byte was
 * expected, to facilitate a calling idiom of immediately retrying a
 * failed decode call after processing the invalid sequence. If the
 * second try fails, the byte is invalid as a starter as well.
 *
 * A trivial usage idiom is:
 *       while (src<end && (n=decode(dst, *src))>=0) 1[dst+=n]=0, src++;
 */

int decode(unsigned *c, unsigned b)
{
	/* Non-zero state: a multibyte sequence is already in progress, so
	 * b is validated and consumed as the next byte of that sequence. */
	if (*c) {
		if (OOB(*c,b)) {
			/* OOB (from internal.h) rejected b for the current
			 * state; clear the state so the caller may retry the
			 * same byte as a fresh starter. */
			*c = 0;
			return -1;
		}
		/* Make room for six more bits and merge in b's payload. */
		*c = (*c << 6) | (b - 0x80);
		/* While bit 31 is still set the sequence is incomplete (0);
		 * once it is clear, *c holds the finished value (1). */
		return (*c & (1U<<31)) ? 0 : 1;
	}

	/* Zero state: b is the first byte of a new character. */
	if (b < 0x80) {
		/* Values below 0x80 are complete on their own. */
		*c = b;
		return 1;
	}
	if (b-SA >= SB-SA) {
		/* With unsigned wraparound this rejects any b outside
		 * [SA,SB) (assuming SA <= SB, as defined in internal.h):
		 * b cannot begin a sequence. */
		*c = FAILSTATE;
		return -1;
	}

	/* Valid lead byte: load its initial decoder state from bittab. */
	*c = bittab[b-SA];
	return 0;
}