diff options
| author | Rich Felker <dalias@aerifal.cx> | 2014-07-28 18:04:15 -0400 | 
|---|---|---|
| committer | Rich Felker <dalias@aerifal.cx> | 2014-07-28 18:04:15 -0400 | 
| commit | 73d2a3bfda462eebe8291eb788ef8be567a9add8 (patch) | |
| tree | 00a8d3cf08a4d7c27dbd9cf616d742dcf075d05b | |
| parent | c0284b372ca26fe8ea15bb76395c8bff2c444ba9 (diff) | |
| download | musl-73d2a3bfda462eebe8291eb788ef8be567a9add8.tar.gz | |
implement non-default plural rules for ngettext translations
the new code in dcngettext was written by me, and the expression
evaluator by Szabolcs Nagy (nsz).
| -rw-r--r-- | src/locale/dcngettext.c | 54 | ||||
| -rw-r--r-- | src/locale/pleval.c | 197 | 
2 files changed, 243 insertions, 8 deletions
```diff
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index 4f9e4174..51e6522a 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -4,6 +4,7 @@
 #include <errno.h>
 #include <limits.h>
 #include <sys/stat.h>
+#include <ctype.h>
 #include "locale_impl.h"
 #include "libc.h"
 #include "atomic.h"
@@ -95,6 +96,8 @@ struct msgcat {
 	struct msgcat *next;
 	const void *map;
 	size_t map_size;
+	void *plural_rule;
+	int nplurals;
 	char name[];
 };
 
@@ -107,6 +110,7 @@ weak_alias(dummy_gettextdomain, __gettextdomain);
 
 const unsigned char *__map_file(const char *, size_t *);
 int __munmap(void *, size_t);
+unsigned long __pleval(const char *, unsigned long);
 
 char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n, int category)
 {
@@ -190,19 +194,53 @@ notrans:
 	const char *trans = __mo_lookup(p->map, p->map_size, msgid1);
 	if (!trans) goto notrans;
 
-	/* FIXME: support alternate plural rules */
-	if (n != 1) {
-		size_t l = strlen(trans);
-		if (l+1 >= p->map_size - (trans - (char *)p->map))
-			goto notrans;
-		trans += l+1;
+	/* Non-plural-processing gettext forms pass a null pointer as
+	 * msgid2 to request that dcngettext suppress plural processing. */
+	if (!msgid2) return (char *)trans;
+
+	if (!p->plural_rule) {
+		const char *rule = "n!=1;";
+		unsigned long np = 2;
+		const char *r = __mo_lookup(p->map, p->map_size, "");
+		char *z;
+		while (r && strncmp(r, "Plural-Forms:", 13)) {
+			z = strchr(r, '\n');
+			r = z ? z+1 : 0;
+		}
+		if (r) {
+			r += 13;
+			while (isspace(*r)) r++;
+			if (!strncmp(r, "nplurals=", 9)) {
+				np = strtoul(r+9, &z, 10);
+				r = z;
+			}
+			while (*r && *r != ';') r++;
+			if (*r) {
+				r++;
+				while (isspace(*r)) r++;
+				if (!strncmp(r, "plural=", 7))
+					rule = r+7;
+			}
+		}
+		a_store(&p->nplurals, np);
+		a_cas_p(&p->plural_rule, 0, (void *)rule);
+	}
+	if (p->nplurals) {
+		unsigned long plural = __pleval(p->plural_rule, n);
+		if (plural > p->nplurals) goto notrans;
+		while (plural--) {
+			size_t l = strlen(trans);
+			if (l+1 >= p->map_size - (trans - (char *)p->map))
+				goto notrans;
+			trans += l+1;
+		}
 	}
 	return (char *)trans;
 }
 
 char *dcgettext(const char *domainname, const char *msgid, int category)
 {
-	return dcngettext(domainname, msgid, msgid, 1, category);
+	return dcngettext(domainname, msgid, 0, 1, category);
 }
 
 char *dngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n)
@@ -212,5 +250,5 @@ char *dngettext(const char *domainname, const char *msgid1, const char *msgid2,
 
 char *dgettext(const char *domainname, const char *msgid)
 {
-	return dcngettext(domainname, msgid, msgid, 1, LC_MESSAGES);
+	return dcngettext(domainname, msgid, 0, 1, LC_MESSAGES);
 }
diff --git a/src/locale/pleval.c b/src/locale/pleval.c
new file mode 100644
index 00000000..47aefc34
--- /dev/null
+++ b/src/locale/pleval.c
@@ -0,0 +1,197 @@
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+grammar:
+
+Start = Expr ';'
+Expr  = Or | Or '?' Expr ':' Expr
+Or    = And | Or '||' And
+And   = Eq | And '&&' Eq
+Eq    = Rel | Eq '==' Rel | Eq '!=' Rel
+Rel   = Add | Rel '<=' Add | Rel '>=' Add | Rel '<' Add | Rel '>' Add
+Add   = Mul | Add '+' Mul | Add '-' Mul
+Mul   = Term | Mul '*' Term | Mul '/' Term | Mul '%' Term
+Term  = '(' Expr ')' | '!' Term | decimal | 'n'
+
+internals:
+
+recursive descent expression evaluator with stack depth limit.
+eval* functions return the value of the subexpression and set
+the current string pointer to the next non-space char.
+*/
+
+struct st {
+	const char *s;
+	unsigned long n;
+	int err;
+};
+
+static const char *skipspace(const char *s)
+{
+	while (isspace(*s)) s++;
+	return s;
+}
+
+static unsigned long evalconst(struct st *st)
+{
+	char *e;
+	unsigned long n;
+	n = strtoul(st->s, &e, 10);
+	if (!isdigit(*st->s) || e == st->s || n == -1)
+		st->err = 1;
+	st->s = skipspace(e);
+	return n;
+}
+
+static unsigned long evalexpr(struct st *st, int d);
+
+static unsigned long evalterm(struct st *st, int d)
+{
+	unsigned long a;
+	if (d <= 0) {
+		st->err = 1;
+		return 0;
+	}
+	st->s = skipspace(st->s);
+	if (*st->s == '!') {
+		st->s++;
+		return !evalterm(st, d-1);
+	}
+	if (*st->s == '(') {
+		st->s++;
+		a = evalexpr(st, d-1);
+		if (*st->s != ')') {
+			st->err = 1;
+			return 0;
+		}
+		st->s = skipspace(st->s + 1);
+		return a;
+	}
+	if (*st->s == 'n') {
+		st->s = skipspace(st->s + 1);
+		return st->n;
+	}
+	return evalconst(st);
+}
+
+static unsigned long evalmul(struct st *st, int d)
+{
+	unsigned long b, a = evalterm(st, d-1);
+	int op;
+	for (;;) {
+		op = *st->s;
+		if (op != '*' && op != '/' && op != '%')
+			return a;
+		st->s++;
+		b = evalterm(st, d-1);
+		if (op == '*') {
+			a *= b;
+		} else if (!b) {
+			st->err = 1;
+			return 0;
+		} else if (op == '%') {
+			a %= b;
+		} else {
+			a /= b;
+		}
+	}
+}
+
+static unsigned long evaladd(struct st *st, int d)
+{
+	unsigned long a = 0;
+	int add = 1;
+	for (;;) {
+		a += (add?1:-1) * evalmul(st, d-1);
+		if (*st->s != '+' && *st->s != '-')
+			return a;
+		add = *st->s == '+';
+		st->s++;
+	}
+}
+
+static unsigned long evalrel(struct st *st, int d)
+{
+	unsigned long b, a = evaladd(st, d-1);
+	int less, eq;
+	for (;;) {
+		if (*st->s != '<' && *st->s != '>')
+			return a;
+		less = st->s[0] == '<';
+		eq = st->s[1] == '=';
+		st->s += 1 + eq;
+		b = evaladd(st, d-1);
+		a = (less ? a < b : a > b) || (eq && a == b);
+	}
+}
+
+static unsigned long evaleq(struct st *st, int d)
+{
+	unsigned long a = evalrel(st, d-1);
+	int neg;
+	for (;;) {
+		if ((st->s[0] != '=' && st->s[0] != '!') || st->s[1] != '=')
+			return a;
+		neg = st->s[0] == '!';
+		st->s += 2;
+		a = evalrel(st, d-1) == a;
+		a ^= neg;
+	}
+}
+
+static unsigned long evaland(struct st *st, int d)
+{
+	unsigned long a = evaleq(st, d-1);
+	for (;;) {
+		if (st->s[0] != '&' || st->s[1] != '&')
+			return a;
+		st->s += 2;
+		a = evaleq(st, d-1) && a;
+	}
+}
+
+static unsigned long evalor(struct st *st, int d)
+{
+	unsigned long a = evaland(st, d-1);
+	for (;;) {
+		if (st->s[0] != '|' || st->s[1] != '|')
+			return a;
+		st->s += 2;
+		a = evaland(st, d-1) || a;
+	}
+}
+
+static unsigned long evalexpr(struct st *st, int d)
+{
+	unsigned long a1, a2, a3;
+	if (d <= 0) {
+		st->err = 1;
+		return 0;
+	}
+	a1 = evalor(st, d-1);
+	if (*st->s != '?')
+		return a1;
+	st->s++;
+	a2 = evalexpr(st, d-1);
+	if (*st->s != ':') {
+		st->err = 1;
+		return 0;
+	}
+	st->s++;
+	a3 = evalexpr(st, d-1);
+	return a1 ? a2 : a3;
+}
+
+unsigned long __pleval(const char *s, unsigned long n)
+{
+	unsigned long a;
+	struct st st;
+	st.s = s;
+	st.n = n;
+	st.err = 0;
+	a = evalexpr(&st, 100);
+	if (st.err || *st.s != ';')
+		return -1;
+	return a;
+}
```
