/*
 * Plural-form expression evaluator (musl src/locale/pleval.c), recovered from
 * a whitespace-collapsed rendering of the patch described below.  The
 * companion hunks for src/locale/dcngettext.c are only partially visible in
 * that patch, so they are kept as reference text (line comments) and not as
 * live code.  Angle-bracketed text (e-mail address, header names) had been
 * stripped from the rendering and is not reconstructed where it cannot be
 * inferred.
 */

// From 73d2a3bfda462eebe8291eb788ef8be567a9add8 Mon Sep 17 00:00:00 2001
// From: Rich Felker
// Date: Mon, 28 Jul 2014 18:04:15 -0400
// Subject: implement non-default plural rules for ngettext translations
//
// the new code in dcngettext was written by me, and the expression
// evaluator by Szabolcs Nagy (nsz).
// ---
//  src/locale/dcngettext.c |  54 +++++++++++--
//  src/locale/pleval.c     | 197 ++++++++++++++++++++++++++++++++++++++++++++++++
//  2 files changed, 243 insertions(+), 8 deletions(-)
//  create mode 100644 src/locale/pleval.c
//
// (limited to 'src/locale')
//
// NOTE(review), on the dcngettext.c hunks below (not changed here because the
// function is only partially visible):
//  - `if (plural > p->nplurals) goto notrans;` accepts plural == nplurals;
//    if valid form indices are 0..nplurals-1 this looks like it should be
//    `>=` -- confirm against the .mo layout.
//  - `isspace(*r)` is applied to a plain char taken from the catalog header.
//  - `strlen(trans)` is not bounded by the remaining map size before the
//    length check runs.
//
// diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
// index 4f9e4174..51e6522a 100644
// --- a/src/locale/dcngettext.c
// +++ b/src/locale/dcngettext.c
// @@ -4,6 +4,7 @@
//  #include [header name stripped]
//  #include [header name stripped]
//  #include [header name stripped]
// +#include [header name stripped; presumably <ctype.h>, the new code calls isspace]
//  #include "locale_impl.h"
//  #include "libc.h"
//  #include "atomic.h"
// @@ -95,6 +96,8 @@ struct msgcat {
//  	struct msgcat *next;
//  	const void *map;
//  	size_t map_size;
// +	void *plural_rule;
// +	int nplurals;
//  	char name[];
//  };
//
// @@ -107,6 +110,7 @@ weak_alias(dummy_gettextdomain, __gettextdomain);
//
//  const unsigned char *__map_file(const char *, size_t *);
//  int __munmap(void *, size_t);
// +unsigned long __pleval(const char *, unsigned long);
//
//  char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n, int category)
//  {
// @@ -190,19 +194,53 @@ notrans:
//  	const char *trans = __mo_lookup(p->map, p->map_size, msgid1);
//  	if (!trans) goto notrans;
//
// -	/* FIXME: support alternate plural rules */
// -	if (n != 1) {
// -		size_t l = strlen(trans);
// -		if (l+1 >= p->map_size - (trans - (char *)p->map))
// -			goto notrans;
// -		trans += l+1;
// +	/* Non-plural-processing gettext forms pass a null pointer as
// +	 * msgid2 to request that dcngettext suppress plural processing. */
// +	if (!msgid2) return (char *)trans;
// +
// +	if (!p->plural_rule) {
// +		const char *rule = "n!=1;";
// +		unsigned long np = 2;
// +		const char *r = __mo_lookup(p->map, p->map_size, "");
// +		char *z;
// +		while (r && strncmp(r, "Plural-Forms:", 13)) {
// +			z = strchr(r, '\n');
// +			r = z ? z+1 : 0;
// +		}
// +		if (r) {
// +			r += 13;
// +			while (isspace(*r)) r++;
// +			if (!strncmp(r, "nplurals=", 9)) {
// +				np = strtoul(r+9, &z, 10);
// +				r = z;
// +			}
// +			while (*r && *r != ';') r++;
// +			if (*r) {
// +				r++;
// +				while (isspace(*r)) r++;
// +				if (!strncmp(r, "plural=", 7))
// +					rule = r+7;
// +			}
// +		}
// +		a_store(&p->nplurals, np);
// +		a_cas_p(&p->plural_rule, 0, (void *)rule);
// +	}
// +	if (p->nplurals) {
// +		unsigned long plural = __pleval(p->plural_rule, n);
// +		if (plural > p->nplurals) goto notrans;
// +		while (plural--) {
// +			size_t l = strlen(trans);
// +			if (l+1 >= p->map_size - (trans - (char *)p->map))
// +				goto notrans;
// +			trans += l+1;
// +		}
//  	}
//  	return (char *)trans;
//  }
//
//  char *dcgettext(const char *domainname, const char *msgid, int category)
//  {
// -	return dcngettext(domainname, msgid, msgid, 1, category);
// +	return dcngettext(domainname, msgid, 0, 1, category);
//  }
//
//  char *dngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n)
// @@ -212,5 +250,5 @@ char *dngettext(const char *domainname, const char *msgid1, const char *msgid2,
//
//  char *dgettext(const char *domainname, const char *msgid)
//  {
// -	return dcngettext(domainname, msgid, msgid, 1, LC_MESSAGES);
// +	return dcngettext(domainname, msgid, 0, 1, LC_MESSAGES);
//  }
//
// diff --git a/src/locale/pleval.c b/src/locale/pleval.c
// new file mode 100644
// index 00000000..47aefc34
// (contents follow as live code, with review fixes applied)
// -- cgit v1.2.1

#include <stdlib.h>
#include <ctype.h>
#include <limits.h>

/*
grammar:

Start = Expr ';'
Expr  = Or | Or '?' Expr ':' Expr
Or    = And | Or '||' And
And   = Eq | And '&&' Eq
Eq    = Rel | Eq '==' Rel | Eq '!=' Rel
Rel   = Add | Rel '<=' Add | Rel '>=' Add | Rel '<' Add | Rel '>' Add
Add   = Mul | Add '+' Mul | Add '-' Mul
Mul   = Term | Mul '*' Term | Mul '/' Term | Mul '%' Term
Term  = '(' Expr ')' | '!' Term | decimal | 'n'

internals:

recursive descent expression evaluator with stack depth limit.
eval* functions return the value of the subexpression and set
the current string pointer to the next non-space char.

every call passes d-1 to its callee; evalterm and evalexpr reject
d <= 0, which bounds the recursion for hostile input.

operands that C would not evaluate (the right side of a decided
'&&' or '||', the unselected arm of '?:') are still parsed so that
syntax errors are reported, but a division by zero inside them is
not an error.
*/

enum { PLEVAL_MAX_DEPTH = 100 };

struct st {
	const char *s;    /* cursor into the rule text */
	unsigned long n;  /* value substituted for 'n' */
	int err;          /* sticky: syntax error, depth exceeded, or live division by zero */
	int dead;         /* > 0 while parsing an operand whose value will be discarded */
};

/* Advance past white space.  The cast keeps bytes >= 0x80 from reaching
 * isspace as negative values when plain char is signed. */
static const char *skipspace(const char *s)
{
	while (isspace((unsigned char)*s)) s++;
	return s;
}

/* Parse an unsigned decimal constant.  A leading sign (which strtoul would
 * accept) and a value that saturates to ULONG_MAX are both errors. */
static unsigned long evalconst(struct st *st)
{
	char *e;
	unsigned long n = strtoul(st->s, &e, 10);

	if (!isdigit((unsigned char)*st->s) || n == ULONG_MAX)
		st->err = 1;
	st->s = skipspace(e);
	return n;
}

static unsigned long evalexpr(struct st *st, int d);

/* Term = '(' Expr ')' | '!' Term | decimal | 'n' */
static unsigned long evalterm(struct st *st, int d)
{
	unsigned long a;

	if (d <= 0) {
		st->err = 1;
		return 0;
	}
	st->s = skipspace(st->s);
	switch (*st->s) {
	case '!':
		st->s++;
		return !evalterm(st, d-1);
	case '(':
		st->s++;
		a = evalexpr(st, d-1);
		if (*st->s != ')') {
			st->err = 1;
			return 0;
		}
		st->s = skipspace(st->s + 1);
		return a;
	case 'n':
		st->s = skipspace(st->s + 1);
		return st->n;
	default:
		return evalconst(st);
	}
}

/* Mul = Term { ('*' | '/' | '%') Term } */
static unsigned long evalmul(struct st *st, int d)
{
	unsigned long a = evalterm(st, d-1);

	for (;;) {
		int op = *st->s;
		unsigned long b;

		if (op != '*' && op != '/' && op != '%')
			return a;
		st->s++;
		b = evalterm(st, d-1);
		if (op == '*') {
			a *= b;
		} else if (!b) {
			if (!st->dead) {
				st->err = 1;
				return 0;
			}
			/* Result is discarded by the caller; keep parsing
			 * so the rest of the operand is still consumed. */
			a = 0;
		} else if (op == '%') {
			a %= b;
		} else {
			a /= b;
		}
	}
}

/* Add = Mul { ('+' | '-') Mul }; unsigned arithmetic, wraps on underflow. */
static unsigned long evaladd(struct st *st, int d)
{
	unsigned long a = evalmul(st, d-1);

	for (;;) {
		int op = *st->s;
		unsigned long b;

		if (op != '+' && op != '-')
			return a;
		st->s++;
		b = evalmul(st, d-1);
		a = op == '+' ? a + b : a - b;
	}
}

/* Rel = Add { ('<' | '<=' | '>' | '>=') Add } */
static unsigned long evalrel(struct st *st, int d)
{
	unsigned long a = evaladd(st, d-1);

	for (;;) {
		int less, eq;
		unsigned long b;

		if (*st->s != '<' && *st->s != '>')
			return a;
		less = st->s[0] == '<';
		eq = st->s[1] == '=';
		st->s += 1 + eq;
		b = evaladd(st, d-1);
		a = (less ? a < b : a > b) || (eq && a == b);
	}
}

/* Eq = Rel { ('==' | '!=') Rel } */
static unsigned long evaleq(struct st *st, int d)
{
	unsigned long a = evalrel(st, d-1);

	for (;;) {
		int neg;
		unsigned long b;

		if ((st->s[0] != '=' && st->s[0] != '!') || st->s[1] != '=')
			return a;
		neg = st->s[0] == '!';
		st->s += 2;
		b = evalrel(st, d-1);
		a = (a == b) ^ neg;
	}
}

/* And = Eq { '&&' Eq }; the right operand is dead once the left is zero. */
static unsigned long evaland(struct st *st, int d)
{
	unsigned long a = evaleq(st, d-1);

	for (;;) {
		int skip;
		unsigned long b;

		if (st->s[0] != '&' || st->s[1] != '&')
			return a;
		st->s += 2;
		skip = !a;
		st->dead += skip;
		b = evaleq(st, d-1);
		st->dead -= skip;
		a = a && b;
	}
}

/* Or = And { '||' And }; the right operand is dead once the left is nonzero. */
static unsigned long evalor(struct st *st, int d)
{
	unsigned long a = evaland(st, d-1);

	for (;;) {
		int skip;
		unsigned long b;

		if (st->s[0] != '|' || st->s[1] != '|')
			return a;
		st->s += 2;
		skip = !!a;
		st->dead += skip;
		b = evaland(st, d-1);
		st->dead -= skip;
		a = a || b;
	}
}

/* Expr = Or [ '?' Expr ':' Expr ]; only the selected arm is live. */
static unsigned long evalexpr(struct st *st, int d)
{
	unsigned long cond, yes, no;
	int pick_yes;

	if (d <= 0) {
		st->err = 1;
		return 0;
	}
	cond = evalor(st, d-1);
	if (*st->s != '?')
		return cond;
	st->s++;
	pick_yes = cond != 0;

	st->dead += !pick_yes;
	yes = evalexpr(st, d-1);
	st->dead -= !pick_yes;

	if (*st->s != ':') {
		st->err = 1;
		return 0;
	}
	st->s++;

	st->dead += pick_yes;
	no = evalexpr(st, d-1);
	st->dead -= pick_yes;

	return pick_yes ? yes : no;
}

/*
 * Evaluate the plural rule s (an expression terminated by ';', see the
 * grammar above) with the variable 'n' bound to n.
 *
 * Returns the value of the expression, or (unsigned long)-1 if s is a null
 * pointer, is not a well-formed rule, nests too deeply, contains a constant
 * that does not fit in unsigned long, or divides by zero in an operand that
 * is actually evaluated.  An expression whose value is itself ULONG_MAX
 * cannot be told apart from an error.
 */
unsigned long __pleval(const char *s, unsigned long n)
{
	struct st st = { .s = s, .n = n };
	unsigned long a;

	if (!s)
		return (unsigned long)-1;
	a = evalexpr(&st, PLEVAL_MAX_DEPTH);
	if (st.err || *st.s != ';')
		return (unsigned long)-1;
	return a;
}