summary refs log tree commit diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2014-07-28 18:04:15 -0400
committer Rich Felker <dalias@aerifal.cx> 2014-07-28 18:04:15 -0400
commit 73d2a3bfda462eebe8291eb788ef8be567a9add8 (patch)
tree 00a8d3cf08a4d7c27dbd9cf616d742dcf075d05b
parent c0284b372ca26fe8ea15bb76395c8bff2c444ba9 (diff)
download musl-73d2a3bfda462eebe8291eb788ef8be567a9add8.tar.gz
implement non-default plural rules for ngettext translations
the new code in dcngettext was written by me, and the expression evaluator by Szabolcs Nagy (nsz).
-rw-r--r-- src/locale/dcngettext.c 54
-rw-r--r-- src/locale/pleval.c 197
2 files changed, 243 insertions, 8 deletions
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index 4f9e4174..51e6522a 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -4,6 +4,7 @@
#include <errno.h>
#include <limits.h>
#include <sys/stat.h>
+#include <ctype.h>
#include "locale_impl.h"
#include "libc.h"
#include "atomic.h"
@@ -95,6 +96,8 @@ struct msgcat {
struct msgcat *next;
const void *map;
size_t map_size;
+ void *plural_rule;
+ int nplurals;
char name[];
};
@@ -107,6 +110,7 @@ weak_alias(dummy_gettextdomain, __gettextdomain);
const unsigned char *__map_file(const char *, size_t *);
int __munmap(void *, size_t);
+unsigned long __pleval(const char *, unsigned long);
char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n, int category)
{
@@ -190,19 +194,53 @@ notrans:
const char *trans = __mo_lookup(p->map, p->map_size, msgid1);
if (!trans) goto notrans;
- /* FIXME: support alternate plural rules */
- if (n != 1) {
- size_t l = strlen(trans);
- if (l+1 >= p->map_size - (trans - (char *)p->map))
- goto notrans;
- trans += l+1;
+ /* Non-plural-processing gettext forms pass a null pointer as
+ * msgid2 to request that dcngettext suppress plural processing. */
+ if (!msgid2) return (char *)trans;
+
+ if (!p->plural_rule) {
+ const char *rule = "n!=1;";
+ unsigned long np = 2;
+ const char *r = __mo_lookup(p->map, p->map_size, "");
+ char *z;
+ while (r && strncmp(r, "Plural-Forms:", 13)) {
+ z = strchr(r, '\n');
+ r = z ? z+1 : 0;
+ }
+ if (r) {
+ r += 13;
+ while (isspace(*r)) r++;
+ if (!strncmp(r, "nplurals=", 9)) {
+ np = strtoul(r+9, &z, 10);
+ r = z;
+ }
+ while (*r && *r != ';') r++;
+ if (*r) {
+ r++;
+ while (isspace(*r)) r++;
+ if (!strncmp(r, "plural=", 7))
+ rule = r+7;
+ }
+ }
+ a_store(&p->nplurals, np);
+ a_cas_p(&p->plural_rule, 0, (void *)rule);
+ }
+ if (p->nplurals) {
+ unsigned long plural = __pleval(p->plural_rule, n);
+ if (plural > p->nplurals) goto notrans;
+ while (plural--) {
+ size_t l = strlen(trans);
+ if (l+1 >= p->map_size - (trans - (char *)p->map))
+ goto notrans;
+ trans += l+1;
+ }
}
return (char *)trans;
}
char *dcgettext(const char *domainname, const char *msgid, int category)
{
- return dcngettext(domainname, msgid, msgid, 1, category);
+ return dcngettext(domainname, msgid, 0, 1, category);
}
char *dngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n)
@@ -212,5 +250,5 @@ char *dngettext(const char *domainname, const char *msgid1, const char *msgid2,
char *dgettext(const char *domainname, const char *msgid)
{
- return dcngettext(domainname, msgid, msgid, 1, LC_MESSAGES);
+ return dcngettext(domainname, msgid, 0, 1, LC_MESSAGES);
}
diff --git a/src/locale/pleval.c b/src/locale/pleval.c
new file mode 100644
index 00000000..47aefc34
--- /dev/null
+++ b/src/locale/pleval.c
@@ -0,0 +1,197 @@
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+grammar:
+
+Start = Expr ';'
+Expr = Or | Or '?' Expr ':' Expr
+Or = And | Or '||' And
+And = Eq | And '&&' Eq
+Eq = Rel | Eq '==' Rel | Eq '!=' Rel
+Rel = Add | Rel '<=' Add | Rel '>=' Add | Rel '<' Add | Rel '>' Add
+Add = Mul | Add '+' Mul | Add '-' Mul
+Mul = Term | Mul '*' Term | Mul '/' Term | Mul '%' Term
+Term = '(' Expr ')' | '!' Term | decimal | 'n'
+
+internals:
+
+recursive descent expression evaluator with stack depth limit.
+eval* functions return the value of the subexpression and set
+the current string pointer to the next non-space char.
+*/
+/* parser state shared by all eval* functions:
+ * s   - current read position in the plural expression
+ * n   - value that the variable 'n' evaluates to
+ * err - set to 1 on any syntax or evaluation error; the eval*
+ *       functions only ever set it, they never reset it to 0
+ */
+struct st {
+ const char *s;
+ unsigned long n;
+ int err;
+};
+/* return a pointer to the first char at or after s for which
+ * isspace() is false (possibly the terminating null byte).
+ * NOTE(review): *s is a plain char; ISO C only defines isspace for
+ * values representable as unsigned char or EOF, so a negative char
+ * relies on the libc's isspace tolerating it - confirm for the
+ * target implementation.
+ */
+static const char *skipspace(const char *s)
+{
+ while (isspace(*s)) s++;
+ return s;
+}
+/* parse an unsigned decimal constant at st->s using strtoul.
+ * st->err is set when the first char is not a digit (which rejects
+ * the leading whitespace and sign that strtoul alone would accept),
+ * when no characters were consumed, or when the value equals
+ * (unsigned long)-1. the position is then moved past the consumed
+ * characters and any following whitespace. the parsed value is
+ * returned even when err has been set.
+ */
+static unsigned long evalconst(struct st *st)
+{
+ char *e;
+ unsigned long n;
+ n = strtoul(st->s, &e, 10);
+ if (!isdigit(*st->s) || e == st->s || n == -1) /* -1 converts to ULONG_MAX; presumably meant to catch strtoul overflow */
+ st->err = 1;
+ st->s = skipspace(e);
+ return n;
+}
+
+static unsigned long evalexpr(struct st *st, int d); /* forward declaration: evalterm recurses into evalexpr for '(' Expr ')' */
+/* Term = '(' Expr ')' | '!' Term | decimal | 'n'
+ *
+ * d is the remaining recursion budget; when it is <= 0 the term is
+ * not parsed, st->err is set and 0 is returned. leading whitespace
+ * is skipped here, and each successful alternative leaves st->s on
+ * the next non-space char. a missing ')' sets st->err and yields 0.
+ */
+static unsigned long evalterm(struct st *st, int d)
+{
+ unsigned long a;
+ if (d <= 0) {
+ st->err = 1;
+ return 0;
+ }
+ st->s = skipspace(st->s);
+ if (*st->s == '!') {
+ st->s++;
+ return !evalterm(st, d-1); /* logical negation: result is 0 or 1 */
+ }
+ if (*st->s == '(') {
+ st->s++;
+ a = evalexpr(st, d-1);
+ if (*st->s != ')') {
+ st->err = 1;
+ return 0;
+ }
+ st->s = skipspace(st->s + 1);
+ return a;
+ }
+ if (*st->s == 'n') {
+ st->s = skipspace(st->s + 1);
+ return st->n; /* the variable 'n' evaluates to the caller-supplied count */
+ }
+ return evalconst(st); /* anything else must be a decimal constant */
+}
+/* Mul = Term | Mul '*' Term | Mul '/' Term | Mul '%' Term
+ *
+ * evaluates a left-associative chain of terms in unsigned long
+ * arithmetic. a zero right-hand operand of '/' or '%' sets st->err
+ * and makes the function return 0 immediately. no whitespace is
+ * skipped after the operator here because evalterm skips leading
+ * whitespace itself.
+ */
+static unsigned long evalmul(struct st *st, int d)
+{
+ unsigned long b, a = evalterm(st, d-1);
+ int op;
+ for (;;) {
+ op = *st->s;
+ if (op != '*' && op != '/' && op != '%')
+ return a;
+ st->s++;
+ b = evalterm(st, d-1);
+ if (op == '*') {
+ a *= b;
+ } else if (!b) { /* op is '/' or '%' with a zero divisor */
+ st->err = 1;
+ return 0;
+ } else if (op == '%') {
+ a %= b;
+ } else {
+ a /= b;
+ }
+ }
+}
+/* Add = Mul | Add '+' Mul | Add '-' Mul
+ *
+ * starts from an accumulator of 0 and adds each operand multiplied
+ * by +1 or -1 depending on the operator that preceded it (the first
+ * operand is always added). the int factor is converted to unsigned
+ * long for the multiplication, so subtraction wraps around instead
+ * of producing a negative value.
+ */
+static unsigned long evaladd(struct st *st, int d)
+{
+ unsigned long a = 0;
+ int add = 1;
+ for (;;) {
+ a += (add?1:-1) * evalmul(st, d-1);
+ if (*st->s != '+' && *st->s != '-')
+ return a;
+ add = *st->s == '+'; /* remember the operator for the next operand */
+ st->s++;
+ }
+}
+/* Rel = Add | Rel '<=' Add | Rel '>=' Add | Rel '<' Add | Rel '>' Add
+ *
+ * left-associative chain of unsigned comparisons. once at least one
+ * operator has been applied the running value is 0 or 1, and that
+ * value is what a further operator in the chain compares against.
+ */
+static unsigned long evalrel(struct st *st, int d)
+{
+ unsigned long b, a = evaladd(st, d-1);
+ int less, eq;
+ for (;;) {
+ if (*st->s != '<' && *st->s != '>')
+ return a;
+ less = st->s[0] == '<';
+ eq = st->s[1] == '='; /* 1 for the two-character operators '<=' and '>=' */
+ st->s += 1 + eq;
+ b = evaladd(st, d-1);
+ a = (less ? a < b : a > b) || (eq && a == b);
+ }
+}
+/* Eq = Rel | Eq '==' Rel | Eq '!=' Rel
+ *
+ * left-associative chain of equality tests yielding 0 or 1. an
+ * operator is only recognized when '=' or '!' is directly followed
+ * by '='; otherwise the current value is returned and st->s is left
+ * on the unconsumed character for the caller to deal with.
+ */
+static unsigned long evaleq(struct st *st, int d)
+{
+ unsigned long a = evalrel(st, d-1);
+ int neg;
+ for (;;) {
+ if ((st->s[0] != '=' && st->s[0] != '!') || st->s[1] != '=')
+ return a;
+ neg = st->s[0] == '!';
+ st->s += 2;
+ a = evalrel(st, d-1) == a;
+ a ^= neg; /* flip the 0/1 equality result for '!=' */
+ }
+}
+/* And = Eq | And '&&' Eq
+ *
+ * left-associative logical AND yielding 0 or 1 once an operator has
+ * been applied. the right-hand operand is written first in the '&&'
+ * expression below, so it is always parsed and evaluated regardless
+ * of the value accumulated so far.
+ */
+static unsigned long evaland(struct st *st, int d)
+{
+ unsigned long a = evaleq(st, d-1);
+ for (;;) {
+ if (st->s[0] != '&' || st->s[1] != '&')
+ return a;
+ st->s += 2;
+ a = evaleq(st, d-1) && a;
+ }
+}
+/* Or = And | Or '||' And
+ *
+ * left-associative logical OR yielding 0 or 1 once an operator has
+ * been applied. the right-hand operand is written first in the '||'
+ * expression below, so it is always parsed and evaluated regardless
+ * of the value accumulated so far.
+ */
+static unsigned long evalor(struct st *st, int d)
+{
+ unsigned long a = evaland(st, d-1);
+ for (;;) {
+ if (st->s[0] != '|' || st->s[1] != '|')
+ return a;
+ st->s += 2;
+ a = evaland(st, d-1) || a;
+ }
+}
+/* Expr = Or | Or '?' Expr ':' Expr
+ *
+ * d is the remaining recursion budget; when it is <= 0 nothing is
+ * parsed, st->err is set and 0 is returned. a '?' without a
+ * matching ':' also sets st->err and yields 0.
+ * NOTE(review): both branches of '?:' are always evaluated, so an
+ * error raised in the branch that is not selected (for instance a
+ * zero divisor in evalmul) still sets st->err - confirm that this
+ * is acceptable for the plural rules that need to be supported.
+ */
+static unsigned long evalexpr(struct st *st, int d)
+{
+ unsigned long a1, a2, a3;
+ if (d <= 0) {
+ st->err = 1;
+ return 0;
+ }
+ a1 = evalor(st, d-1);
+ if (*st->s != '?')
+ return a1; /* plain Or expression, no conditional */
+ st->s++;
+ a2 = evalexpr(st, d-1);
+ if (*st->s != ':') {
+ st->err = 1;
+ return 0;
+ }
+ st->s++;
+ a3 = evalexpr(st, d-1);
+ return a1 ? a2 : a3;
+}
+/* evaluate the plural expression in s with the variable 'n' bound
+ * to n. the expression must be terminated by ';'.
+ *
+ * returns the value of the expression, or (unsigned long)-1 if any
+ * eval* function reported an error or the first unconsumed char is
+ * not ';'.
+ *
+ * evaluation starts with a recursion budget of 100. every function
+ * on the path evalexpr -> evalor -> evaland -> evaleq -> evalrel ->
+ * evaladd -> evalmul -> evalterm passes d-1 to the next one, and so
+ * does evalterm when it re-enters evalexpr for '(', so each level
+ * of parentheses uses up 8 units of that budget.
+ */
+unsigned long __pleval(const char *s, unsigned long n)
+{
+ unsigned long a;
+ struct st st;
+ st.s = s;
+ st.n = n;
+ st.err = 0;
+ a = evalexpr(&st, 100);
+ if (st.err || *st.s != ';')
+ return -1; /* converted to unsigned long by the return type */
+ return a;
+}