From 0bc03091bb674ebb9fa6fe69e4aec1da3ac484f2 Mon Sep 17 00:00:00 2001
From: Rich Felker
Date: Wed, 2 Jul 2014 19:33:19 -0400
Subject: add locale framework

this commit adds non-stub implementations of setlocale, duplocale,
newlocale, and uselocale, along with the data structures and minimal
code needed for representing the active locale on a per-thread basis
and optimizing the common case where thread-local locale settings are
not in use.

at this point, the data structures only contain what is necessary to
represent LC_CTYPE (a single flag) and LC_MESSAGES (a name for use in
finding message translation files). representation for the other
categories will be added later; the expectation is that a single
pointer will suffice for each.

for LC_CTYPE, the strings "C" and "POSIX" are treated as special; any
other string is accepted and treated as "C.UTF-8". for other
categories, any string is accepted after being truncated to a maximum
supported length (currently 15 bytes). for LC_MESSAGES, the name is
kept regardless of whether libc itself can use such a message
translation locale, since applications using catgets or gettext should
be able to use message locales libc is not aware of. for other
categories, names which are not successfully loaded as locales (which,
at present, means all names) are treated as aliases for "C". setlocale
never fails.

locale settings are not yet used anywhere, so this commit should have
no visible effects except for the contents of the string returned by
setlocale.
--- src/locale/__setlocalecat.c | 46 ++++++++++++++++++++++++++++++ src/locale/duplocale.c | 15 +++++++--- src/locale/newlocale.c | 24 ++++++++++++---- src/locale/setlocale.c | 68 +++++++++++++++++++++++++++++++++++++++++---- src/locale/uselocale.c | 21 +++++++++++--- 5 files changed, 155 insertions(+), 19 deletions(-) create mode 100644 src/locale/__setlocalecat.c (limited to 'src/locale') diff --git a/src/locale/__setlocalecat.c b/src/locale/__setlocalecat.c new file mode 100644 index 00000000..f1e4bf07 --- /dev/null +++ b/src/locale/__setlocalecat.c @@ -0,0 +1,46 @@ +#include +#include +#include "locale_impl.h" +#include "libc.h" +#include "atomic.h" + +static const char envvars[][12] = { + "LC_CTYPE", + "LC_NUMERIC", + "LC_TIME", + "LC_COLLATE", + "LC_MONETARY", + "LC_MESSAGES", +}; + +int __setlocalecat(locale_t loc, int cat, const char *val) +{ + if (!*val) { + (val = getenv("LC_ALL")) || + (val = getenv(envvars[cat])) || + (val = getenv("LANG")) || + (val = "C.UTF-8"); + } + + size_t n = strnlen(val, LOCALE_NAME_MAX); + int builtin = (val[0]=='C' && !val[1]) + || !strcmp(val, "C.UTF-8") + || !strcmp(val, "POSIX"); + + switch (cat) { + case LC_CTYPE: + a_store(&loc->ctype_utf8, !builtin || val[1]=='.'); + break; + case LC_MESSAGES: + if (builtin) { + loc->messages_name[0] = 0; + } else { + memcpy(loc->messages_name, val, n); + loc->messages_name[n] = 0; + } + /* fall through */ + default: + break; + } + return 0; +} diff --git a/src/locale/duplocale.c b/src/locale/duplocale.c index f9fc1ffa..13368707 100644 --- a/src/locale/duplocale.c +++ b/src/locale/duplocale.c @@ -3,12 +3,19 @@ #include "locale_impl.h" #include "libc.h" -locale_t duplocale(locale_t old) +locale_t __duplocale(locale_t old) { - locale_t new; - new = calloc(1, sizeof *new); + locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1); + if (!new) return 0; + new->messages_name = (void *)(new+1); + + if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale; + new->ctype_utf8 = old->ctype_utf8; 
+ if (old->messages_name) + strcpy(new->messages_name, old->messages_name); + if (new && old != LC_GLOBAL_LOCALE) memcpy(new, old, sizeof *new); return new; } -weak_alias(duplocale, __duplocale); +weak_alias(__duplocale, duplocale); diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c index 447c8fc2..39501d0c 100644 --- a/src/locale/newlocale.c +++ b/src/locale/newlocale.c @@ -3,12 +3,24 @@ #include "locale_impl.h" #include "libc.h" -locale_t newlocale(int mask, const char *name, locale_t base) +locale_t __newlocale(int mask, const char *name, locale_t loc) { - if (*name && strcmp(name, "C") && strcmp(name, "POSIX")) - return 0; - if (!base) base = calloc(1, sizeof *base); - return base; + int i; + + if (!loc) { + loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1); + if (!loc) return 0; + loc->messages_name = (void *)(loc+1); + for (i=0; i +#include +#include +#include "locale_impl.h" +#include "libc.h" +#include "atomic.h" -char *setlocale(int category, const char *locale) +static char buf[2+4*(LOCALE_NAME_MAX+1)]; + +char *setlocale(int cat, const char *name) { - /* Note: plain "C" would be better, but puts some broken - * software into legacy 8-bit-codepage mode, ignoring - * the standard library's multibyte encoding */ - return "C.UTF-8"; + if (!libc.global_locale.messages_name) { + libc.global_locale.messages_name = + buf + 2 + 3*(LOCALE_NAME_MAX+1); + } + + if ((unsigned)cat > LC_ALL) return 0; + + /* For LC_ALL, setlocale is required to return a string which + * encodes the current setting for all categories. The format of + * this string is unspecified, and only the following code, which + * performs both the serialization and deserialization, depends + * on the format, so it can easily be changed if needed. 
*/ + if (cat == LC_ALL) { + if (name) { + char part[LOCALE_NAME_MAX+1]; + int i, j; + if (name[0] && name[1]==';' + && strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) { + part[0] = name[0]; + part[1] = 0; + setlocale(LC_CTYPE, part); + part[LOCALE_NAME_MAX] = 0; + for (i=LC_TIME; ilocale; - if (l) self->locale = l; - return old; + locale_t global = &libc.global_locale; + + if (new == LC_GLOBAL_LOCALE) new = global; + + if (new && new != old) { + int adj = 0; + if (new == global) a_dec(&libc.uselocale_cnt); + else if (!new->ctype_utf8) adj++; + if (old == global) a_inc(&libc.uselocale_cnt); + else if (!old->ctype_utf8) adj--; + a_fetch_add(&libc.bytelocale_cnt_minus_1, adj); + self->locale = new; + } + + return old == global ? LC_GLOBAL_LOCALE : old; } -weak_alias(uselocale, __uselocale); +weak_alias(__uselocale, uselocale); -- cgit v1.2.1