From 7e38b1ea2bf1a0817942275cea89c959bf446d87 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 23 Apr 2012 15:25:23 -0400 Subject: destubify iswalpha and update iswpunct to unicode 6.1 alpha is defined as unicode property "Alphabetic" plus category Nd minus ASCII digits minus 2 special-cased Thai punctuation marks supposedly misclassified by Unicode as letters. punct is defined as all of unicode except control, alphanumeric, and space characters. the tables were generated by a simple tool based on the code posted previously to the mailing list. in the future, this and other code used for maintaining locale/iconv/i18n data will be published either in the main source repository or in a separate locale data generation repository. --- src/ctype/iswpunct.c | 136 ++------------------------------------------------- 1 file changed, 5 insertions(+), 131 deletions(-) (limited to 'src/ctype/iswpunct.c') diff --git a/src/ctype/iswpunct.c b/src/ctype/iswpunct.c index 1414c30c..a8297452 100644 --- a/src/ctype/iswpunct.c +++ b/src/ctype/iswpunct.c @@ -1,138 +1,12 @@ #include <wctype.h> -#include <stdint.h> -/* The below data is derived from classes (P.|Sm) plus Pattern_Syntax */ - -#define R(a,b) { (b), (b)-(a) } - -static const struct range { - uint32_t base:20; - uint32_t len:12; -} ranges[] = { -R(0x21, 0x2f), -R(0x3a, 0x40), -R(0x5b, 0x60), -R(0x7b, 0x7e), -R(0xa1, 0xa7), -R(0xa9, 0xa9), -R(0xab, 0xac), -R(0xae, 0xae), -R(0xb0, 0xb1), -R(0xb6, 0xb7), -R(0xbb, 0xbb), -R(0xbf, 0xbf), -R(0xd7, 0xd7), -R(0xf7, 0xf7), -R(0x37e, 0x37e), -R(0x387, 0x387), -R(0x3f6, 0x3f6), -R(0x55a, 0x55f), -R(0x589, 0x58a), -R(0x5be, 0x5be), -R(0x5c0, 0x5c0), -R(0x5c3, 0x5c3), -R(0x5c6, 0x5c6), -R(0x5f3, 0x5f4), -R(0x606, 0x60a), -R(0x60c, 0x60d), -R(0x61b, 0x61b), -R(0x61e, 0x61f), -R(0x66a, 0x66d), -R(0x6d4, 0x6d4), -R(0x700, 0x70d), -R(0x7f7, 0x7f9), -R(0x964, 0x965), -R(0x970, 0x970), -R(0xdf4, 0xdf4), -R(0xe4f, 0xe4f), -R(0xe5a, 0xe5b), -R(0xf04, 0xf12), -R(0xf3a, 0xf3d), -R(0xf85, 0xf85), -R(0xfd0, 0xfd4), 
-R(0x104a, 0x104f), -R(0x10fb, 0x10fb), -R(0x1361, 0x1368), -R(0x166d, 0x166e), -R(0x1680, 0x1680), -R(0x169b, 0x169c), -R(0x16eb, 0x16ed), -R(0x1735, 0x1736), -R(0x17d4, 0x17d6), -R(0x17d8, 0x17da), -R(0x1800, 0x180a), -R(0x180e, 0x180e), -R(0x1944, 0x1945), -R(0x19de, 0x19df), -R(0x1a1e, 0x1a1f), -R(0x1b5a, 0x1b60), -R(0x1c3b, 0x1c3f), -R(0x1c7e, 0x1c7f), -R(0x2010, 0x2027), -R(0x2030, 0x205e), -R(0x207a, 0x207e), -R(0x208a, 0x208e), -R(0x2140, 0x2144), -R(0x214b, 0x214b), -R(0x2190, 0x245f), -R(0x2500, 0x2775), -R(0x2794, 0x2bff), -R(0x2cf9, 0x2cfc), -R(0x2cfe, 0x2cff), -R(0x2e00, 0x2e7f), -R(0x3001, 0x3003), -R(0x3008, 0x3020), -R(0x3030, 0x3030), -R(0x303d, 0x303d), -R(0x30a0, 0x30a0), -R(0x30fb, 0x30fb), -R(0xa60d, 0xa60f), -R(0xa874, 0xa877), -R(0xa8ce, 0xa8cf), -R(0xa92e, 0xa92f), -R(0xa95f, 0xa95f), -R(0xfb29, 0xfb29), -R(0xfd3e, 0xfd3f), -R(0xfe10, 0xfe19), -R(0xfe30, 0xfe52), -R(0xfe54, 0xfe66), -R(0xfe68, 0xfe68), -R(0xfe6a, 0xfe6b), -R(0xff01, 0xff03), -R(0xff05, 0xff0f), -R(0xff1a, 0xff20), -R(0xff3b, 0xff3d), -R(0xff3f, 0xff3f), -R(0xff5b, 0xff65), -R(0xffe2, 0xffe2), -R(0xffe9, 0xffec), -R(0x10100, 0x10101), -R(0x1039f, 0x1039f), -R(0x103d0, 0x103d0), -R(0x1091f, 0x1091f), -R(0x1093f, 0x1093f), -R(0x10a50, 0x10a58), -R(0x12470, 0x12473), -R(0x1d6c1, 0x1d6c1), -R(0x1d6db, 0x1d6db), -R(0x1d6fb, 0x1d6fb), -R(0x1d715, 0x1d715), -R(0x1d735, 0x1d735), -R(0x1d74f, 0x1d74f), -R(0x1d76f, 0x1d76f), -R(0x1d789, 0x1d789), -R(0x1d7a9, 0x1d7a9), -R(0x1d7c3, 0x1d7c3), +static unsigned char table[] = { +#include "punct.h" }; int iswpunct(wint_t wc) { - unsigned c = wc; - int a = 0; - int n = sizeof ranges / sizeof ranges[0]; - do { - n >>= 1; - a += n+1 & (signed)(ranges[a+n].base-c)>>31; - } while (n); - return ranges[a].base-c <= ranges[a].len; + if (wc<0x20000U) + return (table[table[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1; + return 0; } -- cgit v1.2.1