authorRich Felker <>2012-04-23 15:25:23 -0400
committerRich Felker <>2012-04-23 15:25:23 -0400
commit7e38b1ea2bf1a0817942275cea89c959bf446d87 (patch)
tree1f35fdc54a5eb360956b3ab9ce142a29ebf2dc62 /src/ctype/iswpunct.c
parenta5d10eb1f587a13a513504ffb5d8e6a8738eee6f (diff)
destubify iswalpha and update iswpunct to unicode 6.1
alpha is defined as the Unicode property "Alphabetic" plus category Nd, minus the ASCII digits and minus 2 special-cased Thai punctuation marks that Unicode supposedly misclassifies as letters. punct is defined as all of Unicode except control, alphanumeric, and space characters. The tables were generated by a simple tool based on the code posted previously to the mailing list. In the future, this and other code used for maintaining locale/iconv/i18n data will be published either in the main source repository or in a separate locale data generation repository.
1 files changed, 5 insertions, 131 deletions
#include <wctype.h>
-#include <inttypes.h>
-/* The below data is derived from classes (P.|Sm) plus Pattern_Syntax */
-#define R(a,b) { (b), (b)-(a) }
-static const struct range {
- uint32_t base:20;
- uint32_t len:12;
-} ranges[] = {
-R(0x21, 0x2f),
-R(0x3a, 0x40),
-R(0x5b, 0x60),
-R(0x7b, 0x7e),
-R(0xa1, 0xa7),
-R(0xa9, 0xa9),
-R(0xab, 0xac),
-R(0xae, 0xae),
-R(0xb0, 0xb1),
-R(0xb6, 0xb7),
-R(0xbb, 0xbb),
-R(0xbf, 0xbf),
-R(0xd7, 0xd7),
-R(0xf7, 0xf7),
-R(0x37e, 0x37e),
-R(0x387, 0x387),
-R(0x3f6, 0x3f6),
-R(0x55a, 0x55f),
-R(0x589, 0x58a),
-R(0x5be, 0x5be),
-R(0x5c0, 0x5c0),
-R(0x5c3, 0x5c3),
-R(0x5c6, 0x5c6),
-R(0x5f3, 0x5f4),
-R(0x606, 0x60a),
-R(0x60c, 0x60d),
-R(0x61b, 0x61b),
-R(0x61e, 0x61f),
-R(0x66a, 0x66d),
-R(0x6d4, 0x6d4),
-R(0x700, 0x70d),
-R(0x7f7, 0x7f9),
-R(0x964, 0x965),
-R(0x970, 0x970),
-R(0xdf4, 0xdf4),
-R(0xe4f, 0xe4f),
-R(0xe5a, 0xe5b),
-R(0xf04, 0xf12),
-R(0xf3a, 0xf3d),
-R(0xf85, 0xf85),
-R(0xfd0, 0xfd4),
-R(0x104a, 0x104f),
-R(0x10fb, 0x10fb),
-R(0x1361, 0x1368),
-R(0x166d, 0x166e),
-R(0x1680, 0x1680),
-R(0x169b, 0x169c),
-R(0x16eb, 0x16ed),
-R(0x1735, 0x1736),
-R(0x17d4, 0x17d6),
-R(0x17d8, 0x17da),
-R(0x1800, 0x180a),
-R(0x180e, 0x180e),
-R(0x1944, 0x1945),
-R(0x19de, 0x19df),
-R(0x1a1e, 0x1a1f),
-R(0x1b5a, 0x1b60),
-R(0x1c3b, 0x1c3f),
-R(0x1c7e, 0x1c7f),
-R(0x2010, 0x2027),
-R(0x2030, 0x205e),
-R(0x207a, 0x207e),
-R(0x208a, 0x208e),
-R(0x2140, 0x2144),
-R(0x214b, 0x214b),
-R(0x2190, 0x245f),
-R(0x2500, 0x2775),
-R(0x2794, 0x2bff),
-R(0x2cf9, 0x2cfc),
-R(0x2cfe, 0x2cff),
-R(0x2e00, 0x2e7f),
-R(0x3001, 0x3003),
-R(0x3008, 0x3020),
-R(0x3030, 0x3030),
-R(0x303d, 0x303d),
-R(0x30a0, 0x30a0),
-R(0x30fb, 0x30fb),
-R(0xa60d, 0xa60f),
-R(0xa874, 0xa877),
-R(0xa8ce, 0xa8cf),
-R(0xa92e, 0xa92f),
-R(0xa95f, 0xa95f),
-R(0xfb29, 0xfb29),
-R(0xfd3e, 0xfd3f),
-R(0xfe10, 0xfe19),
-R(0xfe30, 0xfe52),
-R(0xfe54, 0xfe66),
-R(0xfe68, 0xfe68),
-R(0xfe6a, 0xfe6b),
-R(0xff01, 0xff03),
-R(0xff05, 0xff0f),
-R(0xff1a, 0xff20),
-R(0xff3b, 0xff3d),
-R(0xff3f, 0xff3f),
-R(0xff5b, 0xff65),
-R(0xffe2, 0xffe2),
-R(0xffe9, 0xffec),
-R(0x10100, 0x10101),
-R(0x1039f, 0x1039f),
-R(0x103d0, 0x103d0),
-R(0x1091f, 0x1091f),
-R(0x1093f, 0x1093f),
-R(0x10a50, 0x10a58),
-R(0x12470, 0x12473),
-R(0x1d6c1, 0x1d6c1),
-R(0x1d6db, 0x1d6db),
-R(0x1d6fb, 0x1d6fb),
-R(0x1d715, 0x1d715),
-R(0x1d735, 0x1d735),
-R(0x1d74f, 0x1d74f),
-R(0x1d76f, 0x1d76f),
-R(0x1d789, 0x1d789),
-R(0x1d7a9, 0x1d7a9),
-R(0x1d7c3, 0x1d7c3),
+static unsigned char table[] = {
+#include "punct.h"
int iswpunct(wint_t wc)
- unsigned c = wc;
- int a = 0;
- int n = sizeof ranges / sizeof ranges[0];
- do {
- n >>= 1;
- a += n+1 & (signed)(ranges[a+n].base-c)>>31;
- } while (n);
- return ranges[a].base-c <= ranges[a].len;
+ if (wc<0x20000U)
+ return (table[table[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1;
+ return 0;