/*
  tre-internal.h - TRE internal definitions

  Copyright (c) 2001-2006 Ville Laurikari.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

#ifndef TRE_INTERNAL_H
#define TRE_INTERNAL_H 1

/* NOTE(review): the operands of the original #include lines were lost.
   The headers below were chosen from the identifiers this file uses
   (MB_CUR_MAX and malloc & co., uintptr_t, wchar_t/wint_t/WCHAR_MAX/wcslen,
   wctype_t/isw*()).  <regex.h> is intentionally not included here: only the
   disabled prototypes further down mention regex_t/regmatch_t.  Confirm
   against the upstream file. */
#include <stdint.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>

/* Build configuration: multibyte input, wide-character internals, system
   wctype functions. */
#define TRE_MULTIBYTE 1
#undef  TRE_MBSTATE
#define TRE_WCHAR 1
#define TRE_USE_SYSTEM_WCTYPE 1
#define HAVE_WCSTOMBS 1

/* Guarded so that a -DNDEBUG on the command line is not redefined with a
   different replacement list. */
#ifndef NDEBUG
#define NDEBUG
#endif

#define TRE_REGEX_T_FIELD __opaque
typedef int reg_errcode_t;

typedef wchar_t tre_char_t;

/* DPRINT((fmt, ...)) prints and flushes when TRE_DEBUG is defined and expands
   to an empty statement otherwise.  The argument must carry its own
   parentheses because it is pasted directly after `printf'. */
#ifdef TRE_DEBUG
#include <stdio.h>
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(0)
#else /* !TRE_DEBUG */
#define DPRINT(msg) do { } while(0)
#endif /* !TRE_DEBUG */

/* Number of elements in the array `x'.  Only meaningful for true arrays,
   not for pointers. */
#define elementsof(x)	( sizeof(x) / sizeof((x)[0]) )

/* Multibyte-to-wide conversion.  The active variant ignores `n' and `ps'. */
#if 1
int __mbtowc(wchar_t *, const char *);
#define tre_mbrtowc(pwc, s, n, ps) (__mbtowc((pwc), (s)))
#else
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
#endif

/* Wide characters. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX WCHAR_MAX

#ifdef TRE_MULTIBYTE
#define TRE_MB_CUR_MAX MB_CUR_MAX
#else /* !TRE_MULTIBYTE */
#define TRE_MB_CUR_MAX 1
#endif /* !TRE_MULTIBYTE */

#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
#define tre_isblank iswblank
#define tre_iscntrl iswcntrl
#define tre_isdigit iswdigit
#define tre_isgraph iswgraph
#define tre_islower iswlower
#define tre_isprint iswprint
#define tre_ispunct iswpunct
#define tre_isspace iswspace
#define tre_isupper iswupper
#define tre_isxdigit iswxdigit

#define tre_tolower towlower
#define tre_toupper towupper
#define tre_strlen  wcslen

/* Use system provided iswctype() and wctype(). */
typedef wctype_t tre_ctype_t;
#define tre_isctype iswctype
#define tre_ctype   wctype

/* Returns number of bytes to add to (char *)ptr to make it
   properly aligned for the type.  The address is taken as uintptr_t so it
   is not truncated where `long' is narrower than a pointer, and `ptr' is
   parenthesized so that expression arguments such as `p + 1' are converted
   as a whole.  `ptr' is evaluated more than once. */
#define ALIGN(ptr, type) \
  ((((uintptr_t)(ptr)) % sizeof(type)) \
   ? (sizeof(type) - (((uintptr_t)(ptr)) % sizeof(type))) \
   : 0)

/* Classic two-argument maximum/minimum.  Each argument may be evaluated
   twice, so do not pass expressions with side effects. */
#undef MAX
#undef MIN
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))

/* Define STRF to the correct printf formatter for strings. */
#define STRF "ls"

/* TNFA transition type. A TNFA state is an array of transitions,
   the terminator is a transition with NULL `state'. */
typedef struct tnfa_transition tre_tnfa_transition_t;

struct tnfa_transition {
  /* Range of accepted characters. */
  tre_cint_t code_min;
  tre_cint_t code_max;
  /* Pointer to the destination state. */
  tre_tnfa_transition_t *state;
  /* ID number of the destination state. */
  int state_id;
  /* -1 terminated array of tags (or NULL). */
  int *tags;
  /* Assertion bitmap. */
  int assertions;
  /* Assertion parameters. */
  union {
    /* Character class assertion. */
    tre_ctype_t class;
    /* Back reference assertion. */
    int backref;
  } u;
  /* Negative character class assertions. */
  tre_ctype_t *neg_classes;
};


/* Assertions. */
#define ASSERT_AT_BOL		  1   /* Beginning of line. */
#define ASSERT_AT_EOL		  2   /* End of line. */
#define ASSERT_CHAR_CLASS	  4   /* Character class in `class'. */
#define ASSERT_CHAR_CLASS_NEG	  8   /* Character classes in `neg_classes'. */
#define ASSERT_AT_BOW		 16   /* Beginning of word. */
#define ASSERT_AT_EOW		 32   /* End of word. */
#define ASSERT_AT_WB		 64   /* Word boundary. */
#define ASSERT_AT_WB_NEG	128   /* Not a word boundary. */
#define ASSERT_BACKREF		256   /* A back reference in `backref'. */
#define ASSERT_LAST		256   /* Same value as the highest flag above. */

/* Tag directions. */
typedef enum {
  TRE_TAG_MINIMIZE = 0,
  TRE_TAG_MAXIMIZE = 1
} tre_tag_direction_t;

/* Instructions to compute submatch register values from tag values
   after a successful match.  */
struct tre_submatch_data {
  /* Tag that gives the value for rm_so (submatch start offset). */
  int so_tag;
  /* Tag that gives the value for rm_eo (submatch end offset). */
  int eo_tag;
  /* List of submatches this submatch is contained in. */
  int *parents;
};

typedef struct tre_submatch_data tre_submatch_data_t;


/* TNFA definition. */
typedef struct tnfa tre_tnfa_t;

struct tnfa {
  tre_tnfa_transition_t *transitions;
  unsigned int num_transitions;
  tre_tnfa_transition_t *initial;
  tre_tnfa_transition_t *final;
  tre_submatch_data_t *submatch_data;
  unsigned int num_submatches;
  tre_tag_direction_t *tag_directions;
  int num_tags;
  int end_tag;
  int num_states;
  int cflags;
  int have_backrefs;
};

/* Disabled prototypes, kept for reference only; they are never compiled. */
#if 0
static int
tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);

static void
tre_free(regex_t *preg);

static void
tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
		const tre_tnfa_t *tnfa, int *tags, int match_eo);

static reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
		      tre_str_type_t type, int *match_tags, int eflags,
		      int *match_end_ofs);

static reg_errcode_t
tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
		       int len, tre_str_type_t type, int *match_tags,
		       int eflags, int *match_end_ofs);
#endif

/* from tre-mem.h: */

#define TRE_MEM_BLOCK_SIZE 1024

/* Singly linked list node carrying an opaque data pointer. */
typedef struct tre_list {
  void *data;
  struct tre_list *next;
} tre_list_t;

/* Allocator handle.  Note that tre_mem_t is itself a pointer type. */
typedef struct tre_mem_struct {
  tre_list_t *blocks;
  tre_list_t *current;
  char *ptr;
  size_t n;
  int failed;
  void **provided;
} *tre_mem_t;

/* Map the allocator entry points onto their external symbol names. */
#define tre_mem_new_impl   __tre_mem_new_impl
#define tre_mem_alloc_impl __tre_mem_alloc_impl
#define tre_mem_destroy    __tre_mem_destroy

tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
			 int zero, size_t size);

/* Returns a new memory allocator or NULL if out of memory. */
#define tre_mem_new()  tre_mem_new_impl(0, NULL)

/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
   allocated block or NULL if an underlying malloc() failed. */
#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)

/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
   allocated block or NULL if an underlying malloc() failed.  The memory
   is set to zero. */
#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)

#ifdef TRE_USE_ALLOCA
/* alloca() versions.  Like above, but memory is allocated with alloca()
   instead of malloc().  tre_mem_alloca() passes no new block when the
   allocator's `n' field is already at least `size', and a fresh
   TRE_MEM_BLOCK_SIZE alloca() block otherwise; `mem' and `size' are
   evaluated more than once. */

#define tre_mem_newa() \
  tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))

#define tre_mem_alloca(mem, size)					      \
  ((mem)->n >= (size)							      \
   ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size))			      \
   : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
#endif /* TRE_USE_ALLOCA */


/* Frees the memory allocator and all memory allocated with it. */
void tre_mem_destroy(tre_mem_t mem);

/* The x*() allocation names map straight onto the C library allocator. */
#define xmalloc malloc
#define xcalloc calloc
#define xfree free
#define xrealloc realloc

#endif /* TRE_INTERNAL_H */

/* EOF */