From da4cc13b9705e7d3a02216959b9711b3b30828c1 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 18 Apr 2015 16:47:17 +0000 Subject: regex: treat \| in BRE as alternation The standard does not define semantics for \| in BRE, but some code depends on it meaning alternation. An empty alternative expression is allowed, for consistency with ERE. Based on a patch by Rob Landley. --- src/regex/regcomp.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/regex/regcomp.c') diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index 078f657c..f1f06afe 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -841,6 +841,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s) /* reject repetitions after empty expression in BRE */ if (!ere) return REG_BADRPT; + case '|': + /* extension: treat \| as alternation in BRE */ + if (!ere) { + node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); + s--; + goto end; + } + /* fallthrough */ default: if (!ere && (unsigned)*s-'1' < 9) { /* back reference */ @@ -918,6 +926,7 @@ parse_literal: s += len; break; } +end: if (!node) return REG_ESPACE; ctx->n = node; @@ -1016,13 +1025,20 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) if ((ere && *s == '|') || (ere && *s == ')' && depth) || (!ere && *s == '\\' && s[1] == ')') || + /* extension: treat \| as alternation in BRE */ + (!ere && *s == '\\' && s[1] == '|') || !*s) { /* extension: empty branch is unspecified (), (|a), (a|) here they are not rejected but match on empty string */ int c = *s; nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); nbranch = 0; - if (c != '|') { + + if (c == '\\' && s[1] == '|') { + s+=2; + } else if (c == '|') { + s++; + } else { if (c == '\\') { if (!depth) return REG_EPAREN; s+=2; @@ -1042,7 +1058,6 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) nunion = tre_stack_pop_voidptr(stack); goto parse_iter; } - s++; } } } -- cgit v1.2.1