From 03498ec22a4804ddbd8203d9ac94b6f7b6574b3c Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 18 Apr 2015 17:25:31 +0000 Subject: regex: rewrite the repetition parsing code The goto logic was hard to follow and modify. This is in preparation for the BRE \+ and \? support. --- src/regex/regcomp.c | 59 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) (limited to 'src/regex') diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index f1f06afe..ccd3755b 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -984,41 +984,40 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) /* extension: repetitions are rejected after an empty node eg. (+), |*, {2}, but assertions are not treated as empty so ^* or $? are accepted currently. */ - switch (*s) { - case '+': - case '?': - if (!ere) + for (;;) { + if (*s!='\\' && *s!='*') { + if (!ere) + break; + if (*s!='+' && *s!='?' && *s!='{') + break; + } + if (*s=='\\' && ere) break; - /* fallthrough */ - case '*':; - int min=0, max=-1; - if (*s == '+') - min = 1; - if (*s == '?') - max = 1; - s++; - ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0); - if (!ctx->n) - return REG_ESPACE; + if (*s=='\\' && s[1]!='{') + break; + if (*s=='\\') + s++; + /* extension: multiple consecutive *+?{,} is unspecified, but (a+)+ has to be supported so accepting a++ makes sense, note however that the RE_DUP_MAX limit can be circumvented: (a{255}){255} uses a lot of memory.. */ - goto parse_iter; - case '\\': - if (ere || s[1] != '{') - break; - s++; - goto parse_brace; - case '{': - if (!ere) - break; - parse_brace: - err = parse_dup(ctx, s+1); - if (err != REG_OK) - return err; - s = ctx->s; - goto parse_iter; + if (*s=='{') { + err = parse_dup(ctx, s+1); + if (err != REG_OK) + return err; + s = ctx->s; + } else { + int min=0, max=-1; + if (*s == '+') + min = 1; + if (*s == '?') + max = 1; + s++; + ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0); + if (!ctx->n) + return REG_ESPACE; + } } nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); -- cgit v1.2.1