diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2015-04-18 16:47:17 +0000 | 
|---|---|---|
| committer | Rich Felker <dalias@aerifal.cx> | 2016-01-30 20:53:17 -0500 | 
| commit | da4cc13b9705e7d3a02216959b9711b3b30828c1 (patch) | |
| tree | 40b725270f05a2673495e6c72ce78acd85d68ada | |
| parent | 7eaa76fc2e7993582989d3838b1ac32dd8abac09 (diff) | |
| download | musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.tar.gz | |
regex: treat \| in BRE as alternation
The standard does not define semantics for \| in BRE, but some code
depends on it meaning alternation. An empty alternative expression is
allowed, for consistency with ERE.
Based on a patch by Rob Landley.
| -rw-r--r-- | src/regex/regcomp.c | 19 | 
1 file changed, 17 insertions, 2 deletions
| diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index 078f657c..f1f06afe 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -841,6 +841,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)  			/* reject repetitions after empty expression in BRE */  			if (!ere)  				return REG_BADRPT; +		case '|': +			/* extension: treat \| as alternation in BRE */ +			if (!ere) { +				node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); +				s--; +				goto end; +			} +			/* fallthrough */  		default:  			if (!ere && (unsigned)*s-'1' < 9) {  				/* back reference */ @@ -918,6 +926,7 @@ parse_literal:  		s += len;  		break;  	} +end:  	if (!node)  		return REG_ESPACE;  	ctx->n = node; @@ -1016,13 +1025,20 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)  		if ((ere && *s == '|') ||  		    (ere && *s == ')' && depth) ||  		    (!ere && *s == '\\' && s[1] == ')') || +		    /* extension: treat \| as alternation in BRE */ +		    (!ere && *s == '\\' && s[1] == '|') ||  		    !*s) {  			/* extension: empty branch is unspecified (), (|a), (a|)  			   here they are not rejected but match on empty string */  			int c = *s;  			nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);  			nbranch = 0; -			if (c != '|') { + +			if (c == '\\' && s[1] == '|') { +				s+=2; +			} else if (c == '|') { +				s++; +			} else {  				if (c == '\\') {  					if (!depth) return REG_EPAREN;  					s+=2; @@ -1042,7 +1058,6 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)  				nunion = tre_stack_pop_voidptr(stack);  				goto parse_iter;  			} -			s++;  		}  	}  } | 
