summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Szabolcs Nagy <nsz@port70.net> 2026-03-23 17:33:20 +0000
committer: Rich Felker <dalias@aerifal.cx> 2026-03-30 15:59:35 -0400
commit: 40acb04b2c1291f7d3091c61080109da11eea48b (patch)
tree: 06a95e4fed688e1bc3d1f254a7e2a9c0769f65f0 /src
parent: 0572555dab1d1e10b5f7351a005ec588cab41e25 (diff)
download: musl-40acb04b2c1291f7d3091c61080109da11eea48b.tar.gz
regex: reject invalid \digit back reference in BRE
in BRE \n matches the nth subexpression, but regcomp did not check if the nth subexpression was complete or not, only that there were more subexpressions overall than the largest backref.

fix regcomp to error if the referenced subexpression is incomplete.

the bug could cause an infinite loop in regexec:

    regcomp(&re, "\\(^a*\\1\\)*", 0);
    regexec(&re, "aa", 0, 0, 0);

since BRE has backreferences, any application accepting a BRE from untrusted sources is already vulnerable to an attacker-controlled near-infinite (exponential-time) loop, but this particular case where the loop is actually infinite can and should be avoided.

ERE is not affected since the language an ERE describes is actually regular.

Reported-by: Simon Resch <simon.resch@code-intelligence.com>
Diffstat (limited to 'src')
-rw-r--r-- src/regex/regcomp.c | 6
1 files changed, 6 insertions, 0 deletions
diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
index fb24556e..b4b81968 100644
--- a/src/regex/regcomp.c
+++ b/src/regex/regcomp.c
@@ -409,6 +409,8 @@ typedef struct {
int position;
/* The highest back reference or -1 if none seen so far. */
int max_backref;
+ /* Bit mask of submatch IDs that can be back referenced. */
+ int backref_ok;
/* Compilation flags. */
int cflags;
} tre_parse_ctx_t;
@@ -769,6 +771,8 @@ static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int sub
node->submatch_id = subid;
node->num_submatches++;
ctx->n = node;
+ if (subid < 10)
+ ctx->backref_ok |= 1<<subid;
return REG_OK;
}
@@ -864,6 +868,8 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
if (!ere && (unsigned)*s-'1' < 9) {
/* back reference */
int val = *s - '0';
+ if (!(ctx->backref_ok & 1<<val))
+ return REG_ESUBREG;
node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
ctx->max_backref = MAX(val, ctx->max_backref);
} else {