# HG changeset patch # User Matti Hamalainen # Date 1579171809 -7200 # Node ID d895b0fd6ad684fd299cc0cf6b45e0c132269ba7 # Parent a0e8d9c6300b7ab6ec35aeea4126d7ccc965b756 Combine code from th_regex_compile() to th_regex_compile_do(). diff -r a0e8d9c6300b -r d895b0fd6ad6 th_regex.c --- a/th_regex.c Thu Jan 16 03:33:11 2020 +0200 +++ b/th_regex.c Thu Jan 16 12:50:09 2020 +0200 @@ -234,21 +234,31 @@ } -static int th_regex_compile_do(th_regex_parse_ctx *ctx) +int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern) { int res = THERR_OK; + th_regex_parse_ctx ctx; th_regex_node node, *pnode; th_regex_char *tmp = NULL; size_t start; - for (; ctx->pattern[ctx->offs] != 0; ctx->offs++) + if (pexpr == NULL || pattern == NULL) { - DBG_RE_PRINT_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx->offs, ctx->pattern[ctx->offs]); - switch (ctx->pattern[ctx->offs]) + res = THERR_NULLPTR; + goto exit; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.pattern = pattern; + + for (; ctx.pattern[ctx.offs] != 0; ctx.offs++) + { + DBG_RE_PRINT_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx.offs, ctx.pattern[ctx.offs]); + switch (ctx.pattern[ctx.offs]) { case '?': // Previous token is optional (repeat 0-1 times) - if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK) + if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) goto exit; pnode->mode = TH_RE_MATCH_COUNT; @@ -258,7 +268,7 @@ case '*': // Previous token can repeat 0 or more times - if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK) + if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) goto exit; pnode->mode = TH_RE_MATCH_COUNT; @@ -268,7 +278,7 @@ case '+': // Previous token must repeat 1 or more times - if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK) + if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) goto exit; pnode->mode = TH_RE_MATCH_COUNT; @@ -278,8 +288,8 @@ case '{': // {n} | {min,max} - start = ctx->offs + 1; - if (!th_regex_find_next(ctx->pattern, start, &ctx->offs, '}')) + start = ctx.offs + 1; + if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}')) { // End not found res = THERR_INVALID_DATA; @@ -288,9 +298,9 @@ th_free(tmp); - if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK || - (res = th_regex_strndup(&tmp, ctx->pattern + start, - ctx->offs - start)) != THERR_OK) + if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK || + (res = th_regex_strndup(&tmp, ctx.pattern + start, + ctx.offs - start)) != THERR_OK) goto exit; pnode->mode = TH_RE_MATCH_COUNT; @@ -326,7 +336,7 @@ case '(': // Start of subpattern - if ((res = th_regex_ctx_push(ctx)) != THERR_OK) + if ((res = th_regex_ctx_push(&ctx)) != THERR_OK) goto exit; break; @@ -335,8 +345,8 @@ th_regex_node_init(&node); node.type = TH_RE_TYPE_SUBEXPR; - if ((res = th_regex_ctx_pop(ctx, &node.match.expr)) != THERR_OK || - (res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_pop(&ctx, &node.match.expr)) != THERR_OK || + (res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; @@ -345,7 +355,7 @@ th_regex_node_init(&node); node.mode = TH_RE_MATCH_ANCHOR_START; - if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; @@ -354,29 +364,29 @@ th_regex_node_init(&node); node.mode = TH_RE_MATCH_ANCHOR_END; - if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; case '[': // Start of char list - start = ctx->offs + 1; - if (!th_regex_find_next(ctx->pattern, start, &ctx->offs, ']') || - ctx->offs == start) + start = ctx.offs + 1; + if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') || + ctx.offs == start) { res = THERR_INVALID_DATA; goto exit; } th_regex_node_init(&node); - node.type = (ctx->pattern[start] == '^') ? + node.type = (ctx.pattern[start] == '^') ? TH_RE_TYPE_LIST_REVERSE : TH_RE_TYPE_LIST; - node.match.list.nchars = ctx->offs - start; + node.match.list.nchars = ctx.offs - start; if ((res = th_regex_strndup(&node.match.list.chars, - ctx->pattern + start, node.match.list.nchars)) != THERR_OK) + ctx.pattern + start, node.match.list.nchars)) != THERR_OK) goto exit; - if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; @@ -385,14 +395,14 @@ th_regex_node_init(&node); node.type = TH_RE_TYPE_ANY_CHAR; - if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; case '\\': // Literal escape - ctx->offs++; - if (ctx->pattern[ctx->offs] == 0) + ctx.offs++; + if (ctx.pattern[ctx.offs] == 0) { // End of pattern, error res = THERR_INVALID_DATA; @@ -401,9 +411,9 @@ th_regex_node_init(&node); node.type = TH_RE_TYPE_CHAR; - node.match.chr = ctx->pattern[ctx->offs]; + node.match.chr = ctx.pattern[ctx.offs]; - if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; @@ -411,35 +421,21 @@ // Given character must match th_regex_node_init(&node); node.type = TH_RE_TYPE_CHAR; - node.match.chr = ctx->pattern[ctx->offs]; + node.match.chr = ctx.pattern[ctx.offs]; - if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK) + if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) goto exit; break; } } exit: + *pexpr = ctx.data; th_free(tmp); return res; } -int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern) -{ - th_regex_parse_ctx ctx; - int res = THERR_OK; - - if (pexpr == NULL || pattern == NULL) - return THERR_NULLPTR; - - memset(&ctx, 0, sizeof(ctx)); - ctx.pattern = pattern; - res = th_regex_compile_do(&ctx); - *pexpr = ctx.data; - - return res; -} void th_regex_free(th_regex_ctx *expr)