# HG changeset patch # User Matti Hamalainen # Date 1579860403 -7200 # Node ID a2bf1ea05b054f3db5a9456da3b68a3547143a34 # Parent 3a35db5c1873925a0018f0b285b70c6d239c00e4 Cleanups. diff -r 3a35db5c1873 -r a2bf1ea05b05 tests.c --- a/tests.c Fri Jan 24 09:39:09 2020 +0200 +++ b/tests.c Fri Jan 24 12:06:43 2020 +0200 @@ -577,7 +577,6 @@ int res; printf("========================================\n"); - printf("pattern '%s'\n", pattern); if ((res = th_regex_compile(&reg, pattern)) != THERR_OK) { THERR("Regex compilation failed: %s\n", @@ -590,6 +589,7 @@ th_regex_match_t *matches = NULL; size_t nmatches; + printf("----------------------------------------\n"); if ((res = th_regex_match(reg, def->str, &nmatches, &matches, -1, def->flags)) != THERR_OK) { @@ -598,6 +598,7 @@ goto out; } + printf("\npattern '%s'\n", pattern); printf(" '%s': matched %" PRIu_SIZE_T " time(s), testresult=%s\n", def->str, nmatches, @@ -879,7 +880,7 @@ if (test_set_start("Regular expressions")) { #ifdef TH_EXPERIMENTAL_REGEX_DEBUG - th_dbg_re_flags = TH_DBG_RE_MATCH; + th_dbg_re_flags = th_verbosity > 0 ? 
TH_DBG_RE_MATCH : 0; #endif #if 0 @@ -890,8 +891,6 @@ printf("result: %s\n", th_error_str(res)); th_regex_free(reg); } - - // { static const test_regex_def tlist[] = { diff -r 3a35db5c1873 -r a2bf1ea05b05 th_regex.c --- a/th_regex.c Fri Jan 24 09:39:09 2020 +0200 +++ b/th_regex.c Fri Jan 24 12:06:43 2020 +0200 @@ -19,8 +19,7 @@ static const char *re_match_modes[] = { "ONCE", - "COUNT GREEDY", - "COUNT NONGREEDY", + "COUNT", "ANCHOR START", "ANCHOR END", }; @@ -83,8 +82,7 @@ enum { TH_RE_MATCH_ONCE, - TH_RE_MATCH_COUNT_GREEDY, - TH_RE_MATCH_COUNT_NONGREEDY, + TH_RE_MATCH_COUNT, TH_RE_MATCH_ANCHOR_START, TH_RE_MATCH_ANCHOR_END, @@ -423,33 +421,21 @@ if (cch == '?') { - // Check if previous was a count - if (pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY) - { - res = THERR_INVALID_DATA; - goto exit; - } - else - if (pnode->mode != TH_RE_MATCH_COUNT_GREEDY) - { - // Previous token is optional (repeat 0-1 times) - pnode->repeatMin = 0; - pnode->repeatMax = 1; - } - - pnode->mode = TH_RE_MATCH_COUNT_NONGREEDY; + // Previous token is optional (repeat 0-1 times) (non-greedy matching) + pnode->mode = TH_RE_MATCH_COUNT; + pnode->repeatMin = 0; + pnode->repeatMax = 1; } else { // Check if previous was a count ("**", "*+", etc.) 
- if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY || - pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY) + if (pnode->mode == TH_RE_MATCH_COUNT) { res = THERR_INVALID_DATA; goto exit; } - pnode->mode = TH_RE_MATCH_COUNT_GREEDY; + pnode->mode = TH_RE_MATCH_COUNT; if (cch == '*') { @@ -483,7 +469,7 @@ ctx.offs - start)) != THERR_OK) goto exit; - pnode->mode = TH_RE_MATCH_COUNT_GREEDY; + pnode->mode = TH_RE_MATCH_COUNT; if (th_regex_find_next(tmp, 0, &start, ',')) { @@ -744,18 +730,17 @@ const th_regex_node_t *node = &expr->nodes[nnode]; DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n", - nnode, expr->nnodes, + nnode + 1, expr->nnodes, re_match_modes[node->mode], re_match_types[node->type], - haystack + *poffs); + haystack + soffs); switch (node->mode) { case TH_RE_MATCH_ONCE: - res = th_regex_match_one(haystack, poffs, node, flags); + res = th_regex_match_one(haystack, &soffs, node, flags); break; - case TH_RE_MATCH_COUNT_GREEDY: - case TH_RE_MATCH_COUNT_NONGREEDY: + case TH_RE_MATCH_COUNT: { ssize_t count = 0; @@ -765,26 +750,29 @@ do { BOOL match; - size_t toffs = *poffs, tnode = nnode; + size_t toffs = soffs, tnode; + DBG_RE_MATCH(" START '%s'\n", haystack + toffs); do { - match = th_regex_match_one(haystack, &toffs, node, flags); - if (match && haystack[toffs] != 0) + match = TRUE; + for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; ) { - for (tnode = nnode + 1; match && tnode < expr->nnodes && haystack[toffs] != 0; tnode++) - { - size_t noffs = toffs; - match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags); - DBG_RE_MATCH(" '%s': %d\n", haystack + noffs, match); - } + match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags); + if (match) + tnode++; } + DBG_RE_MATCH(" '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode); + if (node->repeatMin >= 0 && match) + break; } while (!match && haystack[toffs] != 0); + DBG_RE_MATCH(" END '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + 
toffs, match, tnode); + if (match) { // Node matched count++; - *poffs = soffs; + soffs = toffs; nnode = tnode; res = (node->repeatMax > 0 && count >= node->repeatMax); } @@ -801,18 +789,17 @@ } break; - case TH_RE_MATCH_ANCHOR_START: - res = (*poffs == 0); + res = (soffs == 0); break; case TH_RE_MATCH_ANCHOR_END: - res = (haystack[*poffs] == 0); + res = (haystack[soffs] == 0); break; } } - if (!res) + if (res) *poffs = soffs; return res;