Mercurial > hg > th-libs
diff th_regex.c @ 647:1e7e3f96632e
And some more work.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sat, 25 Jan 2020 06:47:41 +0200 |
parents | b897995101b7 |
children | 91c43398c6fc |
line wrap: on
line diff
--- a/th_regex.c Sat Jan 25 05:46:40 2020 +0200 +++ b/th_regex.c Sat Jan 25 06:47:41 2020 +0200 @@ -10,8 +10,15 @@ #ifdef TH_EXPERIMENTAL_REGEX_DEBUG -# define DBG_RE_MATCH(...) do { if (th_dbg_re_flags) fprintf(stdout, __VA_ARGS__); } while (0) BOOL th_dbg_re_flags = FALSE; + +# define DBG_RE_MATCH(...) do { \ + if (th_dbg_re_flags) \ + { \ + th_regex_dump_indent(stdout, level); \ + fprintf(stdout, __VA_ARGS__); \ + } \ + } while (0) #else # define DBG_RE_MATCH(...) #endif @@ -707,6 +714,62 @@ } +static void th_regex_dump_indent(FILE *fh, const int level) +{ + for (int indent = 0; indent < level; indent++) + fprintf(fh, " "); +} + + +static void th_regex_dump_node(FILE *fh, const th_regex_node_t *node) +{ + fprintf(fh, + "%s %s ", + re_match_modes[node->mode], + re_match_types[node->type]); + + if (node->mode == TH_RE_MATCH_COUNT) + { + fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ", + node->repeatMin, node->repeatMax); + } + + switch (node->type) + { + case TH_RE_TYPE_CHAR: + fprintf(fh, "'%c'", node->match.chr); + break; + + case TH_RE_TYPE_STR: + fprintf(fh, "\"%s\"", node->match.str); + break; + + case TH_RE_TYPE_ANY_CHAR: + fprintf(fh, "."); + break; + + case TH_RE_TYPE_LIST: + case TH_RE_TYPE_LIST_REVERSE: + fprintf(fh, "[ "); + for (size_t n = 0; n < node->match.list.nitems; n++) + { + const th_regex_list_item_t *li = &node->match.list.items[n]; + if (li->type) + { + fprintf(fh, "'%c-%c' ", li->start, li->end); + } + else + { + for (size_t i = 0; i < li->nchars; i++) + fprintf(fh, "'%c' ", li->chars[i]); + } + } + fprintf(fh, "]"); + break; + } +} + + void th_regex_dump(FILE *fh, const int level, const th_regex_t *expr) { if (expr != NULL) @@ -715,58 +778,17 @@ { th_regex_node_t *node = &expr->nodes[nnode]; - for (int indent = 0; indent < level; indent++) - fprintf(fh, " "); - - fprintf(fh, "[%" PRIu_SIZE_T " / %" PRIu_SIZE_T "] %s %s ", - nnode + 1, expr->nnodes, - re_match_modes[node->mode], - re_match_types[node->type]); + th_regex_dump_indent(fh, level); - if (node->mode == TH_RE_MATCH_COUNT) - { - fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ", - node->repeatMin, node->repeatMax); - } - - switch (node->type) - { - case TH_RE_TYPE_CHAR: - fprintf(fh, "'%c'\n", node->match.chr); - break; - - case TH_RE_TYPE_STR: - fprintf(fh, "\"%s\"\n", node->match.str); - break; + fprintf(fh, + "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ", + nnode + 1, expr->nnodes); - case TH_RE_TYPE_ANY_CHAR: - fprintf(fh, ".\n"); - break; + th_regex_dump_node(fh, node); + fprintf(fh, "\n"); - case TH_RE_TYPE_LIST: - case TH_RE_TYPE_LIST_REVERSE: - fprintf(fh, "[ "); - for (size_t n = 0; n < node->match.list.nitems; n++) - { - const th_regex_list_item_t *li = &node->match.list.items[n]; - if (li->type) - { - fprintf(fh, "'%c-%c' ", li->start, li->end); - } - else - { - for (size_t i = 0; i < li->nchars; i++) - fprintf(fh, "'%c' ", li->chars[i]); - } - } - fprintf(fh, "]\n"); - break; - - case TH_RE_TYPE_SUBEXPR: - fprintf(fh, "\n"); - th_regex_dump(fh, level + 1, node->match.expr); - break; - } + if (node->type == TH_RE_TYPE_SUBEXPR) + th_regex_dump(fh, level + 1, node->match.expr); } } } @@ -801,7 +823,8 @@ size_t *poffs, const th_regex_t *expr, const size_t startnode, - const int flags + const int flags, + const int level ); @@ -809,7 +832,8 @@ const th_regex_char_t *haystack, size_t *poffs, const th_regex_node_t *node, - const int flags + const int flags, + const int level ) { th_regex_char_t cch; @@ -818,7 +842,7 @@ switch (node->type) { case TH_RE_TYPE_SUBEXPR: - res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags); + res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags, level + 1); break; case TH_RE_TYPE_LIST: @@ -880,7 +904,8 @@ size_t *poffs, const th_regex_t *expr, const size_t startnode, - const int flags + const int flags, + const int level ) { BOOL res = TRUE; @@ -890,24 +915,28 @@ { const th_regex_node_t *node = &expr->nodes[nnode]; - DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n", - nnode + 1, expr->nnodes, - re_match_modes[node->mode], re_match_types[node->type], +#ifdef TH_EXPERIMENTAL_REGEX_DEBUG + th_regex_dump_indent(stdout, level); + fprintf(stdout, + "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ", + nnode + 1, expr->nnodes); + + th_regex_dump_node(stdout, node); + + fprintf(stdout, " <-> \"%s\"\n", haystack + soffs); +#endif switch (node->mode) { case TH_RE_MATCH_ONCE: - res = th_regex_match_one(haystack, &soffs, node, flags); + res = th_regex_match_one(haystack, &soffs, node, flags, level); break; case TH_RE_MATCH_COUNT: { ssize_t count = 0; - DBG_RE_MATCH(" min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", - node->repeatMin, node->repeatMax); - do { BOOL match; @@ -922,7 +951,7 @@ size_t noffs = toffs; for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; ) { - match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags); + match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags, level + 1); if (match) tnode++; } @@ -980,6 +1009,7 @@ const int flags) { size_t nmatches = 0; + int level = 0; (void) flags; if (pnmatches != NULL) @@ -998,10 +1028,7 @@ { size_t coffs = soffs; - DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n", - soffs, haystack + soffs); - - if (th_regex_match_expr(haystack, &coffs, expr, 0, flags)) + if (th_regex_match_expr(haystack, &coffs, expr, 0, flags, level)) { // A match was found, increase count nmatches++; @@ -1012,6 +1039,7 @@ if (pmatches != NULL) { + // Add the match region to the list th_regex_match_t *match = th_malloc0(sizeof(th_regex_match_t)); if (match == NULL) return THERR_MALLOC;