diff th_regex.c @ 647:1e7e3f96632e

And some more work.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 25 Jan 2020 06:47:41 +0200
parents b897995101b7
children 91c43398c6fc
line wrap: on
line diff
--- a/th_regex.c	Sat Jan 25 05:46:40 2020 +0200
+++ b/th_regex.c	Sat Jan 25 06:47:41 2020 +0200
@@ -10,8 +10,15 @@
 
 
 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
-#    define DBG_RE_MATCH(...)   do { if (th_dbg_re_flags) fprintf(stdout, __VA_ARGS__); } while (0)
 BOOL th_dbg_re_flags = FALSE;
+
+#    define DBG_RE_MATCH(...) do { /* printf-style debug trace to stdout */ \
+        if (th_dbg_re_flags) /* emits nothing unless the debug flag is set */ \
+        { \
+            th_regex_dump_indent(stdout, level); /* uses 'level' from the scope where the macro expands */ \
+            fprintf(stdout, __VA_ARGS__); \
+        } \
+    } while (0)
 #else
 #    define DBG_RE_MATCH(...)
 #endif
@@ -707,6 +714,62 @@
 }
 
 
+static void th_regex_dump_indent(FILE *fh, const int level)
+{
+    for (int remaining = level; remaining > 0; remaining--)
+        fputs("    ", fh); // One four-space step per nesting level
+}
+
+
+static void th_regex_dump_node(FILE *fh, const th_regex_node_t *node)
+{
+    // Writes "<mode> <type> ", optional repeat bounds, then the node payload; no newline
+    fprintf(fh, "%s %s ",
+        re_match_modes[node->mode], re_match_types[node->type]);
+
+    // Counted repeats additionally show their min/max bounds
+    if (node->mode == TH_RE_MATCH_COUNT)
+        fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ",
+            node->repeatMin, node->repeatMax);
+
+    switch (node->type)
+    {
+        case TH_RE_TYPE_CHAR:
+            fprintf(fh, "'%c'", node->match.chr);
+            break;
+
+        case TH_RE_TYPE_STR:
+            fprintf(fh, "\"%s\"", node->match.str);
+            break;
+
+        case TH_RE_TYPE_ANY_CHAR:
+            fputs(".", fh);
+            break;
+
+        case TH_RE_TYPE_LIST:
+        case TH_RE_TYPE_LIST_REVERSE:
+            fputs("[ ", fh);
+            for (size_t idx = 0; idx < node->match.list.nitems; idx++)
+            {
+                const th_regex_list_item_t *item = &node->match.list.items[idx];
+
+                // Items with a non-zero type are printed as a start-end pair
+                if (item->type)
+                {
+                    fprintf(fh, "'%c-%c' ", item->start, item->end);
+                    continue;
+                }
+
+                // Otherwise every character of the item is printed separately
+                for (size_t c = 0; c < item->nchars; c++)
+                    fprintf(fh, "'%c' ", item->chars[c]);
+            }
+            fputs("]", fh);
+            break;
+    }
+}
+
+
 void th_regex_dump(FILE *fh, const int level, const th_regex_t *expr)
 {
     if (expr != NULL)
@@ -715,58 +778,17 @@
         {
             th_regex_node_t *node = &expr->nodes[nnode];
 
-            for (int indent = 0; indent < level; indent++)
-                fprintf(fh, "    ");
-
-            fprintf(fh, "[%" PRIu_SIZE_T " / %" PRIu_SIZE_T "] %s %s ",
-                nnode + 1, expr->nnodes,
-                re_match_modes[node->mode],
-                re_match_types[node->type]);
+            th_regex_dump_indent(fh, level);
 
-            if (node->mode == TH_RE_MATCH_COUNT)
-            {
-                fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ",
-                    node->repeatMin, node->repeatMax);
-            }
-
-            switch (node->type)
-            {
-                case TH_RE_TYPE_CHAR:
-                    fprintf(fh, "'%c'\n", node->match.chr);
-                    break;
-
-                case TH_RE_TYPE_STR:
-                    fprintf(fh, "\"%s\"\n", node->match.str);
-                    break;
+            fprintf(fh,
+                "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ",
+                nnode + 1, expr->nnodes);
 
-                case TH_RE_TYPE_ANY_CHAR:
-                    fprintf(fh, ".\n");
-                    break;
+            th_regex_dump_node(fh, node);
+            fprintf(fh, "\n");
 
-                case TH_RE_TYPE_LIST:
-                case TH_RE_TYPE_LIST_REVERSE:
-                    fprintf(fh, "[ ");
-                    for (size_t n = 0; n < node->match.list.nitems; n++)
-                    {
-                        const th_regex_list_item_t *li = &node->match.list.items[n];
-                        if (li->type)
-                        {
-                            fprintf(fh, "'%c-%c' ", li->start, li->end);
-                        }
-                        else
-                        {
-                            for (size_t i = 0; i < li->nchars; i++)
-                                fprintf(fh, "'%c' ", li->chars[i]);
-                        }
-                    }
-                    fprintf(fh, "]\n");
-                    break;
-
-                case TH_RE_TYPE_SUBEXPR:
-                    fprintf(fh, "\n");
-                    th_regex_dump(fh, level + 1, node->match.expr);
-                    break;
-            }
+            if (node->type == TH_RE_TYPE_SUBEXPR)
+                th_regex_dump(fh, level + 1, node->match.expr);
         }
     }
 }
@@ -801,7 +823,8 @@
     size_t *poffs,
     const th_regex_t *expr,
     const size_t startnode,
-    const int flags
+    const int flags,
+    const int level
     );
 
 
@@ -809,7 +832,8 @@
     const th_regex_char_t *haystack,
     size_t *poffs,
     const th_regex_node_t *node,
-    const int flags
+    const int flags,
+    const int level
     )
 {
     th_regex_char_t cch;
@@ -818,7 +842,7 @@
     switch (node->type)
     {
         case TH_RE_TYPE_SUBEXPR:
-            res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags);
+            res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags, level + 1);
             break;
 
         case TH_RE_TYPE_LIST:
@@ -880,7 +904,8 @@
     size_t *poffs,
     const th_regex_t *expr,
     const size_t startnode,
-    const int flags
+    const int flags,
+    const int level
     )
 {
     BOOL res = TRUE;
@@ -890,24 +915,28 @@
     {
         const th_regex_node_t *node = &expr->nodes[nnode];
 
-        DBG_RE_MATCH("  expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n",
-            nnode + 1, expr->nnodes,
-            re_match_modes[node->mode], re_match_types[node->type],
+#ifdef TH_EXPERIMENTAL_REGEX_DEBUG
+        th_regex_dump_indent(stdout, level);
+        fprintf(stdout,
+            "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ",
+            nnode + 1, expr->nnodes);
+
+        th_regex_dump_node(stdout, node);
+
+        fprintf(stdout, " <-> \"%s\"\n",
             haystack + soffs);
+#endif
 
         switch (node->mode)
         {
             case TH_RE_MATCH_ONCE:
-                res = th_regex_match_one(haystack, &soffs, node, flags);
+                res = th_regex_match_one(haystack, &soffs, node, flags, level);
                 break;
 
             case TH_RE_MATCH_COUNT:
                 {
                     ssize_t count = 0;
 
-                    DBG_RE_MATCH("    min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n",
-                        node->repeatMin, node->repeatMax);
-
                     do
                     {
                         BOOL match;
@@ -922,7 +951,7 @@
                             size_t noffs = toffs;
                             for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; )
                             {
-                                match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
+                                match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags, level + 1);
                                 if (match)
                                     tnode++;
                             }
@@ -980,6 +1009,7 @@
     const int flags)
 {
     size_t nmatches = 0;
+    int level = 0;
     (void) flags;
 
     if (pnmatches != NULL)
@@ -998,10 +1028,7 @@
     {
         size_t coffs = soffs;
 
-        DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n",
-            soffs, haystack + soffs);
-
-        if (th_regex_match_expr(haystack, &coffs, expr, 0, flags))
+        if (th_regex_match_expr(haystack, &coffs, expr, 0, flags, level))
         {
             // A match was found, increase count
             nmatches++;
@@ -1012,6 +1039,7 @@
 
             if (pmatches != NULL)
             {
+                // Add the match region to the list
                 th_regex_match_t *match = th_malloc0(sizeof(th_regex_match_t));
                 if (match == NULL)
                     return THERR_MALLOC;