changeset 647:1e7e3f96632e

And some more work.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 25 Jan 2020 06:47:41 +0200
parents 9fcb0098f302
children 91c43398c6fc
files tests.c th_regex.c
diffstat 2 files changed, 159 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/tests.c	Sat Jan 25 05:46:40 2020 +0200
+++ b/tests.c	Sat Jan 25 06:47:41 2020 +0200
@@ -568,7 +568,16 @@
     th_regex_char_t *str;
     size_t nmatches;
     int flags;
-} test_regex_def;
+} test_regex_def1;
+
+
+typedef struct  // One regex test case that carries its own pattern (consumed by test_regex_list2)
+{
+    th_regex_char_t *pattern;  // Pattern handed to th_regex_compile() for this case
+    th_regex_char_t *str;      // String to match; a NULL str terminates a list of cases
+    size_t nmatches;           // Expected match count, compared with the count th_regex_match() reports
+    int flags;                 // Flags forwarded to th_regex_match()
+} test_regex_def2;
 
 
 void test_regex_print_matches(const th_regex_char_t *str, const th_regex_match_t *matches)
@@ -586,7 +595,7 @@
 }
 
 
-void test_regex_list(const test_regex_def *list, const th_regex_char_t *pattern)
+void test_regex_list1(const test_regex_def1 *list, const th_regex_char_t *pattern)
 {
     th_regex_t *expr = NULL;
     int res;
@@ -602,7 +611,7 @@
 
     th_regex_dump(stdout, 1, expr);
 
-    for (const test_regex_def *def = list; def->str != NULL; def++)
+    for (const test_regex_def1 *def = list; def->str != NULL; def++)
     {
         th_regex_match_t *matches = NULL;
         size_t nmatches;
@@ -629,6 +638,50 @@
     th_regex_free(expr);
 }
 
+void test_regex_list2(const test_regex_def2 *list)  // Compile each entry's own pattern, match it against that entry's string and print the outcome
+{
+    printf("========================================\n");
+
+    for (const test_regex_def2 *def = list; def->str != NULL; def++)  // The list ends at the first entry whose str is NULL
+    {
+        th_regex_t *expr = NULL;  // Starts as NULL because the 'out' cleanup also runs when compilation fails (assumes th_regex_free() accepts NULL -- TODO confirm)
+        th_regex_match_t *matches = NULL;  // Same reasoning for th_regex_free_matches() -- TODO confirm it accepts NULL
+        size_t nmatches;  // Only read after th_regex_match() has returned THERR_OK
+        int res;
+
+        printf("Compiling pattern \"%s\"\n", def->pattern);
+        if ((res = th_regex_compile(&expr, def->pattern)) != THERR_OK)
+        {
+            THERR("Regex compilation failed: %s\n",
+                th_error_str(res));
+            goto out;  // Skip matching for this entry, but still run the cleanup and continue with the next entry
+        }
+
+        th_regex_dump(stdout, 1, expr);  // Print the compiled expression at indent level 1
+
+        printf("----------------------------------------\n");
+
+        if ((res = th_regex_match(expr, def->str,
+            &nmatches, &matches, -1, def->flags)) != THERR_OK)  // NOTE(review): the meaning of the -1 argument is not visible here -- confirm against th_regex_match()
+        {
+            THERR("Regex match returned error: %s\n",
+                th_error_str(res));
+            goto out;
+        }
+
+        printf("'%s': matched %" PRIu_SIZE_T " time(s), testresult=%s\n",
+            def->str,
+            nmatches,
+            def->nmatches == nmatches ? "YES" : "NO");  // "YES" when the reported count equals the expected count
+
+        test_regex_print_matches(def->str, matches);
+
+out:  // Per-iteration cleanup, reached on both the success and the failure paths
+        th_regex_free_matches(matches);
+        th_regex_free(expr);
+    }
+}
+
 #endif
 
 
@@ -906,7 +959,7 @@
 
 #if 0
         {
-            static const test_regex_def tlist[] =
+            static const test_regex_def1 tlist[] =
             {
                 { "abcfoabccg"                   , 1, 0 },
                 { "abcbcfoabccg"                 , 1, 0 },
@@ -915,11 +968,11 @@
                 { NULL                           , 0, 0 }
             };
 
-            test_regex_list(tlist, "a(bc){1,2}fo[oab]*cc?g");
+            test_regex_list1(tlist, "a(bc){1,2}fo[oab]*cc?g");
         }
 
         {
-            static const test_regex_def tlist[] =
+            static const test_regex_def1 tlist[] =
             {
                 { "abcfoabccg"                   , 1, 0 },
                 { "abcbcfoabccg"                 , 1, 0 },
@@ -928,11 +981,11 @@
                 { NULL                           , 0, 0 }
             };
 
-            test_regex_list(tlist, "^a(bc){1,2}fo[oab]*cc?g");
+            test_regex_list1(tlist, "^a(bc){1,2}fo[oab]*cc?g");
         }
 
         {
-            static const test_regex_def tlist[] =
+            static const test_regex_def1 tlist[] =
             {
                 { "cg"                           , 1, 0 },
                 { "g"                            , 1, 0 },
@@ -941,12 +994,12 @@
                 { NULL                           , 0, 0 }
             };
 
-            test_regex_list(tlist, "g$");
+            test_regex_list1(tlist, "g$");
         }
 #endif
 
         {
-            static const test_regex_def tlist[] =
+            static const test_regex_def1 tlist[] =
             {
 //                { "zoobar"                       , 1, 0 },
                 { "zoo lol bar"                  , 1, 0 },
@@ -954,7 +1007,8 @@
                 { NULL                           , 0, 0 }
             };
 
-            test_regex_list(tlist, "zoo.*?bar");
+            test_regex_list1(tlist, "zoo.*?bar");
+//            test_regex_list(tlist, "zoo.*?bar");
         }
     }
 #endif
--- a/th_regex.c	Sat Jan 25 05:46:40 2020 +0200
+++ b/th_regex.c	Sat Jan 25 06:47:41 2020 +0200
@@ -10,8 +10,15 @@
 
 
 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
-#    define DBG_RE_MATCH(...)   do { if (th_dbg_re_flags) fprintf(stdout, __VA_ARGS__); } while (0)
 BOOL th_dbg_re_flags = FALSE;
+
+#    define DBG_RE_MATCH(...) do { /* Indented debug trace; NOTE: reads a variable named 'level' from the expansion site, it is not a macro parameter */ \
+        if (th_dbg_re_flags) /* Prints nothing unless this runtime flag is set */ \
+        { \
+            th_regex_dump_indent(stdout, level); \
+            fprintf(stdout, __VA_ARGS__); \
+        } \
+    } while (0)
 #else
 #    define DBG_RE_MATCH(...)
 #endif
@@ -707,6 +714,62 @@
 }
 
 
+static void th_regex_dump_indent(FILE *fh, const int level)  // Write 'level' indentation steps of four spaces each to 'fh'
+{
+    for (int indent = 0; indent < level; indent++)  // Writes nothing when level <= 0
+        fprintf(fh, "    ");
+}
+
+
+static void th_regex_dump_node(FILE *fh, const th_regex_node_t *node)  // Print a description of one node to 'fh'; no newline is written, the caller terminates the line
+{
+    fprintf(fh,
+        "%s %s ",
+        re_match_modes[node->mode],  // Mode and type are used directly as indices into the name tables
+        re_match_types[node->type]);
+
+    if (node->mode == TH_RE_MATCH_COUNT)  // Repeat bounds are printed only for counted matches
+    {
+        fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ",
+            node->repeatMin, node->repeatMax);
+    }
+
+    switch (node->type)  // Type-specific payload; NOTE(review): no default case, so types not listed here (e.g. TH_RE_TYPE_SUBEXPR) print nothing further
+    {
+        case TH_RE_TYPE_CHAR:
+            fprintf(fh, "'%c'", node->match.chr);
+            break;
+
+        case TH_RE_TYPE_STR:
+            fprintf(fh, "\"%s\"", node->match.str);
+            break;
+
+        case TH_RE_TYPE_ANY_CHAR:
+            fprintf(fh, ".");
+            break;
+
+        case TH_RE_TYPE_LIST:
+        case TH_RE_TYPE_LIST_REVERSE:  // Both list variants share the same item formatting
+            fprintf(fh, "[ ");
+            for (size_t n = 0; n < node->match.list.nitems; n++)
+            {
+                const th_regex_list_item_t *li = &node->match.list.items[n];
+                if (li->type)  // Non-zero item type: printed as a 'start-end' range
+                {
+                    fprintf(fh, "'%c-%c' ", li->start, li->end);
+                }
+                else  // Zero item type: each of the item's nchars characters is printed individually
+                {
+                    for (size_t i = 0; i < li->nchars; i++)
+                        fprintf(fh, "'%c' ", li->chars[i]);
+                }
+            }
+            fprintf(fh, "]");
+            break;
+    }
+}
+
+
 void th_regex_dump(FILE *fh, const int level, const th_regex_t *expr)
 {
     if (expr != NULL)
@@ -715,58 +778,17 @@
         {
             th_regex_node_t *node = &expr->nodes[nnode];
 
-            for (int indent = 0; indent < level; indent++)
-                fprintf(fh, "    ");
-
-            fprintf(fh, "[%" PRIu_SIZE_T " / %" PRIu_SIZE_T "] %s %s ",
-                nnode + 1, expr->nnodes,
-                re_match_modes[node->mode],
-                re_match_types[node->type]);
+            th_regex_dump_indent(fh, level);
 
-            if (node->mode == TH_RE_MATCH_COUNT)
-            {
-                fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ",
-                    node->repeatMin, node->repeatMax);
-            }
-
-            switch (node->type)
-            {
-                case TH_RE_TYPE_CHAR:
-                    fprintf(fh, "'%c'\n", node->match.chr);
-                    break;
-
-                case TH_RE_TYPE_STR:
-                    fprintf(fh, "\"%s\"\n", node->match.str);
-                    break;
+            fprintf(fh,
+                "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ",
+                nnode + 1, expr->nnodes);
 
-                case TH_RE_TYPE_ANY_CHAR:
-                    fprintf(fh, ".\n");
-                    break;
+            th_regex_dump_node(fh, node);
+            fprintf(fh, "\n");
 
-                case TH_RE_TYPE_LIST:
-                case TH_RE_TYPE_LIST_REVERSE:
-                    fprintf(fh, "[ ");
-                    for (size_t n = 0; n < node->match.list.nitems; n++)
-                    {
-                        const th_regex_list_item_t *li = &node->match.list.items[n];
-                        if (li->type)
-                        {
-                            fprintf(fh, "'%c-%c' ", li->start, li->end);
-                        }
-                        else
-                        {
-                            for (size_t i = 0; i < li->nchars; i++)
-                                fprintf(fh, "'%c' ", li->chars[i]);
-                        }
-                    }
-                    fprintf(fh, "]\n");
-                    break;
-
-                case TH_RE_TYPE_SUBEXPR:
-                    fprintf(fh, "\n");
-                    th_regex_dump(fh, level + 1, node->match.expr);
-                    break;
-            }
+            if (node->type == TH_RE_TYPE_SUBEXPR)
+                th_regex_dump(fh, level + 1, node->match.expr);
         }
     }
 }
@@ -801,7 +823,8 @@
     size_t *poffs,
     const th_regex_t *expr,
     const size_t startnode,
-    const int flags
+    const int flags,
+    const int level
     );
 
 
@@ -809,7 +832,8 @@
     const th_regex_char_t *haystack,
     size_t *poffs,
     const th_regex_node_t *node,
-    const int flags
+    const int flags,
+    const int level
     )
 {
     th_regex_char_t cch;
@@ -818,7 +842,7 @@
     switch (node->type)
     {
         case TH_RE_TYPE_SUBEXPR:
-            res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags);
+            res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags, level + 1);
             break;
 
         case TH_RE_TYPE_LIST:
@@ -880,7 +904,8 @@
     size_t *poffs,
     const th_regex_t *expr,
     const size_t startnode,
-    const int flags
+    const int flags,
+    const int level
     )
 {
     BOOL res = TRUE;
@@ -890,24 +915,28 @@
     {
         const th_regex_node_t *node = &expr->nodes[nnode];
 
-        DBG_RE_MATCH("  expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n",
-            nnode + 1, expr->nnodes,
-            re_match_modes[node->mode], re_match_types[node->type],
+#ifdef TH_EXPERIMENTAL_REGEX_DEBUG
+        th_regex_dump_indent(stdout, level);
+        fprintf(stdout,
+            "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ",
+            nnode + 1, expr->nnodes);
+
+        th_regex_dump_node(stdout, node);
+
+        fprintf(stdout, " <-> \"%s\"\n",
             haystack + soffs);
+#endif
 
         switch (node->mode)
         {
             case TH_RE_MATCH_ONCE:
-                res = th_regex_match_one(haystack, &soffs, node, flags);
+                res = th_regex_match_one(haystack, &soffs, node, flags, level);
                 break;
 
             case TH_RE_MATCH_COUNT:
                 {
                     ssize_t count = 0;
 
-                    DBG_RE_MATCH("    min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n",
-                        node->repeatMin, node->repeatMax);
-
                     do
                     {
                         BOOL match;
@@ -922,7 +951,7 @@
                             size_t noffs = toffs;
                             for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; )
                             {
-                                match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
+                                match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags, level + 1);
                                 if (match)
                                     tnode++;
                             }
@@ -980,6 +1009,7 @@
     const int flags)
 {
     size_t nmatches = 0;
+    int level = 0;
     (void) flags;
 
     if (pnmatches != NULL)
@@ -998,10 +1028,7 @@
     {
         size_t coffs = soffs;
 
-        DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n",
-            soffs, haystack + soffs);
-
-        if (th_regex_match_expr(haystack, &coffs, expr, 0, flags))
+        if (th_regex_match_expr(haystack, &coffs, expr, 0, flags, level))
         {
             // A match was found, increase count
             nmatches++;
@@ -1012,6 +1039,7 @@
 
             if (pmatches != NULL)
             {
+                // Add the match region to the list
                 th_regex_match_t *match = th_malloc0(sizeof(th_regex_match_t));
                 if (match == NULL)
                     return THERR_MALLOC;