changeset 649:2c9260f5cf44

Tweedle.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 25 Jan 2020 11:35:59 +0200
parents 91c43398c6fc
children 24cbab6e88c6
files tests.c th_regex.c
diffstat 2 files changed, 88 insertions(+), 72 deletions(-) [+]
line wrap: on
line diff
--- a/tests.c	Sat Jan 25 08:01:17 2020 +0200
+++ b/tests.c	Sat Jan 25 11:35:59 2020 +0200
@@ -609,7 +609,8 @@
         goto out;
     }
 
-    th_regex_dump(stdout, 1, expr);
+    if (th_verbosity > 0)
+        th_regex_dump(stdout, 1, expr);
 
     for (const test_regex_def1 *def = list; def->str != NULL; def++)
     {
@@ -638,6 +639,7 @@
     th_regex_free(expr);
 }
 
+
 void test_regex_list2(const test_regex_def2 *list)
 {
     printf("========================================\n");
@@ -657,7 +659,8 @@
             goto out;
         }
 
-        th_regex_dump(stdout, 1, expr);
+        if (th_verbosity > 0)
+            th_regex_dump(stdout, 1, expr);
 
         printf("----------------------------------------\n");
 
--- a/th_regex.c	Sat Jan 25 08:01:17 2020 +0200
+++ b/th_regex.c	Sat Jan 25 11:35:59 2020 +0200
@@ -813,8 +813,10 @@
         if (item->type == 0)
         {
             for (size_t n = 0; n < item->nchars; n++)
-            if (item->chars[n] == cch)
-                return TRUE;
+            {
+                if (item->chars[n] == cch)
+                    return TRUE;
+            }
         }
         else
         {
@@ -829,7 +831,7 @@
 
 static BOOL th_regex_match_expr(
     const th_regex_char_t *haystack,
-    size_t *poffs,
+    size_t *offs,
     const th_regex_t *expr,
     const size_t startnode,
     const int flags,
@@ -839,7 +841,7 @@
 
 static BOOL th_regex_match_one(
     const th_regex_char_t *haystack,
-    size_t *poffs,
+    size_t *offs,
     const th_regex_node_t *node,
     const int flags,
     const int level
@@ -851,12 +853,12 @@
     switch (node->type)
     {
         case TH_RE_TYPE_SUBEXPR:
-            res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags, level + 1);
+            res = th_regex_match_expr(haystack, offs, node->match.expr, 0, flags, level + 1);
             break;
 
         case TH_RE_TYPE_LIST:
         case TH_RE_TYPE_LIST_REVERSE:
-            if ((cch = haystack[*poffs]) == 0)
+            if ((cch = haystack[*offs]) == 0)
                 res = FALSE;
             else
             {
@@ -865,27 +867,27 @@
                 if (node->type == TH_RE_TYPE_LIST_REVERSE)
                     res = !res;
 
-                (*poffs)++;
+                (*offs)++;
             }
             break;
 
         case TH_RE_TYPE_ANY_CHAR:
-            if ((cch = haystack[*poffs]) == 0)
+            if ((cch = haystack[*offs]) == 0)
                 res = FALSE;
             else
             {
                 res = TRUE;
-                (*poffs)++;
+                (*offs)++;
             }
             break;
 
         case TH_RE_TYPE_CHAR:
-            if ((cch = haystack[*poffs]) == 0)
+            if ((cch = haystack[*offs]) == 0)
                 res = FALSE;
             else
             {
                 res = (cch == node->match.chr);
-                (*poffs)++;
+                (*offs)++;
             }
             break;
 
@@ -893,9 +895,9 @@
             res = TRUE;
             for (th_regex_char_t *str = node->match.str;
                 res && *str != 0;
-                str++, (*poffs)++)
+                str++, (*offs)++)
             {
-                if (haystack[*poffs] != *str)
+                if (haystack[*offs] != *str)
                     res = FALSE;
             }
             break;
@@ -905,9 +907,73 @@
 }
 
 
+static BOOL th_regex_match_count( // Match `node` repeatedly, then match the rest of `expr` after it
+    const th_regex_char_t *haystack,
+    size_t *offs, // in/out: offset into haystack; written back only when the rest of expr matched
+    const th_regex_t *expr,
+    const th_regex_node_t *node,
+    size_t *nnode, // in/out: rest of expr is matched from *nnode + 1; set to expr->nnodes on match
+    const int flags,
+    const int level
+    )
+{
+    size_t toffs = *offs, noffs; // toffs: working offset; noffs: offset before the current attempt
+    BOOL res, match = FALSE;
+    ssize_t count = 0; // number of successful matches of `node`
+
+    if (node->repeatMin > 0) // NOTE(review): the loop below is skipped entirely when repeatMin <= 0 -- confirm intended
+    do
+    {
+        noffs = toffs;
+        match = th_regex_match_one(haystack, &toffs, node, flags, level);
+        if (match)
+        {
+            count++;
+        }
+        else
+            toffs = noffs; // undo any offset advance made by the failed attempt
+
+        if (node->repeatMin >= 0 && // stop once the minimum and a positive maximum are both reached
+            count >= node->repeatMin &&
+            node->repeatMax > 0 &&
+            count >= node->repeatMax)
+            break;
+
+    } while (match && toffs > noffs); // keep going only while matching and consuming input
+
+    if (count > 0 || node->repeatMin == 0) // node matched at least once, or repeatMin is zero
+    {
+        DBG_RE_MATCH("count=%" PRId_SSIZE_T " \"%s\"\n",
+            count, haystack + toffs);
+
+        match = th_regex_match_expr(haystack, &toffs, expr, *nnode + 1, flags, level + 1); // try the remaining nodes of expr
+
+        DBG_RE_MATCH("rest expr match=%s \"%s\"\n",
+            match ? "YES" : "NO", haystack + toffs);
+    }
+
+    if (match)
+    {
+        *offs = toffs; // commit the consumed input to the caller
+        *nnode = expr->nnodes; // remaining nodes were matched above; presumably ends the caller's node loop
+    }
+
+    res = match && // success needs a match plus a satisfied repeat bound (max or min)
+        (
+        (node->repeatMax > 0 && count >= node->repeatMax) ||
+        (node->repeatMin >= 0 && count >= node->repeatMin)
+        );
+
+    DBG_RE_MATCH("RESULT: match=%s, res=%s\n",
+        match ? "YES" : "NO", res ? "YES" : "NO");
+
+    return res;
+}
+
+
 static BOOL th_regex_match_expr(
     const th_regex_char_t *haystack,
-    size_t *poffs,
+    size_t *offs,
     const th_regex_t *expr,
     const size_t startnode,
     const int flags,
@@ -915,7 +981,7 @@
     )
 {
     BOOL res = TRUE;
-    size_t soffs = *poffs;
+    size_t soffs = *offs;
 
     for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++)
     {
@@ -943,60 +1009,7 @@
                 break;
 
             case TH_RE_MATCH_COUNT:
-                {
-                    ssize_t count = 0, ncount = 0;
-
-                    do
-                    {
-                        BOOL match;
-                        size_t toffs = soffs, noffs;
-
-                        DBG_RE_MATCH("    ROUND #%" PRIu_SIZE_T ": '%s'\n",
-                            count, haystack + toffs);
-
-                        res = FALSE;
-                        match = TRUE;
-                        do
-                        {
-                            noffs = toffs;
-                            match = th_regex_match_one(haystack, &toffs, node, flags, level + 1);
-                            if (match)
-                            {
-                                ncount++;
-
-                                if (node->repeatMin >= 0 && ncount >= node->repeatMin)
-                                    break;
-                            }
-                        } while (match && haystack[toffs] != 0 && toffs > noffs);
-
-                        DBG_RE_MATCH("    ROUND #%" PRIu_SIZE_T " END: match=%s \"%s\"\n",
-                            ncount, match ? "YES" : "NO", haystack + toffs);
-
-                        if (ncount > 0)
-                        {
-                            match = th_regex_match_expr(haystack, &toffs, expr, nnode + 1, flags, level + 2);
-                        }
-
-                        DBG_RE_MATCH("    ROUND #%" PRIu_SIZE_T " END: match=%s \"%s\"\n",
-                            count, match ? "YES" : "NO", haystack + toffs);
-
-                        if (match)
-                        {
-                            // Node matched
-                            count++;
-                            soffs = toffs;
-                            nnode = expr->nnodes;
-
-                            res =
-                                (node->repeatMax > 0 && ncount >= node->repeatMax) ||
-                                (node->repeatMin >= 0 && ncount >= node->repeatMin);
-                        }
-
-                    } while (!res);
-
-                    DBG_RE_MATCH("    RESULT: count=%" PRId_SSIZE_T ", ncount=%" PRId_SSIZE_T ", done=%s\n",
-                        count, ncount, res ? "YES" : "NO");
-                }
+                res = th_regex_match_count(haystack, &soffs, expr, node, &nnode, flags, level);
                 break;
 
             case TH_RE_MATCH_ANCHOR_START:
@@ -1010,7 +1023,7 @@
     }
 
     if (res)
-        *poffs = soffs;
+        *offs = soffs;
 
     return res;
 }