changeset 768:600a3c08747f

Add a handle_escapes parameter to the th_str{case}match() functions to enable handling of escaped glob tokens (\* and \?). This breaks the API. Also update the existing test cases and add a few new test cases for checking the escape functionality.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 10 Feb 2023 02:46:57 +0200
parents 449a031c297c
children 8eca15bde07d
files tests.c th_strglob.c th_string.c th_string.h
diffstat 4 files changed, 96 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/tests.c	Fri Feb 10 02:45:34 2023 +0200
+++ b/tests.c	Fri Feb 10 02:46:57 2023 +0200
@@ -364,7 +364,7 @@
 
 #define TEST_3B(fun, str1, str2, sbool, ret) do { \
         test_ctx ctx; \
-        test_start(&ctx, # fun  "('%s', '%s', %s)", str1, str2, sbool ? "true" : "false"); \
+        test_start(&ctx, # fun  "('%s', '%s', %s) == %s", str1, str2, sbool ? "true" : "false", ret ? "true" : "false"); \
         test_result(&ctx, fun (str1, str2, sbool) == ret); \
         test_end(&ctx); \
     } while (0)
@@ -1103,25 +1103,35 @@
         TEST_2C(th_strrcasecmp, "foo aSdFq baz", "asdfq", false);
     }
 
-    if (test_set_start("String matching #1"))
+    if (test_set_start("String matching #1 (case-sensitive)"))
     {
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "*lol"       , true);
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "*lo*"       , true);
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "*lo"        , false);
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "abba"       , false);
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "*bba*"      , true);
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "abba*"      , true);
-        TEST_2B(th_strmatch, "abba ABBAkukka lol"      , "abbak*"     , false);
-        TEST_2B(th_strmatch, "abba ABBAöökukka lol"    , "*abbaö?"    , false);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "*lol"            , false, true);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "*lo*"            , false, true);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "*lo"             , false, false);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "abba"            , false, false);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "*bba*"           , false, true);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "abba*"           , false, true);
+        TEST_3B(th_strmatch, "abba ABBAkukka lol"      , "abbak*"          , false, false);
+        TEST_3B(th_strmatch, "abba ABBAöökukka lol"    , "*abbaö?"         , false, false);
     }
 
-    if (test_set_start("String matching #2"))
+    if (test_set_start("String matching #2 (case-insensitive)"))
     {
-        TEST_2B(th_strcasematch, "abba ABBAkukka lol"  , "abbak*"     , false);
-        TEST_2B(th_strcasematch, "abba ABBAkukka lol"  , "*abbak*"    , true);
-        TEST_2B(th_strcasematch, "abba ABBAkukka lol"  , "*ab?ak*"    , true);
-        TEST_2B(th_strcasematch, "abba ABBAkukka lol"  , "*abbak?"    , false);
-        TEST_2B(th_strcasematch, "abba ABBAkukka lol"  , "?bba?abba*" , true);
+        TEST_3B(th_strcasematch, "abba ABBAkukka lol"    , "abbak*"        , false, false);
+        TEST_3B(th_strcasematch, "abba ABBAkukka lol"    , "*abbak*"       , false, true);
+        TEST_3B(th_strcasematch, "abba ABBAkukka lol"    , "*ab?ak*"       , false, true);
+        TEST_3B(th_strcasematch, "abba ABBAkukka lol"    , "*abbak?"       , false, false);
+        TEST_3B(th_strcasematch, "abba ABBAkukka lol"    , "?bba?abba*"    , false, true);
+    }
+
+    if (test_set_start("String matching #3 (escapes)"))
+    {
+        TEST_3B(th_strcasematch, "abba ABBA*kukka lol"   , "*abba\\*ku*"   , true, true);
+        TEST_3B(th_strcasematch, "abba ABBAkuk?ka lol"   , "*kuk\\?ka*"    , true, true);
+        TEST_3B(th_strcasematch, "abba ABBA*kukka lol"   , "*abba*ku*"     , true, true);
+        TEST_3B(th_strcasematch, "abba ABBAkuk?ka lol"   , "*kuk?ka*"      , true, true);
+        TEST_3B(th_strcasematch, "abba ABBAkuk?ka lol"   , "*kuk?ka\\*"    , true, false);
+        TEST_3B(th_strcasematch, "abba ABBAkuk\\ka lol"  , "*kuk\\\\ka*"   , true, true);
     }
 
     // Tests that test for things that do not work correctly yet
@@ -1131,7 +1141,7 @@
     {
         TEST_2A(th_strcasecmp, "ÖÄÅ", "öäå", false); // if it worked, SHOULD match
         TEST_3A(th_strncasecmp, "Aäöå", "aöå", 2, true); // if worked, it should NOT match
-        TEST_2B(th_strmatch, "öriÖRI! lol", "?ri?RI!*", false); // should match
+        TEST_3B(th_strmatch, "öriÖRI! lol", "?ri?RI!*", false, false); // should match
     }
 
     //
--- a/th_strglob.c	Fri Feb 10 02:45:34 2023 +0200
+++ b/th_strglob.c	Fri Feb 10 02:46:57 2023 +0200
@@ -6,70 +6,85 @@
  * Please read file 'COPYING' for information on license and distribution.
  */
 
-bool TH_STRGLOB_FUNC (const char *haystack, const char *pattern)
+bool TH_STRGLOB_FUNC (const char *haystack, const char *pattern, const bool handle_escapes)
+#ifndef TH_STRGLOB_IMPL
+;
+#else
 {
-    bool matched = true, any = false, end = false;
-    const char *tmp = NULL;
+    bool matched = true, end = false, any = false, escaped = false;
+    const char *save = NULL;
 
     // Check given pattern and string
     if (haystack == NULL || pattern == NULL)
         return false;
 
     // Start comparision
-    while (matched && !end)
-    switch (*pattern)
+    do
     {
-    case '?':
-        // Any single character matches
-        if (*haystack)
-        {
-            pattern++;
-            haystack++;
-        }
-        else
-            matched = false;
-        break;
+        const bool esc_ok = !handle_escapes || !escaped;
 
-    case '*':
-        pattern++;
-        if (!*pattern || *pattern == '?')
-            end = true;
-        any = true;
-        tmp = pattern;
-        break;
-
-    case 0:
-        if (any)
+        if (!*pattern)
         {
+            // End of pattern
+            if (any)
+            {
+                if (*haystack)
+                    haystack++;
+                else
+                    end = true;
+            }
+            else
             if (*haystack)
-                haystack++;
+            {
+                if (save)
+                {
+                    any = true;
+                    pattern = save;
+                }
+                else
+                    matched = false;
+            }
             else
                 end = true;
         }
         else
-        if (*haystack)
+        if (*pattern == '?' && esc_ok)
         {
-            if (tmp)
+            // Any single character matches
+            if (*haystack)
             {
-                any = true;
-                pattern = tmp;
+                pattern++;
+                haystack++;
             }
             else
                 matched = false;
         }
         else
-            end = true;
-        break;
+        if (*pattern == '*' && esc_ok)
+        {
+            // None or more any characters match
+            pattern++;
+            if (!*pattern || *pattern == '?')
+                end = true;
 
-    default:
+            any = true;
+            save = pattern;
+            escaped = false;
+        }
+        else
+        if (*pattern == '\\' && handle_escapes && !escaped)
+        {
+            pattern++;
+            escaped = true;
+        }
+        else
         {
             bool equals = TH_STRGLOB_COLLATE(*pattern) == TH_STRGLOB_COLLATE(*haystack);
+            escaped = false;
             if (any)
             {
                 if (equals)
-                {
                     any = false;
-                }
                 else
                 if (*haystack)
                     haystack++;
@@ -82,14 +97,15 @@
                 {
                     if (*pattern)
                         pattern++;
+
                     if (*haystack)
                         haystack++;
                 }
                 else
-                if (tmp)
+                if (save)
                 {
                     any = true;
-                    pattern = tmp;
+                    pattern = save;
                 }
                 else
                     matched = false;
@@ -98,13 +114,12 @@
             if (!*haystack && !*pattern)
                 end = true;
         }
-        break;
-    }
+    } while (matched && !end);
 
     return matched;
 }
+#endif
 
 
 #undef TH_STRGLOB_FUNC
 #undef TH_STRGLOB_COLLATE
-
--- a/th_string.c	Fri Feb 10 02:45:34 2023 +0200
+++ b/th_string.c	Fri Feb 10 02:46:57 2023 +0200
@@ -11,6 +11,17 @@
 // Include printf implementation
 #include "th_printf.c"
 
+// Include strmatch / glob function implementations
+#define TH_STRGLOB_IMPL 1
+
+#define TH_STRGLOB_FUNC th_strmatch
+#define TH_STRGLOB_COLLATE(pch) (pch)
+#include "th_strglob.c"
+
+#define TH_STRGLOB_FUNC th_strcasematch
+#define TH_STRGLOB_COLLATE(pch) th_tolower(pch)
+#include "th_strglob.c"
+
 
 /**
  * Implementation of strchr() for th_char_t.
@@ -773,23 +784,6 @@
 }
 
 
-/* Compare a string to a pattern. Case-SENSITIVE version.
- * The matching pattern can consist of any normal characters plus
- * wildcards ? and *. "?" matches any character and "*" matches
- * any number of characters.
- */
-#define TH_STRGLOB_FUNC th_strmatch
-#define TH_STRGLOB_COLLATE(px) (px)
-#include "th_strglob.c"
-
-
-/* Compare a string to a pattern. Case-INSENSITIVE version.
- */
-#define TH_STRGLOB_FUNC th_strcasematch
-#define TH_STRGLOB_COLLATE(px) th_tolower(px)
-#include "th_strglob.c"
-
-
 bool th_get_hex_triplet(const th_char_t *str, unsigned int *value)
 {
     const th_char_t *p = str;
--- a/th_string.h	Fri Feb 10 02:45:34 2023 +0200
+++ b/th_string.h	Fri Feb 10 02:46:57 2023 +0200
@@ -211,13 +211,13 @@
 
 /* Parsing, matching
  */
+bool        th_strmatch(const th_char_t *haystack, const th_char_t *pattern, const bool handle_escapes);
+bool        th_strcasematch(const th_char_t *haystack, const th_char_t *pattern, const bool handle_escapes);
+
 const th_char_t  *th_findnext(const th_char_t *str, size_t *pos);
 const th_char_t  *th_findsep(const th_char_t *str, size_t *pos, const th_char_t sep);
 const th_char_t  *th_findseporspace(const th_char_t *str, size_t *pos, const th_char_t sep);
 
-bool        th_strmatch(const th_char_t *haystack, const th_char_t *pattern);
-bool        th_strcasematch(const th_char_t *haystack, const th_char_t *pattern);
-
 bool        th_get_hex_triplet(const th_char_t *str, unsigned int *value);
 bool        th_get_boolean(const th_char_t *str, bool *value);
 bool        th_get_int(const th_char_t *str, unsigned int *value, bool *neg);