# HG changeset patch # User Matti Hamalainen # Date 1675990017 -7200 # Node ID 600a3c08747f807da9e2287842760d44a7149a7b # Parent 449a031c297c1fa7419452eba7346c833aff8344 Add handle_escapes parameter to th_str{case}match() functions to enable handling of glob token escaping \* \?. This breaks the API. Also update the testcases and add few testcases for checking the escape functionality. diff -r 449a031c297c -r 600a3c08747f tests.c --- a/tests.c Fri Feb 10 02:45:34 2023 +0200 +++ b/tests.c Fri Feb 10 02:46:57 2023 +0200 @@ -364,7 +364,7 @@ #define TEST_3B(fun, str1, str2, sbool, ret) do { \ test_ctx ctx; \ - test_start(&ctx, # fun "('%s', '%s', %s)", str1, str2, sbool ? "true" : "false"); \ + test_start(&ctx, # fun "('%s', '%s', %s) == %s", str1, str2, sbool ? "true" : "false", ret ? "true" : "false"); \ test_result(&ctx, fun (str1, str2, sbool) == ret); \ test_end(&ctx); \ } while (0) @@ -1103,25 +1103,35 @@ TEST_2C(th_strrcasecmp, "foo aSdFq baz", "asdfq", false); } - if (test_set_start("String matching #1")) + if (test_set_start("String matching #1 (case-sensitive)")) { - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "*lol" , true); - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "*lo*" , true); - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "*lo" , false); - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "abba" , false); - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "*bba*" , true); - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "abba*" , true); - TEST_2B(th_strmatch, "abba ABBAkukka lol" , "abbak*" , false); - TEST_2B(th_strmatch, "abba ABBAöökukka lol" , "*abbaö?" , false); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "*lol" , false, true); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "*lo*" , false, true); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "*lo" , false, false); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "abba" , false, false); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "*bba*" , false, true); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "abba*" , false, true); + TEST_3B(th_strmatch, "abba ABBAkukka lol" , "abbak*" , false, false); + TEST_3B(th_strmatch, "abba ABBAöökukka lol" , "*abbaö?" , false, false); } - if (test_set_start("String matching #2")) + if (test_set_start("String matching #2 (case-insensitive)")) { - TEST_2B(th_strcasematch, "abba ABBAkukka lol" , "abbak*" , false); - TEST_2B(th_strcasematch, "abba ABBAkukka lol" , "*abbak*" , true); - TEST_2B(th_strcasematch, "abba ABBAkukka lol" , "*ab?ak*" , true); - TEST_2B(th_strcasematch, "abba ABBAkukka lol" , "*abbak?" , false); - TEST_2B(th_strcasematch, "abba ABBAkukka lol" , "?bba?abba*" , true); + TEST_3B(th_strcasematch, "abba ABBAkukka lol" , "abbak*" , false, false); + TEST_3B(th_strcasematch, "abba ABBAkukka lol" , "*abbak*" , false, true); + TEST_3B(th_strcasematch, "abba ABBAkukka lol" , "*ab?ak*" , false, true); + TEST_3B(th_strcasematch, "abba ABBAkukka lol" , "*abbak?" , false, false); + TEST_3B(th_strcasematch, "abba ABBAkukka lol" , "?bba?abba*" , false, true); + } + + if (test_set_start("String matching #3 (escapes)")) + { + TEST_3B(th_strcasematch, "abba ABBA*kukka lol" , "*abba\\*ku*" , true, true); + TEST_3B(th_strcasematch, "abba ABBAkuk?ka lol" , "*kuk\\?ka*" , true, true); + TEST_3B(th_strcasematch, "abba ABBA*kukka lol" , "*abba*ku*" , true, true); + TEST_3B(th_strcasematch, "abba ABBAkuk?ka lol" , "*kuk?ka*" , true, true); + TEST_3B(th_strcasematch, "abba ABBAkuk?ka lol" , "*kuk?ka\\*" , true, false); + TEST_3B(th_strcasematch, "abba ABBAkuk\\ka lol" , "*kuk\\\\ka*" , true, true); } // Tests that test for things that do not work correctly yet @@ -1131,7 +1141,7 @@ { TEST_2A(th_strcasecmp, "ÖÄÅ", "öäå", false); // if it worked, SHOULD match TEST_3A(th_strncasecmp, "Aäöå", "aöå", 2, true); // if worked, it should NOT match - TEST_2B(th_strmatch, "öriÖRI! lol", "?ri?RI!*", false); // should match + TEST_3B(th_strmatch, "öriÖRI! lol", "?ri?RI!*", false, false); // should match } // diff -r 449a031c297c -r 600a3c08747f th_strglob.c --- a/th_strglob.c Fri Feb 10 02:45:34 2023 +0200 +++ b/th_strglob.c Fri Feb 10 02:46:57 2023 +0200 @@ -6,70 +6,85 @@ * Please read file 'COPYING' for information on license and distribution. */ -bool TH_STRGLOB_FUNC (const char *haystack, const char *pattern) +bool TH_STRGLOB_FUNC (const char *haystack, const char *pattern, const bool handle_escapes) +#ifndef TH_STRGLOB_IMPL +; +#else { - bool matched = true, any = false, end = false; - const char *tmp = NULL; + bool matched = true, end = false, any = false, escaped = false; + const char *save = NULL; // Check given pattern and string if (haystack == NULL || pattern == NULL) return false; // Start comparision - while (matched && !end) - switch (*pattern) + do { - case '?': - // Any single character matches - if (*haystack) - { - pattern++; - haystack++; - } - else - matched = false; - break; + const bool esc_ok = !handle_escapes || !escaped; - case '*': - pattern++; - if (!*pattern || *pattern == '?') - end = true; - any = true; - tmp = pattern; - break; - - case 0: - if (any) + if (!*pattern) { + // End of pattern + if (any) + { + if (*haystack) + haystack++; + else + end = true; + } + else if (*haystack) - haystack++; + { + if (save) + { + any = true; + pattern = save; + } + else + matched = false; + } else end = true; } else - if (*haystack) + if (*pattern == '?' && esc_ok) { - if (tmp) + // Any single character matches + if (*haystack) { - any = true; - pattern = tmp; + pattern++; + haystack++; } else matched = false; } else - end = true; - break; + if (*pattern == '*' && esc_ok) + { + // None or more any characters match + pattern++; + if (!*pattern || *pattern == '?') + end = true; - default: + any = true; + save = pattern; + escaped = false; + } + else + if (*pattern == '\\' && handle_escapes && !escaped) + { + pattern++; + escaped = true; + } + else { bool equals = TH_STRGLOB_COLLATE(*pattern) == TH_STRGLOB_COLLATE(*haystack); + escaped = false; if (any) { if (equals) - { any = false; - } else if (*haystack) haystack++; @@ -82,14 +97,15 @@ { if (*pattern) pattern++; + if (*haystack) haystack++; } else - if (tmp) + if (save) { any = true; - pattern = tmp; + pattern = save; } else matched = false; @@ -98,13 +114,12 @@ if (!*haystack && !*pattern) end = true; } - break; - } + } while (matched && !end); return matched; } +#endif #undef TH_STRGLOB_FUNC #undef TH_STRGLOB_COLLATE - diff -r 449a031c297c -r 600a3c08747f th_string.c --- a/th_string.c Fri Feb 10 02:45:34 2023 +0200 +++ b/th_string.c Fri Feb 10 02:46:57 2023 +0200 @@ -11,6 +11,17 @@ // Include printf implementation #include "th_printf.c" +// Include strmatch / glob function implementations +#define TH_STRGLOB_IMPL 1 + +#define TH_STRGLOB_FUNC th_strmatch +#define TH_STRGLOB_COLLATE(pch) (pch) +#include "th_strglob.c" + +#define TH_STRGLOB_FUNC th_strcasematch +#define TH_STRGLOB_COLLATE(pch) th_tolower(pch) +#include "th_strglob.c" + /** * Implementation of strchr() for th_char_t. @@ -773,23 +784,6 @@ } -/* Compare a string to a pattern. Case-SENSITIVE version. - * The matching pattern can consist of any normal characters plus - * wildcards ? and *. "?" matches any character and "*" matches - * any number of characters. - */ -#define TH_STRGLOB_FUNC th_strmatch -#define TH_STRGLOB_COLLATE(px) (px) -#include "th_strglob.c" - - -/* Compare a string to a pattern. Case-INSENSITIVE version. - */ -#define TH_STRGLOB_FUNC th_strcasematch -#define TH_STRGLOB_COLLATE(px) th_tolower(px) -#include "th_strglob.c" - - bool th_get_hex_triplet(const th_char_t *str, unsigned int *value) { const th_char_t *p = str; diff -r 449a031c297c -r 600a3c08747f th_string.h --- a/th_string.h Fri Feb 10 02:45:34 2023 +0200 +++ b/th_string.h Fri Feb 10 02:46:57 2023 +0200 @@ -211,13 +211,13 @@ /* Parsing, matching */ +bool th_strmatch(const th_char_t *haystack, const th_char_t *pattern, const bool handle_escapes); +bool th_strcasematch(const th_char_t *haystack, const th_char_t *pattern, const bool handle_escapes); + const th_char_t *th_findnext(const th_char_t *str, size_t *pos); const th_char_t *th_findsep(const th_char_t *str, size_t *pos, const th_char_t sep); const th_char_t *th_findseporspace(const th_char_t *str, size_t *pos, const th_char_t sep); -bool th_strmatch(const th_char_t *haystack, const th_char_t *pattern); -bool th_strcasematch(const th_char_t *haystack, const th_char_t *pattern); - bool th_get_hex_triplet(const th_char_t *str, unsigned int *value); bool th_get_boolean(const th_char_t *str, bool *value); bool th_get_int(const th_char_t *str, unsigned int *value, bool *neg);