# HG changeset patch
# User Matti Hamalainen
# Date 1579138391 -7200
# Node ID a0e8d9c6300b7ab6ec35aeea4126d7ccc965b756
# Parent  69f1cb7f9b38399d1e23cb2836d20c0cd5a5785b
A bit more work on the regex stuff.

diff -r 69f1cb7f9b38 -r a0e8d9c6300b tests.c
--- a/tests.c	Thu Jan 16 01:46:19 2020 +0200
+++ b/tests.c	Thu Jan 16 03:33:11 2020 +0200
@@ -562,8 +562,8 @@
 typedef struct
 {
     th_regex_char *str;
+    size_t nmatches;    // Expected match count, compared against the actual count
     int flags;
-    BOOL result;
 } test_regex_def;
 
 
@@ -583,16 +583,35 @@
 
     for (const test_regex_def *def = list; def->str != NULL; def++)
     {
-        BOOL matched = FALSE;
+        th_regex_match_node *matches = NULL;
+        size_t nmatches;
 
-        if ((res = th_regex_match(reg, def->str, &matched, NULL, -1, def->flags)) != THERR_OK)
+        // maxmatches == 0: do not limit the number of matches
+        if ((res = th_regex_match(reg, def->str, &nmatches, &matches, 0, def->flags)) != THERR_OK)
         {
             THERR("Regex match returned error: %s\n",
                 th_error_str(res));
+            // Matches stored before the failure still have to be freed
+            th_regex_free_matches(matches);
             goto out;
         }
 
-        printf(" '%s': %s\n", def->str, matched ? "YES" : "NO");
+        printf(" '%s': matched %" PRIu_SIZE_T " time(s), testresult=%s\n",
+            def->str,
+            nmatches,
+            def->nmatches == nmatches ? "YES" : "NO");
+
+        for (th_regex_match_node *m = matches;
+            m != NULL; m = (th_regex_match_node *) m->node.next)
+        {
+            // Never pass NULL to "%s" (th_strndup() presumably returns NULL on failure)
+            char *tmp = th_strndup(def->str + m->start, m->len);
+            printf(" match [%" PRIu_SIZE_T " ++ %" PRIu_SIZE_T "]: '%s'\n",
+                m->start, m->len, tmp != NULL ? tmp : "");
+            th_free(tmp);
+        }
+
+        th_regex_free_matches(matches);
     }
 
 out:
@@ -865,12 +884,13 @@
 
     test_regex_def tst1[] =
     {
-        { "abcfoabccg" , 0, TRUE },
-        { "abcbcfoabccg" , 0, TRUE },
-        { NULL, 0, FALSE }
+        { "abcfoabccg" , 1, 0 },
+        { "abcbcfoabccg" , 1, 0 },
+        { "abcbcfoabccgabcbcfoabccg" , 2, 0 },
+        { NULL, 0, 0 }
     };
 
-    test_regex_list("^a(bc){1,2}fo[oab]*cc?g", tst1);
+    test_regex_list("a(bc){1,2}fo[oab]*cc?g", tst1);
 }
 
 //
diff -r 69f1cb7f9b38 -r a0e8d9c6300b th_regex.c
--- a/th_regex.c	Thu Jan 16 01:46:19 2020 +0200
+++ b/th_regex.c	Thu Jan 16 03:33:11 2020 +0200
@@ -636,40 +636,81 @@
 
 
 int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
-    BOOL *pmatched, th_regex_match_node **pmatches, const ssize_t max,
+    size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches,
     const int flags)
 {
-//    th_regex_match_node *matches = NULL;
-    BOOL matched;
-    (void) pmatches;
-    (void) max;
+    size_t nmatches = 0;
+
+    if (pnmatches != NULL)
+        *pnmatches = 0;
 
     // Check given pattern and string
     if (expr == NULL || haystack == NULL)
         return THERR_NULLPTR;
 
     // Start matching
-#if 0
-    size_t soffs, coffs;
-    soffs = coffs = 0;
-    while (haystack[soffs] != 0)
+    // XXX NOTE .. lots to think about and to take into account:
+    // - anchored and unanchored expressions
+    // - how to check if the expression has consumed all possibilities?
+    // ..
+    for (size_t soffs = 0; haystack[soffs] != 0; )
     {
+        BOOL matched;
+        size_t coffs = soffs;
+
         if ((matched = th_regex_do_match_expr(expr, haystack, &coffs, flags)))
         {
+            // Store the match before counting it, so that the reported
+            // count never exceeds the number of nodes in the list
+            if (pmatches != NULL)
+            {
+                th_regex_match_node *match = th_malloc0(sizeof(th_regex_match_node));
+                if (match == NULL)
+                    return THERR_MALLOC;
+
+                match->start = soffs;
+                match->len = coffs - soffs;
+
+                th_llist_append_node((th_llist_t **) pmatches, (th_llist_t *) match);
+            }
+
+            nmatches++;
+
+            if (pnmatches != NULL)
+                *pnmatches = nmatches;
+
+            if (maxmatches > 0 && nmatches >= maxmatches)
+                break;
+
+            // Always advance by at least one character, otherwise
+            // an empty match would make this loop spin forever
+            if (soffs == coffs)
+                soffs++;
+            else
+                soffs = coffs;
         }
         else
         {
+            soffs++;
         }
     }
-#else
-    size_t offs = 0;
-    matched = th_regex_do_match_expr(expr, haystack, &offs, flags);
-#endif
-
-    if (pmatched != NULL)
-        *pmatched = matched;
 
     return THERR_OK;
 }
 
+
+// Per-node callback for th_llist_free_func_node(). Takes the generic list
+// node type so that it can be passed without a function pointer cast.
+static void th_regex_free_match(th_llist_t *node)
+{
+    (void) node;
+    // Nothing to do here at the moment
+}
+
+
+void th_regex_free_matches(th_regex_match_node *matches)
+{
+    th_llist_free_func_node((th_llist_t *) matches, th_regex_free_match);
+}
+
 #endif // TH_EXPERIMENTAL_REGEX
diff -r 69f1cb7f9b38 -r a0e8d9c6300b th_regex.h
--- a/th_regex.h	Thu Jan 16 01:46:19 2020 +0200
+++ b/th_regex.h	Thu Jan 16 03:33:11 2020 +0200
@@ -10,7 +10,9 @@
 #ifndef TH_REGEX_H
 #define TH_REGEX_H
 
-#include "th_util.h"
+#include "th_types.h"
+#include "th_datastruct.h"
+
 
 #ifdef __cplusplus
 extern "C" {
@@ -23,6 +25,13 @@
 typedef char th_regex_char;
 
 
+enum
+{
+    TH_REF_CASEFOLD = 0x0001,
+    TH_REF_ANCHORED = 0x0002,
+};
+
+
 struct th_regex_ctx;
 
 typedef struct
@@ -54,7 +63,7 @@
 
 typedef struct
 {
-//    th_llist_t node;
+    th_llist_t node;    // Must stay first: nodes are cast to and from th_llist_t *
     size_t start, len;
 } th_regex_match_node;
 
@@ -62,12 +71,23 @@
 //
 // Functions
 //
-int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern);
-void th_regex_free(th_regex_ctx *expr);
+int     th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern);
+void    th_regex_free(th_regex_ctx *expr);
 
-int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
-    BOOL *pmatched, th_regex_match_node **pmatches, const ssize_t max,
-    const int flags);
+// Search 'haystack' for matches of the compiled expression 'expr'.
+//  - pnmatches: if not NULL, receives the number of matches found.
+//  - pmatches: if not NULL, a node holding the start offset and length of
+//    each match is appended to the list at *pmatches. The caller must
+//    initialize *pmatches (e.g. to NULL) before the call and release the
+//    list with th_regex_free_matches(), also when an error is returned.
+//  - maxmatches: stop after this many matches; 0 means no limit.
+// Returns THERR_OK on success, THERR_NULLPTR if 'expr' or 'haystack' is
+// NULL, or THERR_MALLOC if a match node could not be allocated.
+int     th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
+            size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches,
+            const int flags);
+// Free a match list built by th_regex_match().
+void    th_regex_free_matches(th_regex_match_node *matches);
 
 
 #ifdef __cplusplus