annotate th_regex.c @ 613:2e3b81ae8c8a

More work on regexes.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 16 Jan 2020 16:01:27 +0200
parents cc9ec51b4875
children afcaf5e38f56
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * Simple regular expression matching functionality
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 * Programmed and designed by Matti 'ccr' Hamalainen
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * (C) Copyright 2020 Tecnic Software productions (TNSP)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 *
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 * Please read file 'COPYING' for information on license and distribution.
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 */
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 #ifdef TH_EXPERIMENTAL_REGEX
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 #include "th_util.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10 #include "th_regex.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11 //#include "th_datastruct.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
12
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
13
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
14 //#define DBG_RE_COMPILE 1
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
15 //#define DBG_RE_FREE 1
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
16 #define DBG_RE_MATCH 1
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
18 #if defined(DBG_RE_COMPILE)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
19 # define DBG_RE_PRINT_COMPILE(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20 #else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
21 # define DBG_RE_PRINT_COMPILE(...) do { } while (0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
22 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23 #if defined(DBG_RE_FREE)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
24 # define DBG_RE_PRINT_FREE(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
25 #else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
26 # define DBG_RE_PRINT_FREE(...) do { } while (0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
27 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
28 #if defined(DBG_RE_MATCH)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
29 # define DBG_RE_PRINT_MATCH(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
30 #else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31 # define DBG_RE_PRINT_MATCH(...) do { } while (0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
35 enum
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
36 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
37 TH_RE_MATCH_ONCE,
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
38 TH_RE_MATCH_COUNT_GREEDY,
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
39 TH_RE_MATCH_COUNT_NONGREEDY,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
40 TH_RE_MATCH_ANCHOR_START,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
41 TH_RE_MATCH_ANCHOR_END,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
42 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
43
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
44
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
45 enum
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
46 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
47 TH_RE_TYPE_CHAR,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
48 TH_RE_TYPE_ANY_CHAR,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
49 TH_RE_TYPE_LIST,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
50 TH_RE_TYPE_LIST_REVERSE,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
51 TH_RE_TYPE_SUBEXPR,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
52 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
53
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
54
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
55 #if defined(DBG_RE_COMPILE) || defined(DBG_RE_FREE) || defined(DBG_RE_MATCH)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
56 static const char *re_match_modes[] =
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
57 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
58 "ONCE",
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
59 "COUNT GREEDY",
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
60 "COUNT NONGREEDY",
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
61 "ANCHOR_START",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
62 "ANCHOR_END",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
63 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
64
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
65 static const char *re_match_types[] =
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
66 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
67 "CHAR",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
68 "ANY",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
69 "LIST",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
70 "LIST_REVERSE",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
71 "SUBEXPR",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
72 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
73 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
74
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
75
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
76 typedef struct
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
78 const th_regex_char *pattern;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
79 size_t offs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
80
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
81 th_regex_ctx *data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
82
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
83 size_t nstack, stacksize;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
84 th_regex_ctx **stack;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
85 } th_regex_parse_ctx;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
86
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
87
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
88 static void th_regex_node_init(th_regex_node *node)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
89 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
90 memset(node, 0, sizeof(th_regex_node));
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
91 node->mode = TH_RE_MATCH_ONCE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
93
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
94
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
95 static int th_regex_strndup(th_regex_char **pdst,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
96 const th_regex_char *src, const size_t len)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
97 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
98 if (pdst == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
99 return THERR_NULLPTR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
100
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
101 if (UINTPTR_MAX / sizeof(th_regex_char) < len + 1)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
102 return THERR_BOUNDS;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
103
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
104 if ((*pdst = (th_regex_char *)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
105 th_malloc((len + 1) * sizeof(th_regex_char))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
106 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
107
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
108 memcpy(*pdst, src, len * sizeof(th_regex_char));
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
109 (*pdst)[len] = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
110
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
111 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
112 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
113
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
114
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
115 static int th_regex_ctx_get_prev_node(
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
116 th_regex_parse_ctx *ctx, th_regex_node **pnode)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
118 if (ctx->data != NULL && ctx->data->nnodes > 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
119 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
120 *pnode = &ctx->data->nodes[ctx->data->nnodes - 1];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
121 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
123 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
124 return THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
125 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
126
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
128 static int th_regex_ctx_push(th_regex_parse_ctx *ctx)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
129 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
130 if (ctx->stack == NULL || ctx->nstack + 1 >= ctx->stacksize)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
131 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
132 ctx->stacksize += 16;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
133
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
134 if ((ctx->stack = th_realloc(ctx->stack,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
135 ctx->stacksize * sizeof(th_regex_node *))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
136 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
137 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
138
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
139 ctx->stack[ctx->nstack] = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
140 ctx->nstack++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
141 ctx->data = NULL;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
142
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
143 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
144 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
145
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
147 static int th_regex_ctx_pop(th_regex_parse_ctx *ctx, th_regex_ctx **data)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
148 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
149 if (ctx->nstack > 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
150 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
151 *data = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
152 ctx->nstack--;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
153 ctx->data = ctx->stack[ctx->nstack];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
154 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
155 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
156 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
157 return THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
158 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
159
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
160
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
161 static int th_regex_ctx_node_commit(th_regex_parse_ctx *ctx, th_regex_node *node)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
162 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
163 th_regex_ctx *data = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
164
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
165 if (data == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
166 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
167 if ((data = ctx->data = th_malloc0(sizeof(th_regex_ctx))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
168 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
169 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
170
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
171 if (data->nodes == NULL || data->nnodes + 1 >= data->nodessize)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
172 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
173 data->nodessize += 16;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
174 if ((data->nodes = th_realloc(data->nodes,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
175 data->nodessize * sizeof(th_regex_node))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
176 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
177 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
178
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
179 memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node));
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
180 data->nnodes++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
181
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
182 DBG_RE_PRINT_COMPILE(
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
183 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
184 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
185 data->nnodes, data->nodessize,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
186 node->mode, node->type,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
187 node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
188
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
189 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
190 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
191
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
192
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
193 static BOOL th_regex_find_next(const th_regex_char *str,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
194 const size_t start, size_t *offs,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
195 const th_regex_char delim)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
196 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
197 for (*offs = start; str[*offs] != 0; (*offs)++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
198 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
199 if (str[*offs] == delim)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
200 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
201 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
202 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
203 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
204
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
205
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
206 static BOOL th_regex_parse_ssize_t(const th_regex_char *str,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
207 ssize_t *value)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
208 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
209 th_regex_char ch;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
210 BOOL neg;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
211
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
212 if (*str == '-')
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
213 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
214 str++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
215 neg = TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
216 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
217 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
218 neg = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
219
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
220 // Is the value negative?
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
221 while ((ch = *str++))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
222 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
223 if (ch >= '0' && ch <= '9')
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
224 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
225 *value *= 10;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
226 *value += ch - '0';
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
227 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
228 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
229 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
230 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
231
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
232 if (neg)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
233 *value = -(*value);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
234
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
235 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
236 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
237
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
238
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
239 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
240 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
241 int res = THERR_OK;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
242 th_regex_parse_ctx ctx;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
243 th_regex_node node, *pnode;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
244 th_regex_char *tmp = NULL;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
245 size_t start;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
246
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
247 if (pexpr == NULL || pattern == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
248 {
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
249 res = THERR_NULLPTR;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
250 goto exit;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
251 }
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
252
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
253 memset(&ctx, 0, sizeof(ctx));
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
254 ctx.pattern = pattern;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
255
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
256 for (; ctx.pattern[ctx.offs] != 0; ctx.offs++)
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
257 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
258 th_regex_char cch = ctx.pattern[ctx.offs];
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
259 DBG_RE_PRINT_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx.offs, cch);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
260 switch (cch)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
261 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
262 case '?':
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
263 case '*':
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
264 case '+':
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
265 if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
266 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
267
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
268 if (cch == '?')
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
269 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
270 // Check if previous was a count
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
271 pnode->mode = (pnode->mode == TH_RE_MATCH_COUNT_GREEDY) ?
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
272 TH_RE_MATCH_COUNT_NONGREEDY : TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
273
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
274 // Previous token is optional (repeat 0-1 times)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
275 pnode->repeatMin = 0;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
276 pnode->repeatMax = 1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
277 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
278 else
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
279 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
280 // Check if previous was a count
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
281 if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY ||
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
282 pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
283 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
284 res = THERR_INVALID_DATA;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
285 goto exit;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
286 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
287
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
288 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
289
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
290 if (cch == '*')
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
291 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
292 // Previous token can repeat 0 or more times
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
293 pnode->repeatMin = 0;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
294 pnode->repeatMax = -1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
295 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
296 else
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
297 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
298 // Previous token must repeat 1 or more times
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
299 pnode->repeatMin = 1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
300 pnode->repeatMax = -1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
301 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
302 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
303 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
304
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
305 case '{':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
306 // {n} | {min,max}
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
307 start = ctx.offs + 1;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
308 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}'))
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
309 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
310 // End not found
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
311 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
312 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
313 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
314
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
315 th_free(tmp);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
316
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
317 if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
318 (res = th_regex_strndup(&tmp, ctx.pattern + start,
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
319 ctx.offs - start)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
320 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
321
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
322 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
323
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
324 if (th_regex_find_next(tmp, 0, &start, ','))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
325 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
326 tmp[start] = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
327 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) ||
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
328 !th_regex_parse_ssize_t(tmp + start + 1, &pnode->repeatMax))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
329 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
330 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
331 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
332 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
333 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
334 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
335 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
336 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
337 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
338 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
339 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
340 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
341 pnode->repeatMax = pnode->repeatMin;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
342 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
343
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
344 if (pnode->repeatMin < 0 || pnode->repeatMax < 1 ||
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
345 pnode->repeatMax < pnode->repeatMin)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
346 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
347 // Invalid repeat counts
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
348 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
349 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
350 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
351 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
352
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
353 case '(':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
354 // Start of subpattern
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
355 if ((res = th_regex_ctx_push(&ctx)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
356 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
357 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
358
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
359 case ')':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
360 // End of subpattern
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
361 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
362 node.type = TH_RE_TYPE_SUBEXPR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
363
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
364 if ((res = th_regex_ctx_pop(&ctx, &node.match.expr)) != THERR_OK ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
365 (res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
366 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
367 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
368
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
369 case '^':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
370 // Start of line anchor
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
371 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
372 node.mode = TH_RE_MATCH_ANCHOR_START;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
373
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
374 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
375 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
376 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
377
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
378 case '$':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
379 // End of line anchor
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
380 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
381 node.mode = TH_RE_MATCH_ANCHOR_END;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
382
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
383 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
384 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
385 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
386
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
387 case '[':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
388 // Start of char list
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
389 start = ctx.offs + 1;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
390 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
391 ctx.offs == start)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
392 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
393 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
394 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
395 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
396
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
397 th_regex_node_init(&node);
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
398 node.type = (ctx.pattern[start] == '^') ?
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
399 TH_RE_TYPE_LIST_REVERSE : TH_RE_TYPE_LIST;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
400 node.match.list.nchars = ctx.offs - start;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
401 if ((res = th_regex_strndup(&node.match.list.chars,
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
402 ctx.pattern + start, node.match.list.nchars)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
403 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
404
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
405 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
406 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
407 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
408
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
409 case '.':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
410 // Any single character matches
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
411 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
412 node.type = TH_RE_TYPE_ANY_CHAR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
413
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
414 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
415 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
416 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
417
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
418 case '\\':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
419 // Literal escape
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
420 ctx.offs++;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
421 if (ctx.pattern[ctx.offs] == 0)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
422 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
423 // End of pattern, error
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
424 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
425 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
426 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
427
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
428 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
429 node.type = TH_RE_TYPE_CHAR;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
430 node.match.chr = ctx.pattern[ctx.offs];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
431
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
432 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
433 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
434 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
435
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
436 default:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
437 // Given character must match
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
438 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
439 node.type = TH_RE_TYPE_CHAR;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
440 node.match.chr = ctx.pattern[ctx.offs];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
441
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
442 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
443 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
444 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
445 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
446 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
447
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
448 exit:
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
449 *pexpr = ctx.data;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
450 th_free(tmp);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
451 return res;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
452 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
453
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
454
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
455
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
456
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
457 void th_regex_free(th_regex_ctx *expr)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
458 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
459 if (expr != NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
460 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
461 for (size_t n = 0; n < expr->nnodes; n++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
462 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
463 th_regex_node *node = &expr->nodes[n];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
464
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
465 DBG_RE_PRINT_FREE(
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
466 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
467 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
468 n, expr->nnodes,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
469 node->mode, node->type,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
470 node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
471
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
472 switch (node->type)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
473 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
474 case TH_RE_TYPE_SUBEXPR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
475 DBG_RE_PRINT_FREE(" SUBEXPR: %p vs %p\n", expr, node->match.expr);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
476 th_regex_free(node->match.expr);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
477 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
478
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
479 case TH_RE_TYPE_LIST:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
480 case TH_RE_TYPE_LIST_REVERSE:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
481 DBG_RE_PRINT_FREE(" list='%s'\n", node->match.list.chars);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
482 th_free(node->match.list.chars);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
483 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
484
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
485 #ifdef DBG_RE_FREE
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
486 case TH_RE_TYPE_ANY_CHAR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
487 DBG_RE_PRINT_FREE(" any char\n");
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
488 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
489
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
490 case TH_RE_TYPE_CHAR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
491 DBG_RE_PRINT_FREE(" char='%c'\n", node->match.chr);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
492 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
493 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
494 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
495 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
496
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
497 th_free(expr->nodes);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
498 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
499 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
500
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
501
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
502 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
503 const th_regex_char *haystack, size_t *offs, const int flags);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
504
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
505
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
506 static BOOL th_regex_do_match_node(const th_regex_char *haystack,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
507 size_t *offs, const th_regex_node *node, const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
508 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
509 th_regex_char cch;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
510 BOOL ret = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
511
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
512 DBG_RE_PRINT_MATCH(" node_START [%s]: '%s': ", re_match_types[node->type], haystack + *offs);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
513
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
514 switch (node->type)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
515 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
516 case TH_RE_TYPE_SUBEXPR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
517 DBG_RE_PRINT_MATCH("subexpr ..\n");
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
518 ret = th_regex_do_match_expr(node->match.expr, haystack, offs, flags);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
519 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
520
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
521 case TH_RE_TYPE_LIST:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
522 case TH_RE_TYPE_LIST_REVERSE:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
523 DBG_RE_PRINT_MATCH("[%s]\n", node->match.list.chars);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
524 ret = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
525 if ((cch = haystack[*offs]) == 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
526 goto out;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
527
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
528 // Could be optimized, perhaps .. sort match.chars, binary search etc?
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
529 // XXX TODO Ranges and escapes are not supported yet
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
530 for (size_t n = 0; n < node->match.list.nchars; n++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
531 if (node->match.list.chars[n] == cch)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
532 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
533 ret = TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
534 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
535 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
536
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
537 if (node->type == TH_RE_TYPE_LIST_REVERSE)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
538 ret = !ret;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
539
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
540 (*offs)++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
541 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
542
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
543 case TH_RE_TYPE_ANY_CHAR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
544 DBG_RE_PRINT_MATCH("\n");
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
545 if ((cch = haystack[*offs]) == 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
546 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
547 ret = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
548 goto out;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
549 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
550
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
551 ret = TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
552 (*offs)++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
553 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
554
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
555 case TH_RE_TYPE_CHAR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
556 DBG_RE_PRINT_MATCH("'%c'\n", node->match.chr);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
557 if ((cch = haystack[*offs]) == 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
558 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
559 ret = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
560 goto out;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
561 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
562
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
563 ret = (cch == node->match.chr);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
564 (*offs)++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
565 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
566 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
567
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
568 out:
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
569 DBG_RE_PRINT_MATCH(" node_DONE [%s]: match %s\n", re_match_types[node->type], ret ? "YES" : "NO");
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
570 return ret;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
571 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
572
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
573
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
574 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
575 const th_regex_char *haystack, size_t *offs, const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
576 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
577 for (size_t n = 0; n < expr->nnodes; n++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
578 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
579 const th_regex_node *node = &expr->nodes[n];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
580 size_t soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
581
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
582 DBG_RE_PRINT_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s ",
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
583 n, expr->nnodes, re_match_modes[node->mode]);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
584
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
585 switch (node->mode)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
586 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
587 case TH_RE_MATCH_ONCE:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
588 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
589 DBG_RE_PRINT_MATCH("\n");
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
590 soffs = *offs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
591 if (!th_regex_do_match_node(haystack, &soffs, node, flags))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
592 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
593 *offs = soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
594 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
595 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
596
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
597 case TH_RE_MATCH_COUNT_GREEDY:
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
598 case TH_RE_MATCH_COUNT_NONGREEDY:
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
599 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
600 BOOL done = FALSE;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
601 ssize_t count = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
602
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
603 DBG_RE_PRINT_MATCH("min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
604
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
605 do
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
606 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
607 BOOL match;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
608 soffs = *offs;
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
609
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
610 match = th_regex_do_match_node(haystack, &soffs, node, flags);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
611 for (size_t qn = n + 1; qn < expr->nnodes && haystack[soffs] != 0; qn++)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
612 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
613 const th_regex_node *next = &expr->nodes[qn];
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
614 do {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
615 match = th_regex_do_match_node(haystack, &soffs, next, flags);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
616 } while (haystack[soffs] != 0 && !match);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
617 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
618
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
619 if (match)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
620 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
621 // Node matched
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
622 count++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
623 done = (node->repeatMax > 0 && count >= node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
624 *offs = soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
625 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
626 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
627 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
628 // Node did not match, check if we got the minimum if set
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
629 done = (node->repeatMin >= 0 && count >= node->repeatMin);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
630 soffs = *offs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
631 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
632 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
633 } while (!done);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
634
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
635 DBG_RE_PRINT_MATCH("RESULT: %" PRId_SSIZE_T " = %s\n", count, done ? "YES" : "NO");
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
636
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
637 if (!done)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
638 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
639
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
640 *offs = soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
641 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
642 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
643
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
644 case TH_RE_MATCH_ANCHOR_START:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
645 DBG_RE_PRINT_MATCH("offs=%" PRIu_SIZE_T "\n", *offs);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
646 if (*offs != 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
647 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
648 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
649
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
650 case TH_RE_MATCH_ANCHOR_END:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
651 DBG_RE_PRINT_MATCH("is end=%d\n", haystack[*offs]);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
652 if (haystack[*offs] != 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
653 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
654 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
655 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
656 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
657
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
658 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
659 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
660
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
661
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
662 int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
663 size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
664 const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
665 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
666 size_t nmatches = 0;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
667
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
668 if (pnmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
669 *pnmatches = 0;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
670
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
671 // Check given pattern and string
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
672 if (expr == NULL || haystack == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
673 return THERR_NULLPTR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
674
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
675 // Start matching
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
676 // XXX NOTE .. lots to think about and to take into account:
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
677 // - anchored and unanchored expressions
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
678 // - how to check if the expression has consumed all possibilities?
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
679 // ..
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
680 for (size_t soffs = 0; haystack[soffs] != 0; )
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
681 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
682 BOOL matched;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
683 size_t coffs = soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
684
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
685 DBG_RE_PRINT_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n",
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
686 soffs, haystack + soffs);
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
687
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
688 if ((matched = th_regex_do_match_expr(expr, haystack, &coffs, flags)))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
689 {
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
690 // A match was found, increase count
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
691 nmatches++;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
692
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
693 // Deliver to caller if required
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
694 if (pnmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
695 *pnmatches = nmatches;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
696
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
697 if (pmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
698 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
699 th_regex_match_node *match = th_malloc0(sizeof(th_regex_match_node));
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
700 if (match == NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
701 return THERR_MALLOC;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
702
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
703 match->start = soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
704 match->len = coffs - soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
705
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
706 th_llist_append_node((th_llist_t **) pmatches, (th_llist_t *) match);
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
707 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
708
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
709 // Check match count limit, if set
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
710 if (maxmatches > 0 && nmatches >= maxmatches)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
711 break;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
712
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
713 // If offset was not advanced, increase by one
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
714 // otherwise use end of match offset as new start
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
715 if (soffs == coffs)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
716 soffs++;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
717 else
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
718 soffs = coffs;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
719 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
720 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
721 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
722 soffs++;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
723 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
724 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
725
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
726 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
727 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
728
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
729
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
730 static void th_regex_free_match(th_regex_match_node *node)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
731 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
732 (void) node;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
733 // Nothing to do here at the moment
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
734 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
735
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
736
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
737 void th_regex_free_matches(th_regex_match_node *matches)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
738 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
739 th_llist_free_func_node((th_llist_t *) matches,
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
740 (void (*)(th_llist_t *)) th_regex_free_match);
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
741 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
742
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
743 #endif // TH_EXPERIMENTAL_REGEX