annotate th_regex.c @ 639:8c957ad9d4c3

Some more work on regex stuff.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 23 Jan 2020 11:38:28 +0200
parents c4bca120bfb0
children 9e1f9e1d1487
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * Simple regular expression matching functionality
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 * Programmed and designed by Matti 'ccr' Hamalainen
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * (C) Copyright 2020 Tecnic Software productions (TNSP)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 *
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 * Please read file 'COPYING' for information on license and distribution.
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 */
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 #include "th_util.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 #include "th_regex.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
12 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
13 int th_dbg_re_flags = 0;
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
14 # define DBG_RE_COMPILE(...) do { if (th_dbg_re_flags & TH_DBG_RE_COMPILE) fprintf(stderr, __VA_ARGS__); } while (0)
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
15 # define DBG_RE_FREE(...) do { if (th_dbg_re_flags & TH_DBG_RE_FREE) fprintf(stderr, __VA_ARGS__); } while (0)
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
16 # define DBG_RE_MATCH(...) do { if (th_dbg_re_flags & TH_DBG_RE_MATCH) fprintf(stderr, __VA_ARGS__); } while (0)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17 #else
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
18 # define DBG_RE_COMPILE(...)
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
19 # define DBG_RE_FREE(...)
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
20 # define DBG_RE_MATCH(...)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
21 #endif
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
22
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23
614
afcaf5e38f56 Disable regex stuff from normal builds.
Matti Hamalainen <ccr@tnsp.org>
parents: 613
diff changeset
24 #ifdef TH_EXPERIMENTAL_REGEX
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
25
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
26 enum
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
27 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
28 TH_RE_MATCH_ONCE,
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
29 TH_RE_MATCH_COUNT_GREEDY,
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
30 TH_RE_MATCH_COUNT_NONGREEDY,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31 TH_RE_MATCH_ANCHOR_START,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32 TH_RE_MATCH_ANCHOR_END,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
35
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
36 enum
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
37 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
38 TH_RE_TYPE_CHAR,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
39 TH_RE_TYPE_ANY_CHAR,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
40 TH_RE_TYPE_LIST,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
41 TH_RE_TYPE_LIST_REVERSE,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
42 TH_RE_TYPE_SUBEXPR,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
43 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
44
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
45
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
46 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
47 static const char *re_match_modes[] =
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
48 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
49 "ONCE",
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
50 "COUNT GREEDY",
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
51 "COUNT NONGREEDY",
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
52 "ANCHOR_START",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
53 "ANCHOR_END",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
54 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
55
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
56 static const char *re_match_types[] =
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
57 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
58 "CHAR",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
59 "ANY",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
60 "LIST",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
61 "LIST_REVERSE",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
62 "SUBEXPR",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
63 };
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
64 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
65
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
66
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
67 typedef struct
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
68 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
69 const th_regex_char *pattern;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
70 size_t offs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
71
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
72 th_regex_ctx *data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
73
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
74 size_t nstack, stacksize;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
75 th_regex_ctx **stack;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
76 } th_regex_parse_ctx;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
78
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
79 static void th_regex_node_init(th_regex_node *node)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
80 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
81 memset(node, 0, sizeof(th_regex_node));
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
82 node->mode = TH_RE_MATCH_ONCE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
83 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
84
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
85
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
86 static int th_regex_strndup(th_regex_char **pdst,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
87 const th_regex_char *src, const size_t len)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
88 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
89 if (pdst == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
90 return THERR_NULLPTR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
91
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92 if (UINTPTR_MAX / sizeof(th_regex_char) < len + 1)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
93 return THERR_BOUNDS;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
94
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
95 if ((*pdst = (th_regex_char *)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
96 th_malloc((len + 1) * sizeof(th_regex_char))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
97 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
98
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
99 memcpy(*pdst, src, len * sizeof(th_regex_char));
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
100 (*pdst)[len] = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
101
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
102 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
103 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
104
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
105
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
106 static int th_regex_ctx_get_prev_node(
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
107 th_regex_parse_ctx *ctx, th_regex_node **pnode)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
108 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
109 if (ctx->data != NULL && ctx->data->nnodes > 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
110 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
111 *pnode = &ctx->data->nodes[ctx->data->nnodes - 1];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
112 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
113 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
114 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
115 return THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
116 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
118
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
119 static int th_regex_ctx_push(th_regex_parse_ctx *ctx)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
120 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
121 if (ctx->stack == NULL || ctx->nstack + 1 >= ctx->stacksize)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
123 ctx->stacksize += 16;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
124
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
125 if ((ctx->stack = th_realloc(ctx->stack,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
126 ctx->stacksize * sizeof(th_regex_node *))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
128 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
129
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
130 ctx->stack[ctx->nstack] = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
131 ctx->nstack++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
132 ctx->data = NULL;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
133
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
134 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
135 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
136
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
137
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
138 static int th_regex_ctx_pop(th_regex_parse_ctx *ctx, th_regex_ctx **data)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
139 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
140 if (ctx->nstack > 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
141 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
142 *data = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
143 ctx->nstack--;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
144 ctx->data = ctx->stack[ctx->nstack];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
145 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
147 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
148 return THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
149 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
150
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
151
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
152 static int th_regex_ctx_node_commit(th_regex_parse_ctx *ctx, th_regex_node *node)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
153 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
154 th_regex_ctx *data = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
155
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
156 if (data == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
157 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
158 if ((data = ctx->data = th_malloc0(sizeof(th_regex_ctx))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
159 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
160 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
161
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
162 if (data->nodes == NULL || data->nnodes + 1 >= data->nodessize)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
163 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
164 data->nodessize += 16;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
165 if ((data->nodes = th_realloc(data->nodes,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
166 data->nodessize * sizeof(th_regex_node))) == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
167 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
168 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
169
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
170 memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node));
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
171 data->nnodes++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
172
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
173 DBG_RE_COMPILE(
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
174 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
175 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
176 data->nnodes, data->nodessize,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
177 node->mode, node->type,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
178 node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
179
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
180 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
181 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
182
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
183
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
184 static BOOL th_regex_find_next(const th_regex_char *str,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
185 const size_t start, size_t *offs,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
186 const th_regex_char delim)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
187 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
188 for (*offs = start; str[*offs] != 0; (*offs)++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
189 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
190 if (str[*offs] == delim)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
191 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
192 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
193 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
194 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
195
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
196
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
197 static BOOL th_regex_parse_ssize_t(const th_regex_char *str,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
198 ssize_t *value)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
199 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
200 th_regex_char ch;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
201 BOOL neg;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
202
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
203 if (*str == '-')
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
204 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
205 str++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
206 neg = TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
207 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
208 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
209 neg = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
210
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
211 // Is the value negative?
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
212 while ((ch = *str++))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
213 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
214 if (ch >= '0' && ch <= '9')
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
215 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
216 *value *= 10;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
217 *value += ch - '0';
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
218 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
219 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
220 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
221 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
222
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
223 if (neg)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
224 *value = -(*value);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
225
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
226 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
227 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
228
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
229
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
230 static void th_regex_list_item_init(th_regex_list_item *item)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
231 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
232 memset(item, 0, sizeof(th_regex_list_item));
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
233 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
234
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
235
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
236 static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
237 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
238 if (list->items == NULL || list->nitems + 1 >= list->itemssize)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
239 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
240 list->itemssize += 16;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
241
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
242 if ((list->items = th_realloc(list->items,
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
243 list->itemssize * sizeof(th_regex_list_item))) == NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
244 return THERR_MALLOC;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
245 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
246
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
247 memcpy(list->items + list->nitems, item, sizeof(th_regex_list_item));
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
248 list->nitems++;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
249
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
250 return THERR_OK;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
251 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
252
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
253
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
254 static void th_regex_list_free(th_regex_list *list)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
255 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
256 if (list != NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
257 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
258 for (size_t n = 0; n < list->nitems; n++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
259 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
260 th_free(list->items[n].chars);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
261 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
262 th_free(list->items);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
263 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
264 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
265
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
266
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
267 static int th_regex_parse_list(const th_regex_char *str,
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
268 const size_t slen, th_regex_list *list)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
269 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
270 th_regex_char *tmp = NULL;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
271 th_regex_list_item item;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
272 int res;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
273
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
274 if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
275 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
276
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
277 // Handle ranges like [A-Z]
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
278 for (size_t offs = 0; offs < slen; offs++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
279 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
280 th_regex_char
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
281 *prev = (offs > 0) ? tmp + offs - 1 : NULL,
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
282 *curr = tmp + offs,
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
283 *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
284
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
285 if (*curr == '-')
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
286 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
287 if (prev != NULL && next != NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
288 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
289 // Range
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
290 th_regex_list_item_init(&item);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
291 item.type = 1;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
292 item.start = *prev;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
293 item.end = *next;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
294
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
295 if (item.start <= item.end)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
296 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
297 res = THERR_INVALID_DATA;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
298 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
299 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
300
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
301 *curr = *prev = *next = 0;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
302
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
303 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
304 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
305 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
306 else
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
307 if (next != NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
308 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
309 res = THERR_INVALID_DATA;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
310 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
311 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
312 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
313 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
314
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
315 // Count number of remaining characters
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
316 th_regex_list_item_init(&item);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
317 item.type = 0;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
318 item.nchars = 0;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
319
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
320 for (size_t offs = 0; offs < slen; offs++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
321 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
322 th_regex_char curr = tmp[offs];
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
323 if (curr != 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
324 item.nchars++;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
325 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
326
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
327 if (item.nchars > 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
328 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
329 if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
330 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
331 res = THERR_MALLOC;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
332 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
333 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
334
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
335 for (size_t offs = 0, n = 0; offs < slen; offs++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
336 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
337 th_regex_char curr = tmp[offs];
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
338 if (curr != 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
339 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
340 item.chars[n] = curr;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
341 n++;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
342 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
343 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
344
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
345 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
346 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
347 th_free(item.chars);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
348 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
349 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
350 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
351
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
352 out:
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
353 th_free(tmp);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
354 return res;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
355 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
356
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
357
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
358 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
359 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
360 int res = THERR_OK;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
361 th_regex_parse_ctx ctx;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
362 th_regex_node node, *pnode;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
363 th_regex_char *tmp = NULL;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
364 size_t start;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
365
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
366 if (pexpr == NULL || pattern == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
367 {
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
368 res = THERR_NULLPTR;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
369 goto exit;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
370 }
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
371
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
372 memset(&ctx, 0, sizeof(ctx));
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
373 ctx.pattern = pattern;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
374
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
375 for (; ctx.pattern[ctx.offs] != 0; ctx.offs++)
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
376 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
377 th_regex_char cch = ctx.pattern[ctx.offs];
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
378 DBG_RE_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx.offs, cch);
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
379 switch (cch)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
380 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
381 case '?':
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
382 case '*':
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
383 case '+':
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
384 if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
385 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
386
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
387 if (cch == '?')
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
388 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
389 // Check if previous was a count
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
390 pnode->mode = (pnode->mode == TH_RE_MATCH_COUNT_GREEDY) ?
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
391 TH_RE_MATCH_COUNT_NONGREEDY : TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
392
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
393 // Previous token is optional (repeat 0-1 times)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
394 pnode->repeatMin = 0;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
395 pnode->repeatMax = 1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
396 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
397 else
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
398 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
399 // Check if previous was a count
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
400 if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY ||
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
401 pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
402 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
403 res = THERR_INVALID_DATA;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
404 goto exit;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
405 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
406
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
407 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
408
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
409 if (cch == '*')
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
410 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
411 // Previous token can repeat 0 or more times
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
412 pnode->repeatMin = 0;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
413 pnode->repeatMax = -1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
414 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
415 else
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
416 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
417 // Previous token must repeat 1 or more times
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
418 pnode->repeatMin = 1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
419 pnode->repeatMax = -1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
420 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
421 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
422 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
423
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
424 case '{':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
425 // {n} | {min,max}
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
426 start = ctx.offs + 1;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
427 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}'))
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
428 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
429 // End not found
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
430 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
431 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
432 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
433
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
434 th_free(tmp);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
435
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
436 if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
437 (res = th_regex_strndup(&tmp, ctx.pattern + start,
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
438 ctx.offs - start)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
439 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
440
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
441 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
442
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
443 if (th_regex_find_next(tmp, 0, &start, ','))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
444 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
445 tmp[start] = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
446 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) ||
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
447 !th_regex_parse_ssize_t(tmp + start + 1, &pnode->repeatMax))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
448 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
449 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
450 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
451 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
452 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
453 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
454 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
455 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
456 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
457 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
458 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
459 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
460 pnode->repeatMax = pnode->repeatMin;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
461 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
462
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
463 if (pnode->repeatMin < 0 || pnode->repeatMax < 1 ||
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
464 pnode->repeatMax < pnode->repeatMin)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
465 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
466 // Invalid repeat counts
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
467 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
468 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
469 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
470 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
471
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
472 case '(':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
473 // Start of subpattern
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
474 if ((res = th_regex_ctx_push(&ctx)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
475 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
476 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
477
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
478 case ')':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
479 // End of subpattern
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
480 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
481 node.type = TH_RE_TYPE_SUBEXPR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
482
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
483 if ((res = th_regex_ctx_pop(&ctx, &node.match.expr)) != THERR_OK ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
484 (res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
485 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
486 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
487
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
488 case '^':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
489 // Start of line anchor
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
490 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
491 node.mode = TH_RE_MATCH_ANCHOR_START;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
492
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
493 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
494 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
495 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
496
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
497 case '$':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
498 // End of line anchor
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
499 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
500 node.mode = TH_RE_MATCH_ANCHOR_END;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
501
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
502 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
503 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
504 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
505
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
506 case '[':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
507 // Start of char list
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
508 start = ctx.offs + 1;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
509 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
510 ctx.offs == start)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
511 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
512 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
513 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
514 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
515
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
516 // XXX TODO Parse/support ranges [0-9a-zA-Z_-]
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
517
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
518 th_regex_node_init(&node);
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
519 node.type = (ctx.pattern[start] == '^') ?
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
520 TH_RE_TYPE_LIST_REVERSE : TH_RE_TYPE_LIST;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
521 node.match.list.nchars = ctx.offs - start;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
522 if ((res = th_regex_strndup(&node.match.list.chars,
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
523 ctx.pattern + start, node.match.list.nchars)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
524 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
525
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
526 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
527 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
528 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
529
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
530 case '.':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
531 // Any single character matches
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
532 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
533 node.type = TH_RE_TYPE_ANY_CHAR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
534
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
535 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
536 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
537 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
538
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
539 case '\\':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
540 // Literal escape
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
541 ctx.offs++;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
542 if (ctx.pattern[ctx.offs] == 0)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
543 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
544 // End of pattern, error
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
545 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
546 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
547 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
548
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
549 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
550 node.type = TH_RE_TYPE_CHAR;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
551 node.match.chr = ctx.pattern[ctx.offs];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
552
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
553 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
554 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
555 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
556
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
557 default:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
558 // Given character must match
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
559 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
560 node.type = TH_RE_TYPE_CHAR;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
561 node.match.chr = ctx.pattern[ctx.offs];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
562
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
563 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
564 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
565 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
566 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
567 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
568
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
569 exit:
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
570 *pexpr = ctx.data;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
571 th_free(tmp);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
572 return res;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
573 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
574
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
575
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
576 void th_regex_free(th_regex_ctx *expr)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
577 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
578 if (expr != NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
579 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
580 for (size_t n = 0; n < expr->nnodes; n++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
581 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
582 th_regex_node *node = &expr->nodes[n];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
583
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
584 DBG_RE_FREE(
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
585 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
586 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
587 n, expr->nnodes,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
588 node->mode, node->type,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
589 node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
590
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
591 switch (node->type)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
592 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
593 case TH_RE_TYPE_SUBEXPR:
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
594 DBG_RE_FREE(" SUBEXPR: %p vs %p\n",
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
595 (void *) expr, (void *) node->match.expr);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
596 th_regex_free(node->match.expr);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
597 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
598
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
599 case TH_RE_TYPE_LIST:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
600 case TH_RE_TYPE_LIST_REVERSE:
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
601 DBG_RE_FREE(" list='%s'\n", node->match.list.chars);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
602 th_free(node->match.list.chars);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
603 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
604
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
605 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
606 case TH_RE_TYPE_ANY_CHAR:
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
607 DBG_RE_FREE(" any char\n");
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
608 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
609
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
610 case TH_RE_TYPE_CHAR:
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
611 DBG_RE_FREE(" char='%c'\n", node->match.chr);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
612 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
613 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
614 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
615 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
616
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
617 th_free(expr->nodes);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
618 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
619 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
620
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
621
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
622 static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
623 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
624 // Could be optimized, perhaps .. sort match.chars, binary search etc?
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
625 for (size_t nitem = 0; nitem < list->nitems; nitem++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
626 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
627 const th_regex_list_item *item = &list->items[nitem];
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
628 if (item->type == 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
629 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
630 for (size_t n = 0; n < item->nchars; n++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
631 if (item->chars[n] == cch)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
632 return TRUE;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
633 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
634 else
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
635 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
636 if (cch >= item->start && cch <= item->end)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
637 return TRUE;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
638 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
639 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
640
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
641 return FALSE;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
642 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
643
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
644
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
645 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
646 const th_regex_char *haystack, size_t *offs, const int flags);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
647
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
648
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
649 static BOOL th_regex_do_match_node(const th_regex_char *haystack,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
650 size_t *offs, const th_regex_node *node, const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
651 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
652 th_regex_char cch;
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
653 BOOL res = FALSE;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
654
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
655 DBG_RE_MATCH(" node_START [%s]: '%s': ",
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
656 re_match_types[node->type], haystack + *offs);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
657
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
658 switch (node->type)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
659 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
660 case TH_RE_TYPE_SUBEXPR:
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
661 DBG_RE_MATCH("subexpr ..\n");
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
662 res = th_regex_do_match_expr(node->match.expr, haystack, offs, flags);
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
663 return res;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
664 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
665
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
666 case TH_RE_TYPE_LIST:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
667 case TH_RE_TYPE_LIST_REVERSE:
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
668 DBG_RE_MATCH("[%s]", node->match.list.chars);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
669 if ((cch = haystack[*offs]) == 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
670 goto out;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
671
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
672 // Could be optimized, perhaps .. sort match.chars, binary search etc?
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
673 // XXX TODO Ranges are not supported yet
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
674 for (size_t n = 0; n < node->match.list.nchars; n++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
675 if (node->match.list.chars[n] == cch)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
676 {
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
677 res = TRUE;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
678 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
679 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
680
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
681 if (node->type == TH_RE_TYPE_LIST_REVERSE)
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
682 res = !res;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
683
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
684 (*offs)++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
685 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
686
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
687 case TH_RE_TYPE_ANY_CHAR:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
688 if ((cch = haystack[*offs]) == 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
689 goto out;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
690
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
691 res = TRUE;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
692 (*offs)++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
693 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
694
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
695 case TH_RE_TYPE_CHAR:
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
696 DBG_RE_MATCH("'%c'", node->match.chr);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
697 if ((cch = haystack[*offs]) == 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
698 goto out;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
699
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
700 res = (cch == node->match.chr);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
701 (*offs)++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
702 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
703 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
704
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
705 out:
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
706 DBG_RE_MATCH(", match=%s\n",
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
707 res ? "YES" : "NO");
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
708
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
709 return res;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
710 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
711
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
712
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
713 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
714 const th_regex_char *haystack, size_t *offs, const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
715 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
716 for (size_t n = 0; n < expr->nnodes; n++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
717 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
718 const th_regex_node *node = &expr->nodes[n];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
719 size_t soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
720
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
721 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s ",
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
722 n, expr->nnodes, re_match_modes[node->mode]);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
723
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
724 switch (node->mode)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
725 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
726 case TH_RE_MATCH_ONCE:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
727 {
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
728 DBG_RE_MATCH("\n");
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
729 soffs = *offs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
730 if (!th_regex_do_match_node(haystack, &soffs, node, flags))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
731 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
732 *offs = soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
733 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
734 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
735
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
736 case TH_RE_MATCH_COUNT_GREEDY:
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
737 case TH_RE_MATCH_COUNT_NONGREEDY:
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
738 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
739 BOOL done = FALSE;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
740 ssize_t count = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
741
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
742 DBG_RE_MATCH("min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", node->repeatMin, node->repeatMax);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
743
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
744 do
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
745 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
746 BOOL match;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
747 soffs = *offs;
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
748
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
749 match = th_regex_do_match_node(haystack, &soffs, node, flags);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
750 for (size_t qn = n + 1; qn < expr->nnodes && haystack[soffs] != 0; qn++)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
751 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
752 const th_regex_node *next = &expr->nodes[qn];
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
753 do {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
754 match = th_regex_do_match_node(haystack, &soffs, next, flags);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
755 } while (haystack[soffs] != 0 && !match);
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
756 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
757
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
758 if (match)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
759 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
760 // Node matched
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
761 count++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
762 done = (node->repeatMax > 0 && count >= node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
763 *offs = soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
764 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
765 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
766 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
767 // Node did not match, check if we got the minimum if set
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
768 done = (node->repeatMin >= 0 && count >= node->repeatMin);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
769 soffs = *offs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
770 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
771 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
772 } while (!done);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
773
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
774 DBG_RE_MATCH("RESULT: %" PRId_SSIZE_T " = %s\n", count, done ? "YES" : "NO");
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
775
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
776 if (!done)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
777 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
778
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
779 *offs = soffs;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
780 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
781 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
782
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
783 case TH_RE_MATCH_ANCHOR_START:
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
784 DBG_RE_MATCH("offs=%" PRIu_SIZE_T "\n", *offs);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
785 if (*offs != 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
786 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
787 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
788
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
789 case TH_RE_MATCH_ANCHOR_END:
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
790 DBG_RE_MATCH("is end=%d\n", haystack[*offs]);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
791 if (haystack[*offs] != 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
792 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
793 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
794 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
795 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
796
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
797 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
798 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
799
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
800
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
801 int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
802 size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
803 const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
804 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
805 size_t nmatches = 0;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
806
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
807 if (pnmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
808 *pnmatches = 0;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
809
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
810 // Check given pattern and string
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
811 if (expr == NULL || haystack == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
812 return THERR_NULLPTR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
813
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
814 // Start matching
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
815 // XXX NOTE .. lots to think about and to take into account:
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
816 // - anchored and unanchored expressions
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
817 // - how to check if the expression has consumed all possibilities?
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
818 // ..
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
819 for (size_t soffs = 0; haystack[soffs] != 0; )
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
820 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
821 BOOL matched;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
822 size_t coffs = soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
823
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
824 DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n",
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
825 soffs, haystack + soffs);
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
826
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
827 if ((matched = th_regex_do_match_expr(expr, haystack, &coffs, flags)))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
828 {
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
829 // A match was found, increase count
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
830 nmatches++;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
831
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
832 // Deliver to caller if required
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
833 if (pnmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
834 *pnmatches = nmatches;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
835
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
836 if (pmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
837 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
838 th_regex_match_node *match = th_malloc0(sizeof(th_regex_match_node));
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
839 if (match == NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
840 return THERR_MALLOC;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
841
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
842 match->start = soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
843 match->len = coffs - soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
844
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
845 th_llist_append_node((th_llist_t **) pmatches, (th_llist_t *) match);
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
846 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
847
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
848 // Check match count limit, if set
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
849 if (maxmatches > 0 && nmatches >= maxmatches)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
850 break;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
851
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
852 // If offset was not advanced, increase by one
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
853 // otherwise use end of match offset as new start
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
854 if (soffs == coffs)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
855 soffs++;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
856 else
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
857 soffs = coffs;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
858 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
859 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
860 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
861 soffs++;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
862 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
863 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
864
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
865 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
866 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
867
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
868
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
869 static void th_regex_free_match(th_regex_match_node *node)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
870 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
871 (void) node;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
872 // Nothing to do here at the moment
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
873 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
874
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
875
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
876 void th_regex_free_matches(th_regex_match_node *matches)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
877 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
878 th_llist_free_func_node((th_llist_t *) matches,
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
879 (void (*)(th_llist_t *)) th_regex_free_match);
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
880 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
881
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
882 #endif // TH_EXPERIMENTAL_REGEX