annotate th_regex.c @ 642:3a35db5c1873

Cleanup.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 24 Jan 2020 09:39:09 +0200
parents 9a1ed82abefd
children a2bf1ea05b05
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * Simple regular expression matching functionality
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 * Programmed and designed by Matti 'ccr' Hamalainen
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * (C) Copyright 2020 Tecnic Software productions (TNSP)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 *
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 * Please read file 'COPYING' for information on license and distribution.
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 */
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 #include "th_util.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 #include "th_regex.h"
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
12 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
/* Debug output helpers. Each DBG_RE_*() macro takes printf-style arguments
 * and writes them to stdout only when the matching TH_DBG_RE_* bit is set
 * in th_dbg_re_flags; otherwise the arguments are not evaluated.
 */
# define DBG_RE_PRINT(flag, ...) do { if (th_dbg_re_flags & (flag)) fprintf(stdout, __VA_ARGS__); } while (0)
# define DBG_RE_COMPILE(...) DBG_RE_PRINT(TH_DBG_RE_COMPILE, __VA_ARGS__)
# define DBG_RE_FREE(...) DBG_RE_PRINT(TH_DBG_RE_FREE, __VA_ARGS__)
# define DBG_RE_MATCH(...) DBG_RE_PRINT(TH_DBG_RE_MATCH, __VA_ARGS__)

/* Bitmask of TH_DBG_RE_* flags selecting which debug categories are
 * printed; everything is off by default. */
int th_dbg_re_flags = 0;
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
18
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
/* Printable names of the node match modes, listed in the same order as
 * the TH_RE_MATCH_* enumeration in this file.
 * NOTE(review): presumably indexed by a node's `mode` in debug output --
 * confirm at the use sites.
 */
static const char *re_match_modes[] =
{
    [0] = "ONCE",
    [1] = "COUNT GREEDY",
    [2] = "COUNT NONGREEDY",
    [3] = "ANCHOR START",
    [4] = "ANCHOR END",
};
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
27
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
/* Printable names of the node types, listed in the same order as the
 * TH_RE_TYPE_* enumeration in this file.
 * NOTE(review): presumably indexed by a node's `type` in debug output --
 * confirm at the use sites.
 */
static const char *re_match_types[] =
{
    [0] = "CHAR",
    [1] = "ANY",
    [2] = "LIST",
    [3] = "LIST REVERSE",
    [4] = "SUBEXPR",
};
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
36
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
37 #else
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
/* Debugging disabled: the macros discard their arguments unevaluated.
 * They still expand to a single statement, exactly like the debug-build
 * variants, so that call sites such as `if (x) DBG_RE_MATCH(...); else ...`
 * parse identically in both build modes and do not leave an empty body.
 */
# define DBG_RE_COMPILE(...) do { } while (0)
# define DBG_RE_FREE(...) do { } while (0)
# define DBG_RE_MATCH(...) do { } while (0)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
41 #endif
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
42
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
43
614
afcaf5e38f56 Disable regex stuff from normal builds.
Matti Hamalainen <ccr@tnsp.org>
parents: 613
diff changeset
44 #ifdef TH_EXPERIMENTAL_REGEX
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
45
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
46 typedef struct
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
47 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
48 int type;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
49 th_regex_char_t start, end;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
50
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
51 size_t nchars;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
52 th_regex_char_t *chars;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
53 } th_regex_list_item_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
54
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
55
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
56 typedef struct
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
57 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
58 size_t nitems, itemssize;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
59 th_regex_list_item_t *items;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
60 } th_regex_list_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
61
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
62
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
63 typedef struct
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
64 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
65 int mode, type;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
66 ssize_t repeatMin, repeatMax;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
67
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
68 struct {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
69 th_regex_char_t chr;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
70 th_regex_list_t list;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
71 struct th_regex_t *expr;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
72 } match;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
73 } th_regex_node_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
74
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
75
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
76 struct th_regex_t
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
77 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
78 size_t nnodes, nodessize;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
79 th_regex_node_t *nodes;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
80 };
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
81
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
82
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
/* Match modes stored in th_regex_node_t.mode. The numeric values follow
 * the order of the re_match_modes[] debug name table in this file.
 */
enum
{
    TH_RE_MATCH_ONCE            = 0,
    TH_RE_MATCH_COUNT_GREEDY    = 1,
    TH_RE_MATCH_COUNT_NONGREEDY = 2,

    TH_RE_MATCH_ANCHOR_START    = 3,
    TH_RE_MATCH_ANCHOR_END      = 4,
};
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
93
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
/* Node types. The numeric values follow the order of the
 * re_match_types[] debug name table in this file.
 * NOTE(review): presumably stored in th_regex_node_t.type -- confirm.
 */
enum
{
    TH_RE_TYPE_CHAR         = 0,
    TH_RE_TYPE_ANY_CHAR     = 1,
    TH_RE_TYPE_LIST         = 2,
    TH_RE_TYPE_LIST_REVERSE = 3,
    TH_RE_TYPE_SUBEXPR      = 4,
};
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
102
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
103
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
104 typedef struct
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
105 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
106 const th_regex_char_t *pattern;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
107 size_t offs;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
108
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
109 th_regex_t *data;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
110
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
111 size_t nstack, stacksize;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
112 th_regex_t **stack;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
113 } th_regex_parse_ctx_t;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
114
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
115
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
116 static void th_regex_node_init(th_regex_node_t *node)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
118 memset(node, 0, sizeof(th_regex_node_t));
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
119 node->mode = TH_RE_MATCH_ONCE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
120 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
121
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
123 static int th_regex_strndup(th_regex_char_t **pdst,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
124 const th_regex_char_t *src, const size_t len)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
125 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
126 if (pdst == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127 return THERR_NULLPTR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
128
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
129 if (UINTPTR_MAX / sizeof(th_regex_char_t) < len + 1)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
130 return THERR_BOUNDS;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
131
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
132 if ((*pdst = (th_regex_char_t *)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
133 th_malloc((len + 1) * sizeof(th_regex_char_t))) == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
134 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
135
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
136 memcpy(*pdst, src, len * sizeof(th_regex_char_t));
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
137 (*pdst)[len] = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
138
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
139 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
140 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
141
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
142
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
143 static int th_regex_parse_ctx_get_prev_node(
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
144 th_regex_parse_ctx_t *ctx, th_regex_node_t **pnode)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
145 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146 if (ctx->data != NULL && ctx->data->nnodes > 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
147 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
148 *pnode = &ctx->data->nodes[ctx->data->nnodes - 1];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
149 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
150 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
151 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
152 return THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
153 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
154
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
155
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
156 static int th_regex_parse_ctx_push(th_regex_parse_ctx_t *ctx)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
157 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
158 if (ctx->stack == NULL || ctx->nstack + 1 >= ctx->stacksize)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
159 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
160 ctx->stacksize += 16;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
161
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
162 if ((ctx->stack = th_realloc(ctx->stack,
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
163 ctx->stacksize * sizeof(th_regex_node_t *))) == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
164 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
165 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
166
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
167 ctx->stack[ctx->nstack] = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
168 ctx->nstack++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
169 ctx->data = NULL;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
170
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
171 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
172 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
173
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
174
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
175 static int th_regex_parse_ctx_pop(th_regex_parse_ctx_t *ctx, th_regex_t **data)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
176 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
177 if (ctx->nstack > 0)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
178 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
179 *data = ctx->data;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
180 ctx->nstack--;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
181 ctx->data = ctx->stack[ctx->nstack];
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
182 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
183 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
184 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
185 return THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
186 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
187
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
188
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
189 static int th_regex_parse_ctx_node_commit(th_regex_parse_ctx_t *ctx, th_regex_node_t *node)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
190 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
191 th_regex_t *data = ctx->data;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
192
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
193 if (data == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
194 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
195 if ((data = ctx->data = th_malloc0(sizeof(th_regex_t))) == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
196 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
197 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
198
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
199 if (data->nodes == NULL || data->nnodes + 1 >= data->nodessize)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
200 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
201 data->nodessize += 16;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
202 if ((data->nodes = th_realloc(data->nodes,
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
203 data->nodessize * sizeof(th_regex_node_t))) == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
204 return THERR_MALLOC;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
205 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
206
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
207 memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node_t));
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
208 data->nnodes++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
209
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
210 DBG_RE_COMPILE(
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
211 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
212 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
213 data->nnodes, data->nodessize,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
214 node->mode, node->type,
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
215 node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
216
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
217 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
218 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
219
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
220
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
221 static BOOL th_regex_find_next(const th_regex_char_t *str,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
222 const size_t start, size_t *offs,
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
223 const th_regex_char_t delim)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
224 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
225 for (*offs = start; str[*offs] != 0; (*offs)++)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
226 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
227 if (str[*offs] == delim)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
228 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
229 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
230 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
231 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
232
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
233
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
234 static BOOL th_regex_parse_ssize_t(const th_regex_char_t *str,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
235 ssize_t *value)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
236 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
237 th_regex_char_t ch;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
238 BOOL neg;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
239
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
240 if (*str == '-')
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
241 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
242 str++;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
243 neg = TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
244 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
245 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
246 neg = FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
247
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
248 // Is the value negative?
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
249 while ((ch = *str++))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
250 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
251 if (ch >= '0' && ch <= '9')
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
252 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
253 *value *= 10;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
254 *value += ch - '0';
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
255 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
256 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
257 return FALSE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
258 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
259
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
260 if (neg)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
261 *value = -(*value);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
262
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
263 return TRUE;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
264 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
265
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
266
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
267 static void th_regex_list_item_init(th_regex_list_item_t *item)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
268 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
269 memset(item, 0, sizeof(th_regex_list_item_t));
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
270 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
271
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
272
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
273 static int th_regex_list_add_item(th_regex_list_t *list, th_regex_list_item_t *item)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
274 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
275 if (list->items == NULL || list->nitems + 1 >= list->itemssize)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
276 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
277 list->itemssize += 16;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
278
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
279 if ((list->items = th_realloc(list->items,
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
280 list->itemssize * sizeof(th_regex_list_item_t))) == NULL)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
281 return THERR_MALLOC;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
282 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
283
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
284 memcpy(&list->items[list->nitems], item, sizeof(th_regex_list_item_t));
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
285 list->nitems++;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
286
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
287 return THERR_OK;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
288 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
289
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
290
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
291 static void th_regex_list_free(th_regex_list_t *list)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
292 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
293 if (list != NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
294 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
295 for (size_t n = 0; n < list->nitems; n++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
296 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
297 th_free(list->items[n].chars);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
298 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
299 th_free(list->items);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
300 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
301 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
302
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
303
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
304 static int th_regex_parse_list(const th_regex_char_t *str,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
305 const size_t slen, th_regex_list_t *list)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
306 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
307 th_regex_char_t *tmp = NULL;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
308 th_regex_list_item_t item;
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
309 int res;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
310
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
311 if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
312 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
313
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
314 // Handle ranges like [A-Z]
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
315 for (size_t offs = 0; offs < slen; offs++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
316 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
317 th_regex_char_t
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
318 *prev = (offs > 0) ? tmp + offs - 1 : NULL,
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
319 *curr = tmp + offs,
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
320 *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
321
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
322 if (*curr == '-')
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
323 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
324 if (prev != NULL && next != NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
325 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
326 // Range
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
327 th_regex_list_item_init(&item);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
328 item.type = 1;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
329 item.start = *prev;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
330 item.end = *next;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
331
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
332 if (item.start <= item.end)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
333 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
334 res = THERR_INVALID_DATA;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
335 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
336 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
337
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
338 *curr = *prev = *next = 0;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
339
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
340 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
341 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
342 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
343 else
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
344 if (next != NULL)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
345 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
346 res = THERR_INVALID_DATA;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
347 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
348 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
349 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
350 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
351
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
352 // Count number of remaining characters
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
353 th_regex_list_item_init(&item);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
354 item.type = 0;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
355 item.nchars = 0;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
356
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
357 for (size_t offs = 0; offs < slen; offs++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
358 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
359 th_regex_char_t curr = tmp[offs];
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
360 if (curr != 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
361 item.nchars++;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
362 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
363
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
364 if (item.nchars > 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
365 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
366 if ((item.chars = th_malloc(sizeof(th_regex_char_t) * item.nchars)) == NULL)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
367 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
368 res = THERR_MALLOC;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
369 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
370 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
371
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
372 for (size_t offs = 0, n = 0; offs < slen; offs++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
373 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
374 th_regex_char_t curr = tmp[offs];
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
375 if (curr != 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
376 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
377 item.chars[n] = curr;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
378 n++;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
379 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
380 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
381
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
382 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
383 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
384 th_free(item.chars);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
385 goto out;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
386 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
387 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
388
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
389 out:
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
390 th_free(tmp);
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
391 return res;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
392 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
393
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
394
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
395 int th_regex_compile(th_regex_t **pexpr, const th_regex_char_t *pattern)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
396 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
397 int res = THERR_OK;
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
398 th_regex_parse_ctx_t ctx;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
399 th_regex_node_t node, *pnode;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
400 th_regex_char_t *tmp = NULL;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
401 size_t start;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
402
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
403 if (pexpr == NULL || pattern == NULL)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
404 {
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
405 res = THERR_NULLPTR;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
406 goto exit;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
407 }
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
408
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
409 memset(&ctx, 0, sizeof(ctx));
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
410 ctx.pattern = pattern;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
411
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
412 for (; ctx.pattern[ctx.offs] != 0; ctx.offs++)
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
413 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
414 th_regex_char_t cch = ctx.pattern[ctx.offs];
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
415 DBG_RE_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx.offs, cch);
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
416 switch (cch)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
417 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
418 case '?':
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
419 case '*':
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
420 case '+':
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
421 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
422 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
423
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
424 if (cch == '?')
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
425 {
642
3a35db5c1873 Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 641
diff changeset
426 // Check if previous was a count
641
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
427 if (pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
428 {
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
429 res = THERR_INVALID_DATA;
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
430 goto exit;
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
431 }
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
432 else
642
3a35db5c1873 Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 641
diff changeset
433 if (pnode->mode != TH_RE_MATCH_COUNT_GREEDY)
641
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
434 {
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
435 // Previous token is optional (repeat 0-1 times)
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
436 pnode->repeatMin = 0;
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
437 pnode->repeatMax = 1;
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
438 }
642
3a35db5c1873 Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 641
diff changeset
439
3a35db5c1873 Cleanup.
Matti Hamalainen <ccr@tnsp.org>
parents: 641
diff changeset
440 pnode->mode = TH_RE_MATCH_COUNT_NONGREEDY;
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
441 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
442 else
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
443 {
641
9a1ed82abefd Fix parsing of +? and *?.
Matti Hamalainen <ccr@tnsp.org>
parents: 640
diff changeset
444 // Check if previous was a count ("**", "*+", etc.)
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
445 if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY ||
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
446 pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
447 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
448 res = THERR_INVALID_DATA;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
449 goto exit;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
450 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
451
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
452 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
453
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
454 if (cch == '*')
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
455 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
456 // Previous token can repeat 0 or more times
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
457 pnode->repeatMin = 0;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
458 pnode->repeatMax = -1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
459 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
460 else
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
461 {
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
462 // Previous token must repeat 1 or more times
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
463 pnode->repeatMin = 1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
464 pnode->repeatMax = -1;
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
465 }
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
466 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
467 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
468
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
469 case '{':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
470 // {n} | {min,max}
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
471 start = ctx.offs + 1;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
472 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}'))
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
473 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
474 // End not found
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
475 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
476 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
477 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
478
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
479 th_free(tmp);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
480
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
481 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK ||
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
482 (res = th_regex_strndup(&tmp, ctx.pattern + start,
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
483 ctx.offs - start)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
484 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
485
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
486 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
487
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
488 if (th_regex_find_next(tmp, 0, &start, ','))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
489 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
490 tmp[start] = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
491 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) ||
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
492 !th_regex_parse_ssize_t(tmp + start + 1, &pnode->repeatMax))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
493 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
494 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
495 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
496 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
497 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
498 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
499 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
500 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin))
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
501 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
502 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
503 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
504 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
505 pnode->repeatMax = pnode->repeatMin;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
506 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
507
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
508 if (pnode->repeatMin < 0 || pnode->repeatMax < 1 ||
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
509 pnode->repeatMax < pnode->repeatMin)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
510 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
511 // Invalid repeat counts
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
512 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
513 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
514 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
515 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
516
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
517 case '(':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
518 // Start of subpattern
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
519 if ((res = th_regex_parse_ctx_push(&ctx)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
520 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
521 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
522
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
523 case ')':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
524 // End of subpattern
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
525 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
526 node.type = TH_RE_TYPE_SUBEXPR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
527
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
528 if ((res = th_regex_parse_ctx_pop(&ctx, &node.match.expr)) != THERR_OK ||
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
529 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
530 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
531 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
532
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
533 case '^':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
534 // Start of line anchor
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
535 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
536 node.mode = TH_RE_MATCH_ANCHOR_START;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
537
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
538 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
539 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
540 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
541
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
542 case '$':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
543 // End of line anchor
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
544 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
545 node.mode = TH_RE_MATCH_ANCHOR_END;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
546
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
547 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
548 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
549 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
550
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
551 case '[':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
552 // Start of char list
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
553 start = ctx.offs + 1;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
554 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') ||
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
555 ctx.offs == start)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
556 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
557 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
558 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
559 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
560
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
561 th_regex_node_init(&node);
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
562 if (ctx.pattern[start] == '^')
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
563 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
564 node.type = TH_RE_TYPE_LIST_REVERSE;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
565 start++;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
566 }
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
567 else
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
568 node.type = TH_RE_TYPE_LIST;
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
569
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
570 if ((res = th_regex_parse_list(ctx.pattern + start,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
571 ctx.offs - start, &node.match.list) != THERR_OK) ||
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
572 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
573 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
574 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
575
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
576 case '.':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
577 // Any single character matches
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
578 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
579 node.type = TH_RE_TYPE_ANY_CHAR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
580
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
581 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
582 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
583 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
584
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
585 case '\\':
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
586 // Literal escape
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
587 ctx.offs++;
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
588 if (ctx.pattern[ctx.offs] == 0)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
589 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
590 // End of pattern, error
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
591 res = THERR_INVALID_DATA;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
592 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
593 }
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
594 // fall-through
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
595
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
596 default:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
597 // Given character must match
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
598 th_regex_node_init(&node);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
599 node.type = TH_RE_TYPE_CHAR;
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
600 node.match.chr = ctx.pattern[ctx.offs];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
601
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
602 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
603 goto exit;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
604 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
605 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
606 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
607
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
608 exit:
611
d895b0fd6ad6 Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
609 *pexpr = ctx.data;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
610 th_free(tmp);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
611 return res;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
612 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
613
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
614
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
615 void th_regex_free(th_regex_t *expr)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
616 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
617 if (expr != NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
618 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
619 for (size_t nnode = 0; nnode < expr->nnodes; nnode++)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
620 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
621 th_regex_node_t *node = &expr->nodes[nnode];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
622
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
623 DBG_RE_FREE(
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
624 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
625 "mode=%s, type=%s, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
626 nnode, expr->nnodes,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
627 re_match_modes[node->mode],
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
628 re_match_types[node->type],
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
629 node->repeatMin, node->repeatMax);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
630
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
631 th_regex_free(node->match.expr);
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
632 th_regex_list_free(&node->match.list);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
633 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
634
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
635 th_free(expr->nodes);
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
636 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
637 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
638
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
639
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
640 static BOOL th_regex_match_list(const th_regex_list_t *list, const th_regex_char_t cch)
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
641 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
642 // Could be optimized, perhaps .. sort match.chars, binary search etc?
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
643 for (size_t nitem = 0; nitem < list->nitems; nitem++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
644 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
645 const th_regex_list_item_t *item = &list->items[nitem];
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
646
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
647 if (item->type == 0)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
648 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
649 for (size_t n = 0; n < item->nchars; n++)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
650 if (item->chars[n] == cch)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
651 return TRUE;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
652 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
653 else
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
654 {
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
655 if (cch >= item->start && cch <= item->end)
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
656 return TRUE;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
657 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
658 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
659
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
660 return FALSE;
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
661 }
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
662
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 638
diff changeset
663
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
664 static BOOL th_regex_match_expr(
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
665 const th_regex_char_t *haystack,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
666 size_t *poffs,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
667 const th_regex_t *expr,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
668 const size_t startnode,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
669 const int flags
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
670 );
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
671
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
672
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
673 static BOOL th_regex_match_one(
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
674 const th_regex_char_t *haystack,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
675 size_t *poffs,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
676 const th_regex_node_t *node,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
677 const int flags
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
678 )
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
679 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
680 th_regex_char_t cch;
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
681 BOOL res = FALSE;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
682
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
683 switch (node->type)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
684 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
685 case TH_RE_TYPE_SUBEXPR:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
686 // push current context in stack
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
687 // new context for subexpr
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
688 res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
689 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
690
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
691 case TH_RE_TYPE_LIST:
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
692 case TH_RE_TYPE_LIST_REVERSE:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
693 if ((cch = haystack[*poffs]) == 0)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
694 res = FALSE;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
695 else
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
696 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
697 res = th_regex_match_list(&node->match.list, cch);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
698
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
699 if (node->type == TH_RE_TYPE_LIST_REVERSE)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
700 res = !res;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
701
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
702 (*poffs)++;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
703 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
704 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
705
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
706 case TH_RE_TYPE_ANY_CHAR:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
707 if ((cch = haystack[*poffs]) == 0)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
708 res = FALSE;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
709 else
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
710 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
711 res = TRUE;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
712 (*poffs)++;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
713 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
714 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
715
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
716 case TH_RE_TYPE_CHAR:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
717 if ((cch = haystack[*poffs]) == 0)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
718 res = FALSE;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
719 else
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
720 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
721 res = (cch == node->match.chr);
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
722 (*poffs)++;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
723 }
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
724 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
725 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
726
638
c4bca120bfb0 Cleanups.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
727 return res;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
728 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
729
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
730
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
731 static BOOL th_regex_match_expr(
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
732 const th_regex_char_t *haystack,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
733 size_t *poffs,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
734 const th_regex_t *expr,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
735 const size_t startnode,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
736 const int flags
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
737 )
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
738 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
739 BOOL res = TRUE;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
740 size_t soffs = *poffs;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
741
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
742 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
743 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
744 const th_regex_node_t *node = &expr->nodes[nnode];
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
745
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
746 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n",
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
747 nnode, expr->nnodes,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
748 re_match_modes[node->mode], re_match_types[node->type],
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
749 haystack + *poffs);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
750
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
751 switch (node->mode)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
752 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
753 case TH_RE_MATCH_ONCE:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
754 res = th_regex_match_one(haystack, poffs, node, flags);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
755 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
756
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
757 case TH_RE_MATCH_COUNT_GREEDY:
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
758 case TH_RE_MATCH_COUNT_NONGREEDY:
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
759 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
760 ssize_t count = 0;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
761
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
762 DBG_RE_MATCH(" COUNT min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n",
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
763 node->repeatMin, node->repeatMax);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
764
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
765 do
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
766 {
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
767 BOOL match;
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
768 size_t toffs = *poffs, tnode = nnode;
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
769
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
770 do {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
771 match = th_regex_match_one(haystack, &toffs, node, flags);
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
772 if (match && haystack[toffs] != 0)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
773 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
774 for (tnode = nnode + 1; match && tnode < expr->nnodes && haystack[toffs] != 0; tnode++)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
775 {
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
776 size_t noffs = toffs;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
777 match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
778 DBG_RE_MATCH(" '%s': %d\n", haystack + noffs, match);
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
779 }
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
780 }
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
781 } while (!match && haystack[toffs] != 0);
613
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
782
2e3b81ae8c8a More work on regexes.
Matti Hamalainen <ccr@tnsp.org>
parents: 612
diff changeset
783 if (match)
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
784 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
785 // Node matched
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
786 count++;
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
787 *poffs = soffs;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
788 nnode = tnode;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
789 res = (node->repeatMax > 0 && count >= node->repeatMax);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
790 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
791 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
792 {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
793 // Node did not match, check if we got the minimum if set
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
794 res = (node->repeatMin >= 0 && count >= node->repeatMin);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
795 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
796
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
797 } while (!res);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
798
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
799 DBG_RE_MATCH(" RESULT: count=%" PRId_SSIZE_T ", done=%s\n",
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
800 count, res ? "YES" : "NO");
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
801 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
802 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
803
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
804
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
805 case TH_RE_MATCH_ANCHOR_START:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
806 res = (*poffs == 0);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
807 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
808
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
809 case TH_RE_MATCH_ANCHOR_END:
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
810 res = (haystack[*poffs] == 0);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
811 break;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
812 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
813 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
814
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
815 if (!res)
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
816 *poffs = soffs;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
817
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
818 return res;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
819 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
820
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
821
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
822 int th_regex_match(const th_regex_t *expr, const th_regex_char_t *haystack,
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
823 size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches,
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
824 const int flags)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
825 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
826 size_t nmatches = 0;
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
827 (void) flags;
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
828
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
829 if (pnmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
830 *pnmatches = 0;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
831
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
832 // Check given pattern and string
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
833 if (expr == NULL || haystack == NULL)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
834 return THERR_NULLPTR;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
835
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
836 // Start matching
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
837 // XXX NOTE .. lots to think about and to take into account:
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
838 // - anchored and unanchored expressions
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
839 // - how to check if the expression has consumed all possibilities?
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
840 // ..
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
841 for (size_t soffs = 0; haystack[soffs] != 0; )
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
842 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
843 size_t coffs = soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
844
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 614
diff changeset
845 DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n",
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
846 soffs, haystack + soffs);
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
847
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
848 if (th_regex_match_expr(haystack, &coffs, expr, 0, flags))
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
849 {
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
850 // A match was found, increase count
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
851 nmatches++;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
852
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
853 // Deliver to caller if required
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
854 if (pnmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
855 *pnmatches = nmatches;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
856
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
857 if (pmatches != NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
858 {
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
859 th_regex_match_t *match = th_malloc0(sizeof(th_regex_match_t));
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
860 if (match == NULL)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
861 return THERR_MALLOC;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
862
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
863 match->start = soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
864 match->len = coffs - soffs;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
865
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
866 th_llist_append_node((th_llist_t **) pmatches, (th_llist_t *) match);
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
867 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
868
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
869 // Check match count limit, if set
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
870 if (maxmatches > 0 && nmatches >= maxmatches)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
871 break;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
872
612
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
873 // If offset was not advanced, increase by one
cc9ec51b4875 Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents: 611
diff changeset
874 // otherwise use end of match offset as new start
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
875 if (soffs == coffs)
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
876 soffs++;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
877 else
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
878 soffs = coffs;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
879 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
880 else
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
881 {
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
882 soffs++;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
883 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
884 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
885
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
886 return THERR_OK;
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
887 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
888
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
889
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
890 static void th_regex_free_match(th_regex_match_t *node)
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
891 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
892 (void) node;
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
893 // Nothing to do here at the moment
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
894 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
895
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
896
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
897 void th_regex_free_matches(th_regex_match_t *matches)
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
898 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
899 th_llist_free_func_node((th_llist_t *) matches,
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
900 (void (*)(th_llist_t *)) th_regex_free_match);
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
901 }
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 609
diff changeset
902
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
903 #endif // TH_EXPERIMENTAL_REGEX