annotate th_regex.h @ 667:039aa00cbfbf

Work on regex matcher.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 27 Jan 2020 17:07:06 +0200
parents c5aa9ada1051
children 7493d4c9ff77
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * Simple regular expression matching functionality
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 * Programmed and designed by Matti 'ccr' Hamalainen
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * (C) Copyright 2020 Tecnic Software productions (TNSP)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 *
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 * Please read file 'COPYING' for information on license and distribution.
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 */
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
8 /// @file
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
9 /// @brief Simple regular expression matching functionality
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10 #ifndef TH_REGEX_H
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11 #define TH_REGEX_H
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
12
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
13 #include "th_datastruct.h"
651
18fe45e61b2b Moar re-work.
Matti Hamalainen <ccr@tnsp.org>
parents: 645
diff changeset
14 #include "th_ioctx.h"
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
15
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
16
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17 #ifdef __cplusplus
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
18 extern "C" {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
19 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
21
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
22 //
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23 // Definitions
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
24 //
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
25
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
26 /** @struct th_regex_t
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
27 * Structure containing the tokenized / compiled regular expression.
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
28 */
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
29 struct th_regex_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
30 typedef struct th_regex_t th_regex_t;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
33 /** @brief
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
34 * Linked list structure containing the information for matched
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
35 * sequences returned by th_regex_match().
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
36 */
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
37 typedef struct
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
38 {
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
39 th_llist_t node; ///< Internal linked list data
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
40
664
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
41 size_t start; ///< Start offset of the matched sequence within the @c haystack string given to th_regex_match(), in @c th_char_t units.
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
42 size_t len; ///< Length of the matched sequence within the @c haystack string given to th_regex_match(), in @c th_char_t units.
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
43 } th_regex_match_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
44
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
45
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
46 // Flags for th_regex_match()
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
47 // (reserved for future use: these flags are not actually used or supported yet)
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
48 enum
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
49 {
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
50 TH_REF_CASEFOLD = 0x0001,
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
51 TH_REF_ANCHORED = 0x0002,
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
52 };
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
53
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
54
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
55 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
651
18fe45e61b2b Moar re-work.
Matti Hamalainen <ccr@tnsp.org>
parents: 645
diff changeset
56 extern th_ioctx *th_dbg_fh;
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
57 #endif
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
58
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
59
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
60 //
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
61 // Functions
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
62 //
664
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
63 int th_regex_compile(th_regex_t **pexpr, const th_char_t *pattern);
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
64 void th_regex_free(th_regex_t *expr);
651
18fe45e61b2b Moar re-work.
Matti Hamalainen <ccr@tnsp.org>
parents: 645
diff changeset
65 void th_regex_dump(th_ioctx *fh, const int level, const th_regex_t *expr);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
66
664
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
67 int th_regex_match(const th_regex_t *expr, const th_char_t *haystack,
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
68 size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches,
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
69 const int flags);
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
70
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
71 void th_regex_free_matches(th_regex_match_t *matches);
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
72
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
73
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
74 #ifdef __cplusplus
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
75 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
76 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77 #endif // TH_REGEX_H