annotate th_regex.h @ 669:7493d4c9ff77

Add some regex flags, features to be implemented "some day".
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 28 Jan 2020 20:10:16 +0200
parents c5aa9ada1051
children 4ca6a3b30fe8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * Simple regular expression matching functionality
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 * Programmed and designed by Matti 'ccr' Hamalainen
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * (C) Copyright 2020 Tecnic Software productions (TNSP)
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 *
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 * Please read file 'COPYING' for information on license and distribution.
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 */
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
8 /// @file
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
9 /// @brief Simple regular expression matching functionality
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10 #ifndef TH_REGEX_H
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11 #define TH_REGEX_H
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
12
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
13 #include "th_datastruct.h"
651
18fe45e61b2b Moar re-work.
Matti Hamalainen <ccr@tnsp.org>
parents: 645
diff changeset
14 #include "th_ioctx.h"
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
15
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
16
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17 #ifdef __cplusplus
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
18 extern "C" {
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
19 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20
639
8c957ad9d4c3 Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 635
diff changeset
21
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
22 //
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23 // Definitions
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
24 //
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
25
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
26 /** @struct th_regex_t
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
27 * Structure containing the tokenized / compiled regular expression.
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
28 */
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
29 struct th_regex_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
30 typedef struct th_regex_t th_regex_t;
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
33 /** @brief
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
34 * Linked list structure containing the information for matched
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
35 * sequences returned by th_regex_match().
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
36 */
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
37 typedef struct
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
38 {
655
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
39 th_llist_t node; ///< Internal linked list data
ae601363fdad Doxygenization.
Matti Hamalainen <ccr@tnsp.org>
parents: 652
diff changeset
40
669
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
41 int type; ///< Type of this match, TH_RE_MATCH_*
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
42
664
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
43 size_t start; ///< Start offset of the match sequence in @p haystack in @c th_char_t units.
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
44 size_t len; ///< Length of the match sequence in @p haystack in @c th_char_t units.
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
45 } th_regex_match_t;
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
46
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
47
669
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
48 /** @brief
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
49 * Values for the @c type field of @c th_regex_match_t.
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
50 */
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
51 enum
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
52 {
669
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
53 TH_RE_MATCH_EXPR, ///< NOTE(review): presumably marks a match of the whole expression -- confirm against the implementation
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
54 TH_RE_MATCH_SUBEXPR, ///< NOTE(review): presumably marks a sub-expression match (cf. TH_REF_SUBMATCH) -- confirm against the implementation
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
55 };
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
56
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
57
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
58 /** @brief
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
59 * Flags for th_regex_match(). NOTE! Not actually implemented or supported yet!
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
60 */
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
61 enum
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
62 {
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
63 TH_REF_CASEFOLD = 0x0001, ///< Use case-folding
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
64 TH_REF_ANCHORED = 0x0002, ///< Implicitly consider expression "anchored" even without explicit ^$
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
65 TH_REF_NEWLINE = 0x0004, ///< IF SET: Anchors will refer to line start/newline instead of string start/end
7493d4c9ff77 Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents: 664
diff changeset
66 TH_REF_SUBMATCH = 0x0008, ///< Include sub-expression matches in results
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
67 };
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
68
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
69
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
70 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
651
18fe45e61b2b Moar re-work.
Matti Hamalainen <ccr@tnsp.org>
parents: 645
diff changeset
71 extern th_ioctx *th_dbg_fh;
635
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
72 #endif
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
73
d191ded8a790 Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents: 610
diff changeset
74
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
75 //
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
76 // Functions
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77 //
664
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
78 int th_regex_compile(th_regex_t **pexpr, const th_char_t *pattern); ///< NOTE(review): presumably compiles @p pattern into a th_regex_t handed back through @p pexpr; meaning of the int result is not visible in this header -- confirm
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
79 void th_regex_free(th_regex_t *expr); ///< NOTE(review): presumably releases an expression obtained from th_regex_compile() -- confirm
651
18fe45e61b2b Moar re-work.
Matti Hamalainen <ccr@tnsp.org>
parents: 645
diff changeset
80 void th_regex_dump(th_ioctx *fh, const int level, const th_regex_t *expr); ///< NOTE(review): presumably writes a representation of @p expr to @p fh; role of @p level is not visible in this header -- confirm
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
81
664
c5aa9ada1051 s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents: 655
diff changeset
/** @brief
 * Match the compiled expression @p expr against @p haystack.
 * Matched sequences are described by a linked list of th_regex_match_t,
 * whose start/len fields are in @c th_char_t units of @p haystack.
 * The @p flags argument takes the TH_REF_* values, which are documented
 * as not implemented or supported yet.
 * NOTE(review): presumably the list is handed back through @p pmatches and
 * its length through @p pnmatches, limited by @p maxmatches; the meaning of
 * the int result is not visible in this header -- confirm against the
 * implementation.
 */
82 int th_regex_match(const th_regex_t *expr, const th_char_t *haystack,
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
83 size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches,
610
a0e8d9c6300b A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents: 605
diff changeset
84 const int flags);
640
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
85
9e1f9e1d1487 Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents: 639
diff changeset
86 void th_regex_free_matches(th_regex_match_t *matches); ///< NOTE(review): presumably frees a match list produced by th_regex_match() -- confirm
605
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
87
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
88
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
89 #ifdef __cplusplus
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
90 }
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
91 #endif
566e6ef41f9d Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92 #endif // TH_REGEX_H