view th_regex.h @ 669:7493d4c9ff77

Add some regex flags, features to be implemented "some day".
author Matti Hamalainen <ccr@tnsp.org>
date Tue, 28 Jan 2020 20:10:16 +0200
parents c5aa9ada1051
children 4ca6a3b30fe8
line wrap: on
line source

/*
 * Simple regular expression matching functionality
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2020 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
/// @file
/// @brief Simple regular expression matching functionality
#ifndef TH_REGEX_H
#define TH_REGEX_H

#include "th_datastruct.h"
#include "th_ioctx.h"


#ifdef __cplusplus
extern "C" {
#endif


//
// Definitions
//

/** @struct th_regex_t
 * Structure containing the tokenized / compiled regular expression.
 *
 * The structure is only forward-declared in this header, so code
 * including it can only handle expressions through @c th_regex_t
 * pointers, as the functions declared below do.
 */
struct th_regex_t;
typedef struct th_regex_t th_regex_t;


/** @brief
 * Linked list structure containing the information for matched
 * sequences returned by th_regex_match().
 *
 * Each node describes one matched sequence as a start offset and a
 * length, both relative to the @c haystack string that was searched.
 */
typedef struct
{
    th_llist_t node;    ///< Internal linked list data

    int type;           ///< Type of this match, one of the TH_RE_MATCH_* values

    size_t start;       ///< Start offset of the match sequence in @c haystack, in @c th_char_t units.
    size_t len;         ///< Length of the match sequence in @c haystack, in @c th_char_t units.
} th_regex_match_t;


/** @brief
 * Values stored in the @c type field of @c th_regex_match_t.
 *
 * These are sequential identifiers, not combinable bit flags.
 */
enum
{
    TH_RE_MATCH_EXPR    = 0, ///< NOTE(review): presumably a match of the whole expression -- confirm
    TH_RE_MATCH_SUBEXPR = 1, ///< NOTE(review): presumably a match of a sub-expression -- confirm
};


/** @brief
 * Option bits for the @c flags argument of th_regex_match().
 * Every flag occupies a bit of its own.
 *
 * NOTE! Not actually implemented or supported yet!
 */
enum
{
    TH_REF_CASEFOLD     = 1 << 0, ///< Use case-folding
    TH_REF_ANCHORED     = 1 << 1, ///< Implicitly consider expression "anchored" even without explicit ^$
    TH_REF_NEWLINE      = 1 << 2, ///< IF SET: Anchors will refer to line start/newline instead of string start/end
    TH_REF_SUBMATCH     = 1 << 3, ///< Include sub-expression matches in results
};


#ifdef TH_EXPERIMENTAL_REGEX_DEBUG
/// I/O context that is declared only when TH_EXPERIMENTAL_REGEX_DEBUG is defined.
/// NOTE(review): presumably the destination of regex debug output; where it
/// is defined and who assigns it is not visible in this header -- confirm.
extern th_ioctx *th_dbg_fh;
#endif


//
// Functions
//
/** @brief
 * Compile a regular expression pattern into a @c th_regex_t.
 *
 * @param[out] pexpr   pointer to the variable that receives the compiled expression
 * @param[in]  pattern pattern string to compile
 * @returns NOTE(review): presumably a th-libs error code -- confirm against the implementation
 */
int      th_regex_compile(th_regex_t **pexpr, const th_char_t *pattern);

/** @brief
 * Free a compiled regular expression.
 * NOTE(review): whether a NULL @p expr is accepted is not visible here -- confirm.
 *
 * @param[in] expr expression to free
 */
void     th_regex_free(th_regex_t *expr);

/** @brief
 * Dump a compiled regular expression to the given I/O context.
 *
 * @param[in] fh    I/O context to write to
 * @param[in] level NOTE(review): presumably the starting indentation / nesting level -- confirm
 * @param[in] expr  expression to dump
 */
void     th_regex_dump(th_ioctx *fh, const int level, const th_regex_t *expr);

/** @brief
 * Match a compiled regular expression against a string.
 *
 * Matched sequences are returned as a linked list of @c th_regex_match_t
 * nodes, whose offsets and lengths refer to @p haystack in @c th_char_t units.
 *
 * @param[in]  expr       compiled expression to match with
 * @param[in]  haystack   string to search
 * @param[out] pnmatches  NOTE(review): presumably receives the number of matches found -- confirm
 * @param[out] pmatches   receives the list of matches; NOTE(review): presumably
 *                        to be released with th_regex_free_matches() -- confirm
 * @param[in]  maxmatches NOTE(review): presumably an upper limit for the number of
 *                        matches; the meaning of 0 is not visible here -- confirm
 * @param[in]  flags      TH_REF_* flags; not actually implemented or supported yet
 * @returns NOTE(review): presumably a th-libs error code -- confirm against the implementation
 */
int      th_regex_match(const th_regex_t *expr, const th_char_t *haystack,
         size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches,
         const int flags);

/** @brief
 * Free a list of matches.
 * NOTE(review): presumably takes the list returned through the @c pmatches
 * argument of th_regex_match() -- confirm.
 *
 * @param[in] matches list of matches to free
 */
void     th_regex_free_matches(th_regex_match_t *matches);


#ifdef __cplusplus
}
#endif
#endif // TH_REGEX_H