view th_string.c @ 696:9bbacd72df3d

Allow src and dst to "overlap" for th_pstr_cpy().
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 09 Mar 2020 19:13:52 +0200
parents 953e16582f25
children d687bbb54c1a
line wrap: on
line source

/*
 * Miscellaneous string-handling related utility-functions
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2002-2020 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#include "th_util.h"
#include "th_string.h"

// Include printf implementation
#include "th_printf.c"


/**
 * Locate the first occurrence of a character in a NUL-terminated string;
 * a th_char_t counterpart of strchr(). Note that the scan stops at the
 * terminating NUL without comparing it, so the terminator is never matched.
 * @param[in] str string to search, must not be @c NULL
 * @param[in] ch character to look for
 * @returns pointer to the first match, or @c NULL if @p ch does not occur
 */
th_char_t *th_strchr(const th_char_t *str, const th_char_t ch)
{
    th_char_t *cur = (th_char_t *) str;

    while (*cur != 0)
    {
        if (*cur == ch)
            return cur;

        cur++;
    }

    return NULL;
}


/**
 * Duplicate a NUL-terminated string into freshly allocated memory.
 * Works like strdup(), but tolerates a @c NULL source.
 * @param[in] src string to duplicate, may be @c NULL
 * @returns newly allocated copy, or @c NULL if @p src was @c NULL or
 * the allocation failed
 */
th_char_t *th_strdup(const th_char_t *src)
{
    size_t nbytes;
    th_char_t *copy;

    if (src == NULL)
        return NULL;

    // Size of the string in bytes, including the NUL terminator
    nbytes = (th_strlen(src) + 1) * sizeof(th_char_t);

    copy = th_malloc(nbytes);
    if (copy != NULL)
        memcpy(copy, src, nbytes);

    return copy;
}


/**
 * Implementation of strndup() with @c NULL check. Copies up to @p n
 * characters from the source. A NUL terminator is always added,
 * unless @p src was @c NULL or memory could not be allocated, in which
 * case @c NULL pointer is returned.
 *
 * At most @p n characters of @p src are examined, so the source does not
 * have to be NUL-terminated as long as it holds at least @p n characters.
 *
 * @param[in] src string pointer to be copied from
 * @param[in] n maximum number of characters to copy
 * @returns the resulting string or @c NULL pointer
 */
th_char_t *th_strndup(const th_char_t *src, const size_t n)
{
    th_char_t *res;
    size_t len;

    if (src == NULL)
        return NULL;

    // Bounded length scan: never look past the first n characters
    for (len = 0; len < n && src[len] != 0; len++);

    if ((res = th_malloc((len + 1) * sizeof(th_char_t))) == NULL)
        return NULL;

    memcpy(res, src, len * sizeof(th_char_t));
    res[len] = 0;

    return res;
}


/**
 * Helper function for th_strdup_trim() and friends. Copies @p len characters from
 * given string, and trims whitespace from it according to specified @p flags.
 * See TH_TRIM_* in th_string.h. If @p len or the resulting trimmed string would
 * be empty (length 0), no copy is allocated and a @c NULL pointer is returned.
 * Only the characters src[0] .. src[len - 1] are ever read.
 * @param[in] src source string (does not need to be NUL terminated, as length must be specified)
 * @param[in] len length of the source string, or desired number of characters to copy at maximum
 * @param[in] flags trimming flags, see TH_TRIM_* in th_string.h
 * @returns the resulting string or @c NULL pointer
 */
static th_char_t * th_strdup_trim_do(const th_char_t *src, size_t len, const int flags)
{
    th_char_t *res;
    size_t start = 0, end = len;    // Half-open range [start, end) to be copied

    if (len == 0)
        return NULL;

    // Trim start: skip leading whitespace
    if (flags & TH_TRIM_START)
    {
        while (start < end && th_isspace(src[start]))
            start++;
    }

    // Trim end: drop trailing whitespace
    if (flags & TH_TRIM_END)
    {
        while (end > start && th_isspace(src[end - 1]))
            end--;
    }

    // Nothing left after trimming? As before, a NUL as the last retained
    // character is also treated as an empty result.
    if (start >= end || src[end - 1] == 0)
        return NULL;

    // Allocate memory for result
    len = end - start;
    if ((res = th_malloc((len + 1) * sizeof(th_char_t))) == NULL)
        return NULL;

    memcpy(res, src + start, len * sizeof(th_char_t));
    res[len] = 0;
    return res;
}


/**
 * Duplicate a NUL-terminated string, trimming whitespace as requested by @p flags
 * (see TH_TRIM_* in th_string.h). If the trimmed result would be empty, nothing
 * is allocated and a @c NULL pointer is returned.
 * @param[in] src source string, may be @c NULL
 * @param[in] flags trimming flags, see TH_TRIM_* in th_string.h
 * @returns the resulting string or @c NULL pointer
 */
th_char_t *th_strdup_trim(const th_char_t *src, const int flags)
{
    return (src != NULL) ? th_strdup_trim_do(src, th_strlen(src), flags) : NULL;
}


/**
 * Duplicate at most @p n characters of the string @p src (fewer, if the string
 * ends earlier), trimming the result according to @p flags.
 * See TH_TRIM_* in th_string.h. If @p n is zero or the trimmed result would be
 * empty, nothing is allocated and a @c NULL pointer is returned.
 * @param[in] src source string, may be @c NULL
 * @param[in] n maximum number of characters to take from the source string
 * @param[in] flags trimming flags, see TH_TRIM_* in th_string.h
 * @returns the resulting string or @c NULL pointer
 */
th_char_t *th_strndup_trim(const th_char_t *src, const size_t n, const int flags)
{
    size_t len = 0;

    if (src == NULL || n == 0)
        return NULL;

    // Length of the source, capped at n characters
    while (len < n && src[len] != 0)
        len++;

    return th_strdup_trim_do(src, len, flags);
}


/**
 * Duplicate exactly @p len characters from @p src, which does not need to
 * be NUL-terminated. The copy is always NUL-terminated.
 * @param[in] src source data, at least @p len characters long
 * @param[in] len number of characters to copy
 * @returns the resulting string, or @c NULL if @p src was @c NULL or
 * memory could not be allocated
 */
th_char_t *th_strndup_no0(const th_char_t *src, const size_t len)
{
    th_char_t *res;

    // Match the other duplication helpers: NULL in, NULL out
    if (src == NULL)
        return NULL;

    if ((res = th_malloc((len + 1) * sizeof(th_char_t))) == NULL)
        return NULL;

    memcpy(res, src, len * sizeof(th_char_t));
    res[len] = 0;

    return res;
}


/**
 * Duplicate @p len characters from @p src (which does not need to be
 * NUL-terminated), trimming whitespace according to @p flags.
 * See TH_TRIM_* in th_string.h.
 * @param[in] src source data, at least @p len characters long
 * @param[in] len number of characters to consider
 * @param[in] flags trimming flags, see TH_TRIM_* in th_string.h
 * @returns the resulting string, or @c NULL if @p src was @c NULL,
 * the result would be empty, or memory could not be allocated
 */
th_char_t *th_strndup_no0_trim(const th_char_t *src, const size_t len, const int flags)
{
    // The helper dereferences src whenever len > 0, so reject NULL here
    if (src == NULL)
        return NULL;

    return th_strdup_trim_do(src, len, flags);
}


#ifdef TH_USE_INTERNAL_SPRINTF
/* Character output callback used by th_vsnprintf(): stores the character
 * in the context buffer while there is room, and advances both position
 * counters even when the character had to be dropped.
 */
static int th_pbuf_vputch(th_vprintf_ctx *ctx, const th_char_t ch)
{
    // Only write inside the buffer, but keep counting past its end
    if (ctx->pos < ctx->size)
    {
        ctx->buf[ctx->pos] = ch;
    }

    ctx->ipos += 1;
    ctx->pos += 1;

    return ch;
}
#endif


/**
 * Formatted print into a buffer using a va_list. Depending on the
 * compile-time define TH_USE_INTERNAL_SPRINTF this either uses the
 * th-libs internal printf implementation or libc vsnprintf().
 * @param[out] buf pointer to buffer to print to
 * @param[in] size available space in the buffer
 * @param[in] fmt format string
 * @param[in] ap a va_list variable arguments list structure
 * @returns the value returned by the underlying implementation
 */
int th_vsnprintf(th_char_t *buf, size_t size, const th_char_t *fmt, va_list ap)
{
#ifndef TH_USE_INTERNAL_SPRINTF
    return vsnprintf(buf, size, fmt, ap);
#else
    th_vprintf_ctx ctx;
    int ret;

    ctx.buf = buf;
    ctx.size = size;
    ctx.pos = 0;
    ctx.ipos = 0;

    ret = th_vprintf_do(&ctx, th_pbuf_vputch, fmt, ap);

    // Terminate the output: right after the last character if it fit,
    // otherwise in the last slot of the buffer (if there is a buffer at all)
    if (ctx.pos < size)
    {
        buf[ctx.pos] = 0;
    }
    else if (size > 0)
    {
        buf[size - 1] = 0;
    }

    return ret;
#endif
}


/**
 * Formatted print into a buffer with variable arguments. Uses either libc
 * or the th-libs internal implementation, as selected at compile time
 * inside th_vsnprintf().
 * @param[out] buf pointer to buffer to print to
 * @param[in] size available space in the buffer
 * @param[in] fmt format string
 * @param[in] ... variable arguments
 * @returns the value returned by th_vsnprintf()
 */
int th_snprintf(th_char_t *buf, size_t size, const th_char_t *fmt, ...)
{
    va_list args;
    int result;

    va_start(args, fmt);
    // th_vsnprintf() makes the same internal/libc selection itself
    result = th_vsnprintf(buf, size, fmt, args);
    va_end(args);

    return result;
}


#ifdef TH_USE_INTERNAL_SPRINTF
/* Character output callback for stdio streams: the context's data
 * pointer carries the FILE handle to write to.
 */
static int th_stdio_vputch(th_vprintf_ctx *ctx, const th_char_t ch)
{
    FILE *stream = (FILE *) ctx->data;

    ctx->ipos++;
    ctx->pos++;

    return fputc(ch, stream);
}
#endif


/**
 * Formatted print into a stdio stream using a va_list. Uses either libc
 * vfprintf() or the th-libs internal printf implementation, as determined
 * by the compile-time define TH_USE_INTERNAL_SPRINTF.
 * @param[in] fh stream to print to
 * @param[in] fmt format string
 * @param[in] ap a va_list variable arguments list structure
 * @returns the value returned by the underlying implementation
 */
int th_vfprintf(FILE *fh, const th_char_t *fmt, va_list ap)
{
#ifndef TH_USE_INTERNAL_SPRINTF
    return vfprintf(fh, fmt, ap);
#else
    th_vprintf_ctx ctx;

    // The output callback picks the stream up from the data pointer
    ctx.data = (void *) fh;
    ctx.pos = 0;
    ctx.ipos = 0;

    return th_vprintf_do(&ctx, th_stdio_vputch, fmt, ap);
#endif
}


/**
 * Formatted print into a stdio stream with variable arguments.
 * Uses either libc or the th-libs internal printf implementation,
 * as selected at compile time inside th_vfprintf().
 * @param[in] fh stream to print to
 * @param[in] fmt format string
 * @param[in] ... variable arguments
 * @returns the value returned by th_vfprintf()
 */
int th_fprintf(FILE *fh, const th_char_t *fmt, ...)
{
    int ret;
    va_list ap;

    va_start(ap, fmt);
    // A va_list must be handed to a v*printf() style function; passing it
    // to fprintf() as a variadic argument is undefined behaviour.
    ret = th_vfprintf(fh, fmt, ap);
    va_end(ap);

    return ret;
}


/* Output callbacks for simulating a sprintf() that allocates memory:
 * the first one only counts characters, the second one stores them.
 */
#ifdef TH_USE_INTERNAL_SPRINTF
// Counting pass: nothing is written, only the position advances
static int th_pbuf_alloc_vputch1(th_vprintf_ctx *ctx, const th_char_t ch)
{
    ctx->pos += 1;
    return ch;
}

// Writing pass: store the character without any bounds checking
static int th_pbuf_alloc_vputch2(th_vprintf_ctx *ctx, const th_char_t ch)
{
    ctx->buf[ctx->pos] = ch;
    ctx->pos += 1;
    return ch;
}
#endif


/**
 * Combination of vsprintf() and strdup. Automatically allocates memory
 * for the resulting string.
 * @param[in] fmt format string
 * @param[in] args variable arguments list structure
 * @returns the resulting string or @c NULL pointer in case of error
 * (@p fmt was @c NULL or memory could not be allocated)
 */
th_char_t *th_strdup_vprintf(const th_char_t *fmt, va_list args)
{
#ifdef TH_USE_INTERNAL_SPRINTF
    // Using internal printf() implementation
    th_vprintf_ctx ctx;
    va_list ap;

    if (fmt == NULL)
        return NULL;

    // Get the size of the output: first pass only counts characters
    va_copy(ap, args);
    ctx.buf = NULL;
    ctx.size = 0;
    ctx.pos = 0;
    ctx.ipos = 0;
    th_vprintf_do(&ctx, th_pbuf_alloc_vputch1, fmt, ap);
    va_end(ap);

    // Allocate memory for it, including the NUL terminator
    ctx.size = ctx.pos + 1;
    if ((ctx.buf = th_malloc(ctx.size * sizeof(th_char_t))) == NULL)
        return NULL;

    // Print the final result into the buffer
    va_copy(ap, args);
    ctx.pos = 0;
    ctx.ipos = 0;
    th_vprintf_do(&ctx, th_pbuf_alloc_vputch2, fmt, ap);
    va_end(ap);
    ctx.buf[ctx.pos] = 0;

    return ctx.buf;

#else
    // Using libc vsnprintf()
    size_t size = 64;
    th_char_t *buf, *tmp;

    if (fmt == NULL)
        return NULL;

    if ((buf = th_malloc(size * sizeof(th_char_t))) == NULL)
        return NULL;

    while (1)
    {
        int n;
        va_list ap;
        va_copy(ap, args);
        n = vsnprintf(buf, size, fmt, ap);
        va_end(ap);

        // Did the whole output, including the terminator, fit?
        if (n > -1 && (size_t) n < size)
            return buf;

        if (n > -1)
        {
            // The exact required size is known
            size = (size_t) n + 1;
        }
        else
        {
            // No size information available: double the buffer and retry.
            // The size is unsigned, so give up if doubling would wrap
            // around instead of overflowing a signed integer.
            if (size * 2 <= size)
            {
                th_free(buf);
                return NULL;
            }
            size *= 2;
        }

        if ((tmp = th_realloc(buf, size * sizeof(th_char_t))) == NULL)
        {
            th_free(buf);
            return NULL;
        }
        else
            buf = tmp;
    }
#endif
}


/**
 * printf-style formatting into a newly allocated string; the variable
 * argument front-end of th_strdup_vprintf().
 * @param[in] fmt format string
 * @param[in] ... optional printf arguments
 * @returns the resulting string or @c NULL pointer in case of error
 */
th_char_t *th_strdup_printf(const th_char_t *fmt, ...)
{
    va_list args;
    th_char_t *str;

    va_start(args, fmt);
    str = th_strdup_vprintf(fmt, args);
    va_end(args);

    return str;
}


/**
 * Replace the string pointed to by @p buf with a newly formatted one.
 * The new string is created first, so the arguments may refer to the old
 * string; the old string is then freed and the pointer replaced. If the
 * formatting fails, @p *buf ends up as @c NULL.
 * @param[in,out] buf pointer to a char pointer/string
 * @param[in] fmt format string
 * @param[in] ap variable arguments list structure
 */
void th_pstr_vprintf(th_char_t **buf, const th_char_t *fmt, va_list ap)
{
    th_char_t *old = *buf;

    *buf = th_strdup_vprintf(fmt, ap);
    th_free(old);
}


/**
 * Replace the string pointed to by @p buf with a newly formatted one;
 * the variable argument front-end of th_pstr_vprintf(). The previous
 * string is freed after the new one has been created.
 * @param[in,out] buf pointer to a char pointer/string
 * @param[in] fmt format string
 * @param[in] ... optional printf arguments
 */
void th_pstr_printf(th_char_t **buf, const th_char_t *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    th_pstr_vprintf(buf, fmt, args);
    va_end(args);
}


/* Compare two strings ignoring case, like strcasecmp(). Both arguments
 * must be non-NULL (asserted). Returns zero if the strings are equal,
 * otherwise the difference of the first differing lowercased characters.
 */
int th_strcasecmp(const th_char_t *haystack, const th_char_t *needle)
{
    size_t i;

    assert(haystack != NULL);
    assert(needle != NULL);

    // The very same string is trivially equal to itself
    if (haystack == needle)
        return 0;

    for (i = 0; haystack[i] != 0 && needle[i] != 0; i++)
    {
        const int diff = th_tolower(haystack[i]) - th_tolower(needle[i]);
        if (diff != 0)
            return diff;
    }

    // At least one string ended here: compare the final characters
    return th_tolower(haystack[i]) - th_tolower(needle[i]);
}


/* Compare at most n characters of two strings ignoring case, like
 * strncasecmp(). Both string arguments must be non-NULL (asserted).
 * Returns zero if the first n characters are equal (or n is zero),
 * otherwise the difference of the first differing lowercased characters.
 */
int th_strncasecmp(const th_char_t *haystack, const th_char_t *needle, size_t n)
{
    const th_char_t *s1 = haystack, *s2 = needle;
    assert(haystack != NULL);
    assert(needle != NULL);

    if (haystack == needle)
        return 0;

    while (n > 0 && *s1 && *s2)
    {
        int k = th_tolower(*s1) - th_tolower(*s2);
        if (k != 0)
            return k;
        s1++;
        s2++;
        n--;
    }

    // All n characters matched: whatever follows must not affect the result
    if (n == 0)
        return 0;

    return th_tolower(*s1) - th_tolower(*s2);
}


/* Check case-insensitively whether the string str ends with needle.
 * Returns a pointer to the start of the matching tail inside str,
 * or NULL if there is no match or either argument is NULL.
 */
th_char_t *th_strrcasecmp(th_char_t *str, const th_char_t *needle)
{
    size_t slen, nlen;
    th_char_t *tail;

    if (str == NULL || needle == NULL)
        return NULL;

    slen = th_strlen(str);
    nlen = th_strlen(needle);

    // A needle longer than the string can never match
    if (nlen > slen)
        return NULL;

    tail = str + slen - nlen;
    return (th_strcasecmp(tail, needle) == 0) ? tail : NULL;
}


/* Replace the string in *pdst with a copy of src. The copy is made
 * before the old string is freed, so src may point into *pdst.
 * Returns THERR_OK on success, THERR_NULLPTR if either argument is NULL,
 * or THERR_MALLOC if allocation fails (*pdst is left untouched then).
 */
int th_pstr_cpy(th_char_t **pdst, const th_char_t *src)
{
    size_t nbytes;
    th_char_t *copy;

    if (pdst == NULL || src == NULL)
        return THERR_NULLPTR;

    // Size of the source in bytes, including the NUL terminator
    nbytes = (th_strlen(src) + 1) * sizeof(th_char_t);

    copy = th_malloc(nbytes);
    if (copy == NULL)
        return THERR_MALLOC;

    memcpy(copy, src, nbytes);

    // Only now is it safe to let go of the old string
    th_free(*pdst);
    *pdst = copy;

    return THERR_OK;
}


/* Concatenates a given string into string pointed by *pdst. If *pdst
 * is NULL, a copy of src is stored in it instead.
 * Returns THERR_OK on success, THERR_NULLPTR if either argument is NULL,
 * or THERR_MALLOC if allocation fails; in that case *pdst is left
 * untouched and remains valid.
 * Note: src must not point into *pdst, as the reallocation may move
 * the destination string.
 */
int th_pstr_cat(th_char_t **pdst, const th_char_t *src)
{
    size_t dlen, slen, nbytes;
    th_char_t *tmp;

    if (pdst == NULL || src == NULL)
        return THERR_NULLPTR;

    dlen = (*pdst != NULL) ? th_strlen(*pdst) : 0;
    slen = th_strlen(src);
    nbytes = (dlen + slen + 1) * sizeof(th_char_t);

    // Keep the result in a temporary, so that the original string
    // is not lost (and leaked) if the allocation fails
    tmp = (*pdst != NULL) ? th_realloc(*pdst, nbytes) : th_malloc(nbytes);
    if (tmp == NULL)
        return THERR_MALLOC;

    // Append the source, including its NUL terminator
    memcpy(tmp + dlen, src, (slen + 1) * sizeof(th_char_t));
    *pdst = tmp;

    return THERR_OK;
}


/* Split the string str into elements at any of the characters in sep,
 * storing newly allocated copies of the elements in ctx (any previous
 * contents of ctx are overwritten, not freed). Adjacent separators
 * produce empty elements, but a separator at the very end of the string
 * does not produce a trailing empty element.
 * Returns THERR_OK on success, THERR_NULLPTR if any argument is NULL, or
 * THERR_MALLOC if allocation fails; in the last case ctx still holds the
 * elements collected so far and can be released with th_strelems_free().
 */
int th_split_string_elems(const th_char_t *str, th_strelems_t *ctx, const th_char_t *sep)
{
    size_t start = 0, end;
    BOOL match = FALSE;

    if (str == NULL || ctx == NULL || sep == NULL)
        return THERR_NULLPTR;

    ctx->elems = NULL;
    ctx->nelems = 0;

    do
    {
        // Split foremost str element out
        match = FALSE;
        for (end = start; str[end] != 0; end++)
        {
            if (th_strchr(sep, str[end]) != NULL)
            {
                match = TRUE;
                break;
            }
        }

        // If the element is there, create it
        if (str[start] != 0)
        {
            th_char_t **tmp;
            th_char_t *elem = th_strndup(str + start, end - start);
            if (elem == NULL)
                return THERR_MALLOC;

            // Grow the array through a temporary, so that a failure
            // neither leaks the new element nor loses the old array
            tmp = th_realloc(ctx->elems, sizeof(th_char_t *) * (ctx->nelems + 1));
            if (tmp == NULL)
            {
                th_free(elem);
                return THERR_MALLOC;
            }

            ctx->elems = tmp;
            ctx->elems[ctx->nelems] = elem;
            ctx->nelems++;
        }

        start = end + 1;
    } while (match);

    return THERR_OK;
}


/* Split the string str into elements at any of the characters in sep.
 * On success the element array and the number of elements are stored in
 * *elems and *nelems. On failure they are set to NULL and 0.
 * Returns THERR_OK on success, THERR_NULLPTR if elems or nelems is NULL,
 * or the error code returned by th_split_string_elems().
 */
int th_split_string(const th_char_t *str, th_char_t ***elems, size_t *nelems, const th_char_t *sep)
{
    th_strelems_t ctx;
    int res;

    if (elems == NULL || nelems == NULL)
        return THERR_NULLPTR;

    // Bail out on failure (not on success), leaving the outputs well-defined
    if ((res = th_split_string_elems(str, &ctx, sep)) != THERR_OK)
    {
        *elems = NULL;
        *nelems = 0;
        return res;
    }

    *elems = ctx.elems;
    *nelems = ctx.nelems;

    return THERR_OK;
}


/* Join the elements of ctx into one newly allocated string, placing sep
 * between adjacent elements. The result is stored in *str; any previous
 * value of *str is overwritten, not freed. NULL elements are treated as
 * empty strings, and an empty element list produces an empty string.
 * Returns THERR_OK on success, THERR_NULLPTR if any argument is NULL, or
 * THERR_MALLOC if allocation fails (*str is set to NULL if allocating
 * the result itself failed).
 */
int th_join_string_elems(th_char_t **str, const th_strelems_t *ctx, const th_char_t *sep)
{
    size_t len, n, offs, seplen, *elemlens = NULL;

    if (str == NULL || ctx == NULL || sep == NULL)
        return THERR_NULLPTR;

    // A zero-sized allocation may return NULL, so only allocate
    // the length cache when there is something to store in it
    if (ctx->nelems > 0 &&
        (elemlens = th_malloc(ctx->nelems * sizeof(size_t))) == NULL)
        return THERR_MALLOC;

    seplen = th_strlen(sep);

    // Compute the element lengths once, and their total
    for (len = n = 0; n < ctx->nelems; n++)
    {
        elemlens[n] = (ctx->elems[n] != NULL) ? th_strlen(ctx->elems[n]) : 0;
        len += elemlens[n];
    }

    // Separators go only between elements; plus one for the NUL terminator
    if (n > 1)
        len += (n - 1) * seplen;
    len += 1;

    if ((*str = th_malloc(len * sizeof(th_char_t))) == NULL)
    {
        th_free(elemlens);
        return THERR_MALLOC;
    }

    for (offs = n = 0; n < ctx->nelems; n++)
    {
        if (n > 0)
        {
            memcpy((*str) + offs, sep, seplen * sizeof(th_char_t));
            offs += seplen;
        }

        if (elemlens[n] > 0)
        {
            memcpy((*str) + offs, ctx->elems[n], elemlens[n] * sizeof(th_char_t));
            offs += elemlens[n];
        }
    }

    (*str)[offs] = 0;

    // The length cache is only needed while building the result
    th_free(elemlens);

    return THERR_OK;
}


/* Join nelems strings from the array elems into one newly allocated
 * string stored in *str, with sep as the separator. This is a thin
 * wrapper that packs the array into a th_strelems_t and returns the
 * result of th_join_string_elems().
 */
int th_join_string(th_char_t **str, th_char_t **elems, const size_t nelems, const th_char_t *sep)
{
    th_strelems_t list;

    list.nelems = nelems;
    list.elems = elems;

    return th_join_string_elems(str, &list, sep);
}


/* Free all the elements held by ctx, as well as the element array
 * itself, and reset ctx to an empty state. The ctx structure itself
 * is not freed. A NULL ctx is ignored.
 */
void th_strelems_free(th_strelems_t *ctx)
{
    size_t idx;

    if (ctx == NULL)
        return;

    for (idx = 0; idx < ctx->nelems; idx++)
        th_free(ctx->elems[idx]);

    th_free(ctx->elems);

    ctx->nelems = 0;
    ctx->elems = NULL;
}


/* Find the next non-whitespace character in the string, starting
 * from the offset *pos. Updates *pos to the offset of that character
 * and returns a pointer to it.
 */
const th_char_t *th_findnext(const th_char_t *str, size_t *pos)
{
    size_t offs;

    assert(str != NULL);

    // The terminating NUL is not whitespace, so the scan stops there at the latest
    for (offs = *pos; th_isspace(str[offs]); offs++);

    *pos = offs;
    return str + offs;
}


/* Find the next occurrence of the character sep in the string,
 * starting from the offset *pos. Updates *pos to the offset of the
 * match, or of the terminating NUL if there is none, and returns
 * a pointer to that position.
 */
const th_char_t *th_findsep(const th_char_t *str, size_t *pos, const th_char_t sep)
{
    size_t offs;

    assert(str != NULL);

    for (offs = *pos; str[offs] != 0 && str[offs] != sep; offs++);

    *pos = offs;
    return str + offs;
}


/* Find the next sep- or whitespace character from the string, starting
 * from the offset *pos. Updates *pos to the offset of the match, or of
 * the terminating NUL if there is none, and returns a pointer to that
 * position.
 */
const th_char_t *th_findseporspace(const th_char_t *str, size_t *pos, const th_char_t sep)
{
    assert(str != NULL);

    // The terminating NUL is neither whitespace nor (usually) sep, so it
    // has to be tested for explicitly to avoid running past the string
    while (str[*pos] && !th_isspace(str[*pos]) && str[*pos] != sep)
        (*pos)++;

    return str + *pos;
}


/* Compare a string to a pattern. Case-SENSITIVE version.
 * The matching pattern can consist of any normal characters plus
 * wildcards ? and *. "?" matches any character and "*" matches
 * any number of characters.
 *
 * The implementation comes from th_strglob.c, which is included twice
 * in this file as a template: TH_STRGLOB_FUNC gives the name of the
 * function to be generated and TH_STRGLOB_COLLATE(px) the character
 * transformation (here: none).
 * NOTE(review): presumably TH_STRGLOB_COLLATE is applied to the characters
 * before they are compared, and th_strglob.c #undef's both macros at its
 * end (they are defined again below without an #undef) -- confirm.
 */
#define TH_STRGLOB_FUNC th_strmatch
#define TH_STRGLOB_COLLATE(px) (px)
#include "th_strglob.c"


/* Compare a string to a pattern. Case-INSENSITIVE version.
 * Same template as above, with th_tolower() as the character
 * transformation.
 */
#define TH_STRGLOB_FUNC th_strcasematch
#define TH_STRGLOB_COLLATE(px) th_tolower(px)
#include "th_strglob.c"


/* Parse a hexadecimal value of 6 to 8 digits (e.g. a colour triplet,
 * optionally with a fourth component) from the start of the string.
 * The digits are accumulated into *value, which is reset to zero first.
 * At most 8 characters are examined; anything after them is ignored.
 * Returns TRUE if 6 to 8 valid hex digits were read, FALSE if a
 * non-hex character was met or the string was too short. On FALSE
 * *value holds whatever had been accumulated so far.
 */
BOOL th_get_hex_triplet(const th_char_t *str, unsigned int *value)
{
    int len = 0;

    *value = 0;

    while (str[len] != 0 && len < 4 * 2)
    {
        const th_char_t ch = str[len];
        int digit;

        if (ch >= '0' && ch <= '9')
            digit = ch - '0';
        else if (ch >= 'A' && ch <= 'F')
            digit = (ch - 'A') + 10;
        else if (ch >= 'a' && ch <= 'f')
            digit = (ch - 'a') + 10;
        else
            return FALSE;

        // Make room for the next nibble and merge it in
        *value = (*value << 4) | (unsigned int) digit;
        len++;
    }

    return (len >= 3 * 2 && len <= 4 * 2);
}


/* Parse a boolean value from the string, ignoring case. The words
 * "yes", "on", "true" and "1" set *value to TRUE, and "no", "off",
 * "false" and "0" set it to FALSE.
 * Returns TRUE if the string was recognized, FALSE otherwise (in which
 * case *value is not modified).
 */
BOOL th_get_boolean(const th_char_t *str, BOOL *value)
{
    static const th_char_t * const words_true[] = { "yes", "on", "true", "1" };
    static const th_char_t * const words_false[] = { "no", "off", "false", "0" };
    size_t i;

    for (i = 0; i < sizeof(words_true) / sizeof(words_true[0]); i++)
    {
        if (th_strcasecmp(str, words_true[i]) == 0)
        {
            *value = TRUE;
            return TRUE;
        }
    }

    for (i = 0; i < sizeof(words_false) / sizeof(words_false[0]); i++)
    {
        if (th_strcasecmp(str, words_false[i]) == 0)
        {
            *value = FALSE;
            return TRUE;
        }
    }

    return FALSE;
}


/* Parse an unsigned integer from the string into *value. The number
 * may be preceded by '-', which is reported through *neg; if neg is
 * NULL, negative numbers are rejected. A "$" or "0x" prefix selects
 * hexadecimal, otherwise the number is decimal. The whole rest of the
 * string must consist of valid digits.
 * Returns TRUE on success, FALSE if an invalid character was met.
 * Note that overflow is not detected, and that an empty digit sequence
 * is accepted as zero.
 */
BOOL th_get_int(const th_char_t *str, unsigned int *value, BOOL *neg)
{
    const th_char_t *p = str;
    BOOL is_hex = FALSE;
    int ch;

    // Is the value negative?
    if (*p == '-')
    {
        if (neg == NULL)
            return FALSE;

        *neg = TRUE;
        p++;
    }
    else if (neg != NULL)
    {
        *neg = FALSE;
    }

    // Is it hexadecimal?
    if (*p == '$')
    {
        is_hex = TRUE;
        p++;
    }
    else if (p[0] == '0' && p[1] == 'x')
    {
        is_hex = TRUE;
        p += 2;
    }

    // Parse the value, one digit at a time
    *value = 0;
    for (; (ch = *p) != 0; p++)
    {
        int digit;

        if (ch >= '0' && ch <= '9')
            digit = ch - '0';
        else if (is_hex && ch >= 'A' && ch <= 'F')
            digit = ch - 'A' + 10;
        else if (is_hex && ch >= 'a' && ch <= 'f')
            digit = ch - 'a' + 10;
        else
            return FALSE;

        if (is_hex)
            *value = (*value << 4) | (unsigned int) digit;
        else
            *value = *value * 10 + (unsigned int) digit;
    }

    return TRUE;
}