view th_string.c @ 0:728243125263

Import.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 20 Mar 2008 00:15:03 +0000
parents
children ecfa4e3597e3
line wrap: on
line source

/*
 * Miscellaneous string-handling related utility-functions
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2002-2008 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "th_string.h"

#define LPREV (pNode->pPrev)
#define LNEXT (pNode->pNext)

/* Allocate memory for a string with given length
 */
char_t *th_stralloc(const size_t l)
{
	assert(l > 0);
	return th_malloc(sizeof(char_t) * l);
}


char_t *th_strrealloc(char_t * s, const size_t l)
{
	assert(l > 0);
	return th_realloc(s, sizeof(char_t) * l);
}


/* Calculate the length of a string [strlen]
 */
size_t th_strlen(char_t * pStr)
{
	size_t l = 0;
	char_t *s = pStr;
	assert(pStr);

	while (*s) {
		s++;
		l++;
	}

	return l;
}


/* Duplicate a string [strdup]
 */
char_t *th_strdup(char_t * pStr)
{
	char_t *pResult, *s, *d;

	if (!pStr) return NULL;
	
	/* Allocate memory for destination */
	pResult = th_stralloc(th_strlen(pStr) + 1);
	if (!pResult)
		return NULL;

	/* Copy to the destination */
	s = pStr;
	d = pResult;
	while (*s) {
		*(d++) = *(s++);
	}
	*d = 0;

	return pResult;
}


/* Concatenate a string [strcat]
 */
char_t *th_strcat(char_t * pDest, char_t * pSource)
{
	char_t *s, *d;
	assert(pSource);
	assert(pDest);

	/* Copy to the destination */
	s = pSource;
	d = pDest;

	while (*d) d++;

	while (*s) {
		*(d++) = *(s++);
	}
	*d = 0;

	return pDest;
}


/* Copy a string [strcpy, strncpy]
 */
char_t *th_strcpy(char_t * pDest, char_t * pSource)
{
	char_t *s, *d;
	assert(pSource);
	assert(pDest);

	/* Copy to the destination */
	s = pSource;
	d = pDest;

	while (*s) {
		*(d++) = *(s++);
	}
	*d = 0;

	return pDest;
}


char_t *th_strncpy(char_t * pDest, char_t * pSource, size_t n)
{
	char_t *s, *d;
	size_t i;
	assert(pSource);
	assert(pDest);

	/* Copy to the destination */
	i = n;
	s = pSource;
	d = pDest;
	while (*s && (i > 0)) {
		*(d++) = *(s++);
		i--;
	}

	/* Fill rest of space with zeros */
	while (i > 0) {
		*(d++) = 0;
		i--;
	}

	/* Ensure that last is always zero */
	pDest[n - 1] = 0;

	return pDest;
}


/* Compare given strings [strcmp, strncmp]
 */
int th_strcmp(char_t * pStr1, char_t * pStr2)
{
	char_t *s1, *s2;
	assert(pStr1);
	assert(pStr2);

	/* Check the string pointers */
	if (pStr1 == pStr2)
		return 0;

	/* Go through the string */
	s1 = pStr1;
	s2 = pStr2;
	while (*s1 && *s2 && (*s1 == *s2)) {
		s1++;
		s2++;
	}

	return ((*s1) - (*s2));
}


int th_strncmp(char_t * pStr1, char_t * pStr2, size_t n)
{
	char_t *s1, *s2;
	assert(pStr1);
	assert(pStr2);

	/* Check the string pointers */
	if (pStr1 == pStr2)
		return 0;

	/* Go through the string */
	s1 = pStr1;
	s2 = pStr2;
	while ((n > 0) && *s1 && *s2 && (*s1 == *s2)) {
		s1++;
		s2++;
		n--;
	}

	if (n > 0)
		return ((*s1) - (*s2));
	else
		return 0;
}


/* Compare two strings ignoring case [strcasecmp, strncasecmp]
 */
int th_strcasecmp(char_t * pStr1, char_t * pStr2)
{
	char_t *s1, *s2;
	assert(pStr1);
	assert(pStr2);

	/* Check the string pointers */
	if (pStr1 == pStr2)
		return 0;

	/* Go through the string */
	s1 = pStr1;
	s2 = pStr2;
	while (*s1 && *s2 && (th_tolower(*s1) == th_tolower(*s2))) {
		s1++;
		s2++;
	}

	return (th_tolower(*s1) - th_tolower(*s2));
}


int th_strncasecmp(char_t * pStr1, char_t * pStr2, size_t n)
{
	char_t *s1, *s2;
	assert(pStr1);
	assert(pStr2);

	/* Check the string pointers */
	if (pStr1 == pStr2)
		return 0;

	/* Go through the string */
	s1 = pStr1;
	s2 = pStr2;
	while ((n > 0) && *s1 && *s2 && (th_tolower(*s1) == th_tolower(*s2))) {
		s1++;
		s2++;
		n--;
	}

	if (n > 0)
		return (th_tolower(*s1) - th_tolower(*s2));
	else
		return 0;
}


/* Remove all occurences of control characters, in-place.
 * Resulting string is always shorter or same length than original.
 */
void th_strip_ctrlchars(char_t * pStr)
{
	char_t *i, *j;
	assert(pStr);

	i = pStr;
	j = pStr;
	while (*i) {
		if (!th_iscntrl(*i))
			*(j++) = *i;
		i++;
	}

	*j = 0;
}


/* Locate a substring [strstr]
 */
char_t *th_strstr(char_t * haystack, char_t * needle)
{
	char_t *h, *n, *s;
	assert(haystack);
	assert(needle);

	/* If 'needle' is empty, we return 'haystack' */
	if (!*needle)
		return haystack;

	/* Search for 'needle' in 'haystack' */
	h = haystack;
	n = needle;
	while (*h) {
		/* Find possible start of 'needle' */
		while (*h && (*h != *n))
			h++;

		if (*h == *n) {
			/* Found, check rest */
			s = h;
			n = needle;
			while (*h) {
				if (!*n)
					return s;
				else
				if (*h != *n)
					break;
				n++;
				h++;
			}
			h = s;
			h++;
		}
	}

	return NULL;
}


/* Copy a given string over in *ppResult.
 */
int th_pstrcpy(char_t ** ppResult, char_t * pStr)
{
	assert(ppResult);

	/* Check the string pointers */
	if (!pStr)
		return -1;

	/* Allocate memory for destination */
	th_free(*ppResult);
	*ppResult = th_stralloc(th_strlen(pStr) + 1);
	if (!*ppResult)
		return -2;

	/* Copy to the destination */
	th_strcpy(*ppResult, pStr);

	return 0;
}


/* Concatenates a given string into string pointed by *ppResult.
 */
int th_pstrcat(char_t ** ppResult, char_t * pStr)
{
	assert(ppResult);

	/* Check the string pointers */
	if (!pStr)
		return -1;

	if (*ppResult != NULL) {
		*ppResult = th_strrealloc(*ppResult, th_strlen(*ppResult) + th_strlen(pStr) + 1);
		if (*ppResult == NULL)
			return -1;

		th_strcat(*ppResult, pStr);
	} else {
		*ppResult = th_stralloc(th_strlen(pStr) + 1);
		if (*ppResult == NULL)
			return -1;

		th_strcpy(*ppResult, pStr);
	}

	return 0;
}


/* Find next non-whitespace character in string.
 * Updates iPos into the position of such character and
 * returns pointer to the string.
 */
char_t *th_findnext(char_t * pStr, size_t * iPos)
{
	assert(pStr);

	/* Terminating NULL-character is not whitespace! */
	while (th_isspace(pStr[*iPos]))
		(*iPos)++;
	return &pStr[*iPos];
}


/* Find next chSep-character from string
 */
char_t *th_findsep(char_t * pStr, size_t * iPos, char_t chSep)
{
	assert(pStr);

	/* Terminating NULL-character is not digit! */
	while (pStr[*iPos] && (pStr[*iPos] != chSep))
		(*iPos)++;
	return &pStr[*iPos];
}


/* Find next chSep- or whitespace from string
 */
char_t *th_findseporspace(char_t * pStr, size_t * iPos, char_t chSep)
{
	assert(pStr);

	/* Terminating NULL-character is not digit! */
	while (!th_isspace(pStr[*iPos]) && (pStr[*iPos] != chSep))
		(*iPos)++;
	return &pStr[*iPos];
}


/* Compare a string to a pattern. Case-SENSITIVE version.
 * The matching pattern can consist of any normal characters plus
 * wildcards ? and *. "?" matches any character and "*" matches
 * any number of characters.
 */
BOOL th_strmatch(char_t * pStr, char_t * pPattern)
{
	BOOL didMatch, isAnyMode, isEnd;
	char_t *tmpPattern;

	/* Check given pattern and string */
	if (!pStr)
		return FALSE;
	if (!pPattern)
		return FALSE;

	/* Initialize */
	tmpPattern = NULL;
	didMatch = TRUE;
	isEnd = FALSE;
	isAnyMode = FALSE;

	/* Start comparision */
	do {
		didMatch = FALSE;
		switch (*pPattern) {
		case '?':
			/* Any single character matches */
			if (*pStr) {
				didMatch = TRUE;
				pPattern++;
				pStr++;
			}
			break;

		case '*':
			didMatch = TRUE;
			pPattern++;
			if (!*pPattern)
				isEnd = TRUE;
			isAnyMode = TRUE;
			tmpPattern = pPattern;
			break;

		case 0:
			if (isAnyMode) {
				if (*pStr)
					pStr++;
				else
					isEnd = TRUE;
			} else {
				if (*pStr) {
					if (tmpPattern) {
						isAnyMode = TRUE;
						pPattern = tmpPattern;
					} else
						didMatch = FALSE;
				} else
					isEnd = TRUE;
			}
			break;
		default:
			if (isAnyMode) {
				if ((*pPattern) == (*pStr)) {
					isAnyMode = FALSE;
					didMatch = TRUE;
				} else {
					if (*pStr) {
						didMatch = TRUE;
						pStr++;
					}
				}
			} else {
				if ((*pPattern) == (*pStr)) {
					didMatch = TRUE;
					if (*pPattern)
						pPattern++;
					if (*pStr)
						pStr++;
				} else {
					if (tmpPattern) {
						didMatch = TRUE;
						isAnyMode = TRUE;
						pPattern = tmpPattern;
					}
				}
			}

			if (!*pStr && !*pPattern)
				isEnd = TRUE;
			break;

		}		/* switch */

	} while ((didMatch) && (!isEnd));

	return didMatch;
}


/* Compare a string to a pattern. Case-INSENSITIVE version.
 */
BOOL th_strcasematch(char_t * pStr, char_t * pPattern)
{
	BOOL didMatch, isAnyMode, isEnd;
	char_t *tmpPattern;

	/* Check given pattern and string */
	if (!pStr)
		return FALSE;
	if (!pPattern)
		return FALSE;

	/* Initialize */
	tmpPattern = NULL;
	didMatch = TRUE;
	isEnd = FALSE;
	isAnyMode = FALSE;

	/* Start comparision */
	do {
		switch (*pPattern) {
		case '?':
			/* Any single character matches */
			if (*pStr) {
				pPattern++;
				pStr++;
			} else
				didMatch = FALSE;
			break;

		case '*':
			pPattern++;
			if (!*pPattern || (*pPattern == '?'))
				isEnd = TRUE;
			isAnyMode = TRUE;
			tmpPattern = pPattern;
			break;

		case 0:
			if (isAnyMode) {
				if (*pStr)
					pStr++;
				else
					isEnd = TRUE;
			} else {
				if (*pStr) {
					if (tmpPattern) {
						isAnyMode = TRUE;
						pPattern = tmpPattern;
					} else
						didMatch = FALSE;
				} else
					isEnd = TRUE;
			}
			break;

		default:
			if (isAnyMode) {
				if (th_tolower(*pPattern) == th_tolower(*pStr)) {
					isAnyMode = FALSE;
				} else {
					if (*pStr)
						pStr++;
					else
						didMatch = FALSE;
				}
			} else {
				if (th_tolower(*pPattern) == th_tolower(*pStr)) {
					if (*pPattern)
						pPattern++;
					if (*pStr)
						pStr++;
				} else {
					if (tmpPattern) {
						isAnyMode = TRUE;
						pPattern = tmpPattern;
					} else
						didMatch = FALSE;
				}
			}

			if (!*pStr && !*pPattern)
				isEnd = TRUE;
			break;

		}		/* switch */

	} while ((didMatch) && (!isEnd));

	return didMatch;
}


/*
 * Handling of string-lists and hashes
 */
t_str_node *th_strnode_new(char_t * pcStr, ulint_t nUsed, void *pData)
{
	t_str_node *pResult;

	/* Allocate memory for new node */
	pResult = (t_str_node *) th_calloc(1, sizeof(t_str_node));
	if (!pResult)
		return NULL;

	/* Set fields */
	th_pstrcpy(&pResult->pcStr, pcStr);
	pResult->nUsed = nUsed;
	pResult->pData = pData;

	return pResult;
}


void th_strnode_free(t_str_node * pNode)
{
	assert(pNode);

	th_free(pNode->pcStr);
	th_free(pNode);
}


/* Insert a new node into strlist
 */
void th_strlist_insert(t_str_node ** strList, t_str_node * pNode)
{
	assert(strList);
	assert(pNode);

	/* Insert into linked list */
	if (*strList) {
		/* The first node's pPrev points to last node */
		LPREV = (*strList)->pPrev;	/* New node's prev = Previous last node */
		(*strList)->pPrev->pNext = pNode;	/* Previous last node's next = New node */
		(*strList)->pPrev = pNode;	/* New last node = New node */
		LNEXT = NULL;	/* But next is NULL! */
	} else {
		(*strList) = pNode;	/* First node ... */
		LPREV = pNode;	/* ... it's also last */
		LNEXT = NULL;	/* But next is NULL! */
	}

}


/* Free a given strlist
 */
void th_strlist_free(t_str_node * strList)
{
	t_str_node *pNode, *nNode;

	pNode = strList;
	while (pNode) {
		nNode = pNode->pNext;
		th_strnode_free(pNode);
		pNode = nNode;
	}
}


/* Create a strIndex from strlist
 */
t_str_index *th_strlist_makeindex(t_str_node * strList)
{
	t_str_index *pResult;
	t_str_node *pCurr;
	ulint_t n;
	assert(strList);

	/* Computer number of nodes */
	for (n = 0, pCurr = strList; pCurr; pCurr = pCurr->pNext)
		n++;

	/* Check number of nodes */
	if (n == 0)
		return NULL;

	/* Allocate memory for index */
	pResult = (t_str_index *) th_calloc(1, sizeof(t_str_index));
	if (!pResult)
		return NULL;

	pResult->n = n;
	pResult->ppIndex = (t_str_node **) th_calloc(n, sizeof(t_str_node *));
	if (!pResult->ppIndex) {
		th_free(pResult);
		return NULL;
	}

	/* Create the index */
	for (n = 0, pCurr = strList; pCurr && (n < pResult->n); pCurr = pCurr->pNext)
		pResult->ppIndex[n++] = pCurr;

	return pResult;
}


/* Insert a node into given strhash
 */
int th_strhash_insert(t_str_hash strHash, t_str_node * pNode, BOOL ignoreCase)
{
	int i;
	assert(strHash);
	assert(pNode);
	assert(pNode->pcStr);

	if (ignoreCase)
		i = th_tolower(pNode->pcStr[0]);
	else
		i = pNode->pcStr[0];

	/* Check the hashcode */
	if ((i < 0) && (i >= SET_HASH_MAXINDEX))
		return -1;

	if (strHash[i]) {
		/* The first node's pPrev points to last node */
		pNode->pPrev = strHash[i]->pPrev;	/* New node's prev = Previous last node */
		strHash[i]->pPrev->pNext = pNode;	/* Previous last node's next = New node */
		strHash[i]->pPrev = pNode;	/* New last node = New node */
		pNode->pNext = NULL;	/* But next is NULL! */
	} else {
		strHash[i] = pNode;	/* First node */
		pNode->pPrev = pNode;	/* But also last */
		pNode->pNext = NULL;	/* But next is NULL! */
	}

	return 0;
}


/* Free a given strhash
 */
void th_strhash_free(t_str_hash strHash)
{
	int i;
	assert(strHash);

	for (i = 0; i < SET_HASH_MAXINDEX; i++)
		th_strlist_free(strHash[i]);
}


/* Change pData for matching entries to new value
 */
void th_strhash_change_pdata(t_str_hash strHash, void *pFind, void *pNew)
{
	t_str_node *pCurr;
	int i;
	assert(strHash);

	for (i = 0; i < SET_HASH_MAXINDEX; i++) {
		/* Find from linked list */
		pCurr = strHash[i];
		while (pCurr) {
			if (pCurr->pData == pFind)
				pCurr->pData = pNew;

			pCurr = pCurr->pNext;
		}
	}
}


/* Search a string from a given stringhash, either case-sensitive or insensitive
 */
t_str_node *th_strhash_search(t_str_hash strHash, char_t * findStr, BOOL ignoreCase)
{
	t_str_node *pCurr;
	int i;
	BOOL isFound;
	assert(strHash);
	assert(findStr);

	isFound = FALSE;
	pCurr = NULL;

	/* Check hashcode */
	if (ignoreCase)
		i = ((unsigned char) th_tolower(findStr[0]));
	else
		i = ((unsigned char) findStr[0]);

	if ((i < 0) && (i >= SET_HASH_MAXINDEX))
		return NULL;

	/* Find from linked list */
	pCurr = strHash[i];

	if (ignoreCase) {
		/* Case in-sensitive search */
		while (pCurr && !isFound) {
			if (th_strcasecmp(findStr, pCurr->pcStr) == 0)
				isFound = TRUE;
			else
				pCurr = pCurr->pNext;
		}
	} else {
		/* Case sensitive search */
		while (pCurr && !isFound) {
			if (th_strcmp(findStr, pCurr->pcStr) == 0)
				isFound = TRUE;
			else
				pCurr = pCurr->pNext;
		}
	}

	/* Return result */
	if (isFound)
		return pCurr;
	else
		return NULL;
}


/* Create a strIndex from strHash
 */
t_str_index *th_strhash_makeindex(t_str_hash strHash)
{
	t_str_index *pResult;
	t_str_node *pCurr;
	unsigned int n, i;
	assert(strHash);

	/* Computer number of nodes */
	for (n = i = 0; i < SET_HASH_MAXINDEX; i++) {
		pCurr = strHash[i];
		while (pCurr) {
			n++;
			pCurr = pCurr->pNext;
		}
	}

	/* Check number of nodes */
	if (n <= 0)
		return NULL;

	/* Allocate memory for index */
	pResult = (t_str_index *) th_calloc(1, sizeof(t_str_index));
	if (!pResult)
		return NULL;

	pResult->n = n;
	pResult->ppIndex = (t_str_node **) th_calloc(n, sizeof(t_str_node *));
	if (!pResult->ppIndex) {
		th_free(pResult);
		return NULL;
	}

	/* Create the index */
	for (n = i = 0; (i < SET_HASH_MAXINDEX) && (n < pResult->n); i++) {
		pCurr = strHash[i];
		while (pCurr && (n < pResult->n)) {
			pResult->ppIndex[n++] = pCurr;
			pCurr = pCurr->pNext;
		}
	}

	return pResult;
}


/* Free a given strIndex
 */
void th_strindex_free(t_str_index * strIndex)
{
	if (strIndex) {
		th_free(strIndex->ppIndex);
		th_free(strIndex);
	}
}


/* Compare two t_str_nodes by nUsed
 */
int th_strindex_cmp_used(const void *pNode1, const void *pNode2)
{
	t_str_node *pStr1, *pStr2;

	pStr1 = *(t_str_node **) pNode1;
	pStr2 = *(t_str_node **) pNode2;

	if (pStr1->nUsed > pStr2->nUsed)
		return -1;
	else if (pStr1->nUsed < pStr2->nUsed)
		return 1;
	else
		return 0;
}


/* Sort an strIndex by nUsed, using th_strindex_cmp_used()
 */
void th_strindex_sort_nused(t_str_index * strIndex)
{
	assert(strIndex);
	assert(strIndex->ppIndex);

	qsort(strIndex->ppIndex, strIndex->n, sizeof(t_str_node *), th_strindex_cmp_used);
}


/* Compare two t_str_nodes via strcmp()
 */
int th_strindex_cmp_alpha(const void *pNode1, const void *pNode2)
{
	t_str_node *pStr1, *pStr2;

	pStr1 = *(t_str_node **) pNode1;
	pStr2 = *(t_str_node **) pNode2;

	return th_strcmp(pStr1->pcStr, pStr2->pcStr);
}


/* Sort an strIndex by nUsed, using th_strindex_cmp_used()
 */
void th_strindex_sort_alpha(t_str_index * strIndex)
{
	assert(strIndex);
	assert(strIndex->ppIndex);

	qsort(strIndex->ppIndex, strIndex->n, sizeof(t_str_node *), th_strindex_cmp_alpha);
}