# HG changeset patch # User Matti Hamalainen # Date 1538336538 -10800 # Node ID 47f75154c7dbf0d7c575593caba05a468ff7475b # Parent 48e21e92af0ab9c2c22735ef8b493fe0e1221f3e Add internal fallback ISO-8859-1 to UTF-8 converter and improve iconv() support. diff -r 48e21e92af0a -r 47f75154c7db sidinfo.c --- a/sidinfo.c Mon Jul 09 17:15:57 2018 +0300 +++ b/sidinfo.c Sun Sep 30 22:42:18 2018 +0300 @@ -16,7 +16,9 @@ // Some constants -#define SET_DEF_CHARSET "utf-8" +#define SET_DEF_CHARSET "utf8" // NOTE! Do not change unless you are using iconv()!! + // The fallback converter does not handle other encodings. + #define SET_SLDB_FILENAME "Songlengths.md5" @@ -111,8 +113,8 @@ SIDLibSLDB *sidSLDB = NULL; +BOOL setUseChConv; #ifdef HAVE_ICONV -BOOL setUseChConv; iconv_t setChConv; #endif @@ -325,27 +327,62 @@ } +char *siConvertCharset(const char *src) +{ #ifdef HAVE_ICONV -char *siConvertCharset(iconv_t ctx, const char *src) -{ size_t srcLeft = strlen(src) + 1; size_t outLeft = srcLeft * 2; + char *srcPtr = (char *) src; char *outBuf, *outPtr; - char *srcPtr = (char *) src; if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL) return NULL; while (srcLeft > 0) { - size_t ret = iconv(ctx, &srcPtr, &srcLeft, &outPtr, &outLeft); + size_t ret = iconv(setChConv, &srcPtr, &srcLeft, &outPtr, &outLeft); if (ret == (size_t) -1) break; } - return outBuf; +#else + // Fallback ISO-8859-1 to UTF-8 conversion + size_t srcSize = strlen(src), + outSize = srcSize * 2 + 1; + const uint8_t *srcPtr = (const uint8_t *) src; + uint8_t *outBuf, *outPtr; + if ((outBuf = outPtr = th_malloc(outSize + 1)) == NULL) + return NULL; + + while (srcSize > 0 && outSize >= 2) + { + if (*srcPtr < 0x80) + { + *outPtr++ = *srcPtr; + outSize--; + } + else + if (*srcPtr < 0xC0) + { + *outPtr++ = 0xC2; + *outPtr++ = *srcPtr; + outSize -= 2; + } + else + { + *outPtr++ = 0xC3; + *outPtr++ = (*srcPtr - 0x40); + outSize -= 2; + } + srcPtr++; + srcSize--; + } + + *outPtr++ = 0; +#endif + + return (char *) outBuf; }
-#endif int siItemFormatStrPutInt(th_vprintf_ctx *ctx, th_vprintf_putch vputch, @@ -841,21 +878,16 @@ return; } - siPrintFieldPrefix(outFile, opt); - -#ifdef HAVE_ICONV if (setUseChConv && d_str != NULL && useConv) { - char *tmp2 = siConvertCharset(setChConv, d_str); + char *tmp2 = siConvertCharset(d_str); tmp = siEscapeString(tmp2, optEscapeChars); th_free(tmp2); } else tmp = siEscapeString(d_str, optEscapeChars); -#else - (void) useConv; - tmp = siEscapeString(d_str, optEscapeChars); -#endif + + siPrintFieldPrefix(outFile, opt); if ((str = siItemFormatStrPrint(fmt, opt, tmp, d_int)) != NULL) fputs(str, outFile); @@ -1142,6 +1174,8 @@ int main(int argc, char *argv[]) { + char *setLang = th_strdup(getenv("LANG")); + // Initialize th_init("SIDInfo", "PSID/RSID information displayer", "0.7.6", "By Matti 'ccr' Hamalainen (C) Copyright 2014-2018 TNSP", @@ -1151,19 +1185,32 @@ memset(&optFormat, 0, sizeof(optFormat)); - // Initialize iconv, check if we have language/charset -#ifdef HAVE_ICONV - char *setLang = th_strdup(getenv("LANG")); + // Get environment language if (setLang != NULL) { + // Get the character encoding part (e.g. "UTF-8" etc.) and + // strip out and lowercase everything (e.g. "utf8") + size_t i; char *ptr = strchr(setLang, '.'); - if (ptr != NULL) strcpy(setLang, ptr + 1); + ptr = (ptr == NULL) ? setLang : ptr + 1; + + for (i = 0; *ptr; ptr++) + { + if (*ptr != '-') + setLang[i++] = th_tolower(*ptr); + } + setLang[i] = 0; } +#ifdef HAVE_ICONV + // Initialize iconv, check if we have language/charset setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591"); setUseChConv = setChConv != (iconv_t) -1; +#else + setUseChConv = setLang != NULL && strcmp(setLang, SET_DEF_CHARSET) == 0; +#endif + th_free(setLang); -#endif // Parse command line arguments if (!th_args_process(argc, argv, optList, optListN,