changeset 196:47f75154c7db

Add internal fallback ISO-8859-1 to UTF-8 converter and improve iconv() support.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 30 Sep 2018 22:42:18 +0300
parents 48e21e92af0a
children a7a0d8bf16cd
files sidinfo.c
diffstat 1 files changed, 68 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/sidinfo.c	Mon Jul 09 17:15:57 2018 +0300
+++ b/sidinfo.c	Sun Sep 30 22:42:18 2018 +0300
@@ -16,7 +16,9 @@
 
 
 // Some constants
-#define SET_DEF_CHARSET   "utf-8"
+#define SET_DEF_CHARSET   "utf8" // NOTE! Do not change unless you are using iconv()!!
+                                 // The fallback converter does not handle other encodings.
+
 #define SET_SLDB_FILENAME "Songlengths.md5"
 
 
@@ -111,8 +113,8 @@
 
 SIDLibSLDB *sidSLDB = NULL;
 
+BOOL    setUseChConv;
 #ifdef HAVE_ICONV
-BOOL    setUseChConv;
 iconv_t setChConv;
 #endif
 
@@ -325,27 +327,62 @@
 }
 
 
+char *siConvertCharset(const char *src) // Returns a newly allocated converted copy of src, or NULL if allocation fails
+{
 #ifdef HAVE_ICONV
-char *siConvertCharset(iconv_t ctx, const char *src)
-{
     size_t srcLeft = strlen(src) + 1;
     size_t outLeft = srcLeft * 2;
+    char *srcPtr = (char *) src;
     char *outBuf, *outPtr;
-    char *srcPtr = (char *) src;
 
     if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL)
         return NULL;
 
     while (srcLeft > 0)
     {
-        size_t ret = iconv(ctx, &srcPtr, &srcLeft, &outPtr, &outLeft);
+        size_t ret = iconv(setChConv, &srcPtr, &srcLeft, &outPtr, &outLeft);
         if (ret == (size_t) -1)
             break;
     }
 
-    return outBuf;
+#else
+    // Fallback ISO-8859-1 to UTF-8 conversion: each input byte becomes one or two output bytes
+    size_t srcSize = strlen(src),
+           outSize = srcSize * 2 + 1;
+    const uint8_t *srcPtr = (const uint8_t *) src;
+    uint8_t *outBuf, *outPtr;
+    if ((outBuf = outPtr = th_malloc(outSize + 1)) == NULL)
+        return NULL;
+
+    while (srcSize > 0 && outSize >= 2)
+    {
+        if (*srcPtr < 0x80)
+        {
+            *outPtr++ = *srcPtr;
+            outSize--;
+        }
+        else
+        if (*srcPtr < 0xC0) // 0x80..0xBF -> 0xC2 + same byte (0xBF must be included here)
+        {
+            *outPtr++ = 0xC2;
+            *outPtr++ = *srcPtr;
+            outSize -= 2;
+        }
+        else
+        {
+            *outPtr++ = 0xC3;
+            *outPtr++ = (*srcPtr - 0x40); // 0xC0..0xFF -> 0xC3 + continuation byte 0x80..0xBF
+            outSize -= 2;
+        }
+        srcPtr++;
+        srcSize--;
+    }
+
+    *outPtr++ = 0;
+#endif
+
+    return (char *) outBuf;
 }
-#endif
 
 
 int siItemFormatStrPutInt(th_vprintf_ctx *ctx, th_vprintf_putch vputch,
@@ -841,21 +878,16 @@
             return;
     }
 
-    siPrintFieldPrefix(outFile, opt);
-
-#ifdef HAVE_ICONV
     if (setUseChConv && d_str != NULL && useConv)
     {
-        char *tmp2 = siConvertCharset(setChConv, d_str);
+        char *tmp2 = siConvertCharset(d_str);
         tmp = siEscapeString(tmp2, optEscapeChars);
         th_free(tmp2);
     }
     else
         tmp = siEscapeString(d_str, optEscapeChars);
-#else
-    (void) useConv;
-    tmp = siEscapeString(d_str, optEscapeChars);
-#endif
+
+    siPrintFieldPrefix(outFile, opt);
 
     if ((str = siItemFormatStrPrint(fmt, opt, tmp, d_int)) != NULL)
         fputs(str, outFile);
@@ -1142,6 +1174,8 @@
 
 int main(int argc, char *argv[])
 {
+    char *setLang = th_strdup(getenv("LANG"));
+
     // Initialize
     th_init("SIDInfo", "PSID/RSID information displayer", "0.7.6",
         "By Matti 'ccr' Hamalainen (C) Copyright 2014-2018 TNSP",
@@ -1151,19 +1185,32 @@
 
     memset(&optFormat, 0, sizeof(optFormat));
 
-    // Initialize iconv, check if we have language/charset
-#ifdef HAVE_ICONV
-    char *setLang = th_strdup(getenv("LANG"));
+    // Get environment language
     if (setLang != NULL)
     {
+        // Get the character encoding part (e.g. "UTF-8" etc.) and
+        // strip out and lowercase everything (e.g. "utf8")
+        size_t i;
         char *ptr = strchr(setLang, '.');
-        if (ptr != NULL) strcpy(setLang, ptr + 1);
+        ptr = (ptr == NULL) ? setLang : ptr + 1;
+
+        for (i = 0; *ptr; ptr++)
+        {
+            if (*ptr != '-')
+                setLang[i++] = th_tolower(*ptr);
+        }
+        setLang[i] = 0;
     }
 
+#ifdef HAVE_ICONV
+    // Initialize iconv, check if we have language/charset
     setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591");
     setUseChConv = setChConv != (iconv_t) -1;
+#else
+    setUseChConv = strcmp(setLang, SET_DEF_CHARSET) == 0;
+#endif
+
     th_free(setLang);
-#endif
 
     // Parse command line arguments
     if (!th_args_process(argc, argv, optList, optListN,