comparison sidinfo.c @ 196:47f75154c7db

Add internal fallback ISO-8859-1 to UTF-8 converter and improve iconv() support.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 30 Sep 2018 22:42:18 +0300
parents c0849f47e10f
children a7a0d8bf16cd
comparison
equal deleted inserted replaced
195:48e21e92af0a 196:47f75154c7db
14 # include <iconv.h> 14 # include <iconv.h>
15 #endif 15 #endif
16 16
17 17
18 // Some constants 18 // Some constants
19 #define SET_DEF_CHARSET "utf-8" 19 #define SET_DEF_CHARSET "utf8" // NOTE! Do not change unless you are using iconv()!!
20 // The fallback converter does not handle other encodings.
21
20 #define SET_SLDB_FILENAME "Songlengths.md5" 22 #define SET_SLDB_FILENAME "Songlengths.md5"
21 23
22 24
23 enum 25 enum
24 { 26 {
109 111
110 PSFStack optFormat; 112 PSFStack optFormat;
111 113
112 SIDLibSLDB *sidSLDB = NULL; 114 SIDLibSLDB *sidSLDB = NULL;
113 115
116 BOOL setUseChConv;
114 #ifdef HAVE_ICONV 117 #ifdef HAVE_ICONV
115 BOOL setUseChConv;
116 iconv_t setChConv; 118 iconv_t setChConv;
117 #endif 119 #endif
118 120
119 121
120 // Define option arguments 122 // Define option arguments
323 325
324 return TRUE; 326 return TRUE;
325 } 327 }
326 328
327 329
330 char *siConvertCharset(const char *src)
331 {
328 #ifdef HAVE_ICONV 332 #ifdef HAVE_ICONV
329 char *siConvertCharset(iconv_t ctx, const char *src)
330 {
331 size_t srcLeft = strlen(src) + 1; 333 size_t srcLeft = strlen(src) + 1;
332 size_t outLeft = srcLeft * 2; 334 size_t outLeft = srcLeft * 2;
335 char *srcPtr = (char *) src;
333 char *outBuf, *outPtr; 336 char *outBuf, *outPtr;
334 char *srcPtr = (char *) src;
335 337
336 if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL) 338 if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL)
337 return NULL; 339 return NULL;
338 340
339 while (srcLeft > 0) 341 while (srcLeft > 0)
340 { 342 {
341 size_t ret = iconv(ctx, &srcPtr, &srcLeft, &outPtr, &outLeft); 343 size_t ret = iconv(setChConv, &srcPtr, &srcLeft, &outPtr, &outLeft);
342 if (ret == (size_t) -1) 344 if (ret == (size_t) -1)
343 break; 345 break;
344 } 346 }
345 347
346 return outBuf; 348 #else
347 } 349 // Fallback ISO-8859-1 to UTF-8 conversion
350 size_t srcSize = strlen(src),
351 outSize = srcSize * 2 + 1;
352 const uint8_t *srcPtr = (const uint8_t *) src;
353 uint8_t *outBuf, *outPtr;
354 if ((outBuf = outPtr = th_malloc(outSize + 1)) == NULL)
355 return NULL;
356
357 while (srcSize > 0 && outSize >= 2)
358 {
359 if (*srcPtr < 0x80)
360 {
361 *outPtr++ = *srcPtr;
362 outSize--;
363 }
364 else
365 if (*srcPtr < 0xBF)
366 {
367 *outPtr++ = 0xC2;
368 *outPtr++ = *srcPtr;
369 outSize -= 2;
370 }
371 else
372 {
373 *outPtr++ = 0xC3;
374 *outPtr++ = (*srcPtr - 0x40);
375 outSize -= 2;
376 }
377 srcPtr++;
378 srcSize--;
379 }
380
381 *outPtr++ = 0;
348 #endif 382 #endif
383
384 return (char *) outBuf;
385 }
349 386
350 387
351 int siItemFormatStrPutInt(th_vprintf_ctx *ctx, th_vprintf_putch vputch, 388 int siItemFormatStrPutInt(th_vprintf_ctx *ctx, th_vprintf_putch vputch,
352 const int value, const int f_radix, int f_flags, int f_width, int f_prec, 389 const int value, const int f_radix, int f_flags, int f_width, int f_prec,
353 const BOOL f_unsig, char *(f_alt)(const char *buf, const size_t blen, const int vret, const int flags)) 390 const BOOL f_unsig, char *(f_alt)(const char *buf, const size_t blen, const int vret, const int flags))
839 876
840 default: 877 default:
841 return; 878 return;
842 } 879 }
843 880
844 siPrintFieldPrefix(outFile, opt);
845
846 #ifdef HAVE_ICONV
847 if (setUseChConv && d_str != NULL && useConv) 881 if (setUseChConv && d_str != NULL && useConv)
848 { 882 {
849 char *tmp2 = siConvertCharset(setChConv, d_str); 883 char *tmp2 = siConvertCharset(d_str);
850 tmp = siEscapeString(tmp2, optEscapeChars); 884 tmp = siEscapeString(tmp2, optEscapeChars);
851 th_free(tmp2); 885 th_free(tmp2);
852 } 886 }
853 else 887 else
854 tmp = siEscapeString(d_str, optEscapeChars); 888 tmp = siEscapeString(d_str, optEscapeChars);
855 #else 889
856 (void) useConv; 890 siPrintFieldPrefix(outFile, opt);
857 tmp = siEscapeString(d_str, optEscapeChars);
858 #endif
859 891
860 if ((str = siItemFormatStrPrint(fmt, opt, tmp, d_int)) != NULL) 892 if ((str = siItemFormatStrPrint(fmt, opt, tmp, d_int)) != NULL)
861 fputs(str, outFile); 893 fputs(str, outFile);
862 894
863 siPrintFieldSeparator(outFile); 895 siPrintFieldSeparator(outFile);
1140 } 1172 }
1141 1173
1142 1174
1143 int main(int argc, char *argv[]) 1175 int main(int argc, char *argv[])
1144 { 1176 {
1177 char *setLang = th_strdup(getenv("LANG"));
1178
1145 // Initialize 1179 // Initialize
1146 th_init("SIDInfo", "PSID/RSID information displayer", "0.7.6", 1180 th_init("SIDInfo", "PSID/RSID information displayer", "0.7.6",
1147 "By Matti 'ccr' Hamalainen (C) Copyright 2014-2018 TNSP", 1181 "By Matti 'ccr' Hamalainen (C) Copyright 2014-2018 TNSP",
1148 "This program is distributed under a 3-clause BSD -style license."); 1182 "This program is distributed under a 3-clause BSD -style license.");
1149 1183
1150 th_verbosity = 0; 1184 th_verbosity = 0;
1151 1185
1152 memset(&optFormat, 0, sizeof(optFormat)); 1186 memset(&optFormat, 0, sizeof(optFormat));
1153 1187
1188 // Get environment language
1189 if (setLang != NULL)
1190 {
1191 // Get the character encoding part (e.g. "UTF-8" etc.) and
1192 // strip out and lowercase everything (e.g. "utf8")
1193 size_t i;
1194 char *ptr = strchr(setLang, '.');
1195 ptr = (ptr == NULL) ? setLang : ptr + 1;
1196
1197 for (i = 0; *ptr; ptr++)
1198 {
1199 if (*ptr != '-')
1200 setLang[i++] = th_tolower(*ptr);
1201 }
1202 setLang[i] = 0;
1203 }
1204
1205 #ifdef HAVE_ICONV
1154 // Initialize iconv, check if we have language/charset 1206 // Initialize iconv, check if we have language/charset
1155 #ifdef HAVE_ICONV
1156 char *setLang = th_strdup(getenv("LANG"));
1157 if (setLang != NULL)
1158 {
1159 char *ptr = strchr(setLang, '.');
1160 if (ptr != NULL) strcpy(setLang, ptr + 1);
1161 }
1162
1163 setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591"); 1207 setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591");
1164 setUseChConv = setChConv != (iconv_t) -1; 1208 setUseChConv = setChConv != (iconv_t) -1;
1209 #else
1210 setUseChConv = strcmp(setLang, SET_DEF_CHARSET) == 0;
1211 #endif
1212
1165 th_free(setLang); 1213 th_free(setLang);
1166 #endif
1167 1214
1168 // Parse command line arguments 1215 // Parse command line arguments
1169 if (!th_args_process(argc, argv, optList, optListN, 1216 if (!th_args_process(argc, argv, optList, optListN,
1170 argHandleOpt, NULL, OPTH_ONLY_OPTS)) 1217 argHandleOpt, NULL, OPTH_ONLY_OPTS))
1171 return -1; 1218 return -1;