comparison sidinfo.c @ 278:d73ccb155878

Implement support for outputting CP850 and CP437 in our simple fallback character set converter.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 06 Jan 2020 20:28:55 +0200
parents 158f4f613787
children d5ab136cdc97
comparison
equal deleted inserted replaced
277:a23dae1a4849 278:d73ccb155878
17 17
18 // 18 //
19 // Some constants 19 // Some constants
20 // 20 //
21 21
22 // Default character encoding to convert to
23 // NOTE! Do not change unless you are using iconv()!!
24 // The fallback converter does not handle other encodings.
25 #define SET_DEF_CHARSET "utf8"
26
27 // HVSC documents directory 22 // HVSC documents directory
28 #define SET_HVSC_DOCUMENTS "DOCUMENTS" 23 #define SET_HVSC_DOCUMENTS "DOCUMENTS"
29 24
30 // Songlengths database filename prefix (.md5|.txt appended) 25 // Songlengths database filename prefix (.md5|.txt appended)
31 #define SET_SLDB_FILEBASE "Songlengths" 26 #define SET_SLDB_FILEBASE "Songlengths"
44 enum 39 enum
45 { 40 {
46 OTYPE_OTHER = 0, 41 OTYPE_OTHER = 0,
47 OTYPE_STR = 1, 42 OTYPE_STR = 1,
48 OTYPE_INT = 2, 43 OTYPE_INT = 2,
44 };
45
46
47 enum
48 {
49 TH_LANG_UTF8,
50 TH_LANG_ISO88591,
51 TH_LANG_CP850,
52 TH_LANG_CP437,
49 }; 53 };
50 54
51 55
52 typedef struct 56 typedef struct
53 { 57 {
128 132
129 SIDLibSLDB *sidSLDB = NULL; 133 SIDLibSLDB *sidSLDB = NULL;
130 SIDLibSTILDB *sidSTILDB = NULL; 134 SIDLibSTILDB *sidSTILDB = NULL;
131 135
132 136
133 BOOL setUseChConv; 137 BOOL setUseOutConv;
134 #ifdef HAVE_ICONV 138 #ifdef HAVE_ICONV
135 iconv_t setChConv; 139 iconv_t setIConvCtx;
140 #else
141 int setOutLang;
136 #endif 142 #endif
137 143
138 144
139 // Define option arguments 145 // Define option arguments
140 static const th_optarg optList[] = 146 static const th_optarg optList[] =
380 386
381 return TRUE; 387 return TRUE;
382 } 388 }
383 389
384 390
391 static const uint8_t si_lang_iso88591_to_cp850[16*6] = {
392 0xff, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee,
393 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8,
394 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8,
395 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1,
396 0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b,
397 0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6, 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98,
398 };
399
400 static const uint8_t si_lang_iso88591_to_cp437[16*6] = {
401 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00,
402 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8,
403 0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
404 0x00, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, 0xe1,
405 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b,
406 0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94, 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, 0x98,
407 };
408
409
385 char *siConvertCharset(const char *src) 410 char *siConvertCharset(const char *src)
386 { 411 {
387 #ifdef HAVE_ICONV 412 #ifdef HAVE_ICONV
388 size_t srcLeft = strlen(src) + 1; 413 size_t srcLeft = strlen(src) + 1;
389 size_t outLeft = srcLeft * 2; 414 size_t outLeft = srcLeft * 2;
393 if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL) 418 if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL)
394 return NULL; 419 return NULL;
395 420
396 while (srcLeft > 0) 421 while (srcLeft > 0)
397 { 422 {
398 size_t ret = iconv(setChConv, &srcPtr, &srcLeft, &outPtr, &outLeft); 423 size_t ret = iconv(setIConvCtx, &srcPtr, &srcLeft, &outPtr, &outLeft);
399 if (ret == (size_t) -1) 424 if (ret == (size_t) -1)
400 break; 425 break;
401 } 426 }
402 427
403 #else 428 #else
404 // Fallback ISO-8859-1 to UTF-8 conversion 429 // Fallback conversion of ISO-8859-1 to X
405 size_t srcSize = strlen(src), 430 size_t srcSize = strlen(src), outSize, minLeft;
406 outSize = srcSize * 2 + 1;
407 const uint8_t *srcPtr = (const uint8_t *) src; 431 const uint8_t *srcPtr = (const uint8_t *) src;
432 const uint8_t *tab;
408 uint8_t *outBuf, *outPtr; 433 uint8_t *outBuf, *outPtr;
409 if ((outBuf = outPtr = th_malloc(outSize + 1)) == NULL) 434
435 switch (setOutLang)
436 {
437 case TH_LANG_UTF8:
438 outSize = srcSize * 2;
439 minLeft = 2;
440 break;
441
442 default:
443 outSize = srcSize;
444 minLeft = 1;
445 }
446
447 if ((outBuf = outPtr = th_malloc(outSize)) == NULL)
410 return NULL; 448 return NULL;
411 449
412 while (srcSize > 0 && outSize >= 2) 450 while (srcSize > 0 && outSize >= minLeft)
413 { 451 {
414 if (*srcPtr < 0x80) 452 switch (setOutLang)
415 { 453 {
416 *outPtr++ = *srcPtr; 454 case TH_LANG_UTF8:
417 outSize--; 455 // Not 100% correct really, but close enough
418 } 456 if (*srcPtr < 0x80)
419 else 457 {
420 if (*srcPtr < 0xBF) 458 *outPtr++ = *srcPtr;
421 { 459 outSize--;
422 *outPtr++ = 0xC2; 460 }
423 *outPtr++ = *srcPtr; 461 else
424 outSize -= 2; 462 if (*srcPtr < 0xBF)
425 } 463 {
426 else 464 *outPtr++ = 0xC2;
427 { 465 *outPtr++ = *srcPtr;
428 *outPtr++ = 0xC3; 466 outSize -= 2;
429 *outPtr++ = (*srcPtr - 0x40); 467 }
430 outSize -= 2; 468 else
431 } 469 {
470 *outPtr++ = 0xC3;
471 *outPtr++ = *srcPtr - 0x40;
472 outSize -= 2;
473 }
474 break;
475
476 case TH_LANG_ISO88591:
477 *outPtr++ = *srcPtr;
478 outSize--;
479 break;
480
481 case TH_LANG_CP850:
482 case TH_LANG_CP437:
483 // Not 100% correct either, but close enough
484 tab = (setOutLang == TH_LANG_CP850) ? si_lang_iso88591_to_cp850 : si_lang_iso88591_to_cp437;
485
486 if (*srcPtr < 0x7f)
487 *outPtr++ = *srcPtr;
488 else
489 if (*srcPtr >= 0xA0)
490 *outPtr++ = tab[*srcPtr - 0xA0];
491 else
492 *outPtr++ = '?';
493
494 outSize--;
495 break;
496 }
497
432 srcPtr++; 498 srcPtr++;
433 srcSize--; 499 srcSize--;
434 } 500 }
435 501
436 *outPtr++ = 0; 502 *outPtr++ = 0;
941 1007
942 1008
943 static void siPrintPSIDInfoLine(FILE *outFile, BOOL *shown, 1009 static void siPrintPSIDInfoLine(FILE *outFile, BOOL *shown,
944 const char *fmt, const int otype, 1010 const char *fmt, const int otype,
945 const char *d_str, const int d_int, 1011 const char *d_str, const int d_int,
946 const BOOL useConv) 1012 const BOOL convert)
947 { 1013 {
948 char *str, *tmp; 1014 char *str, *tmp;
949 1015
950 if (setUseChConv && d_str != NULL && useConv) 1016 if (setUseOutConv && d_str != NULL && convert)
951 { 1017 {
952 char *tmp2 = siConvertCharset(d_str); 1018 char *tmp2 = siConvertCharset(d_str);
953 tmp = siEscapeString(tmp2, optEscapeChars); 1019 tmp = siEscapeString(tmp2, optEscapeChars);
954 th_free(tmp2); 1020 th_free(tmp2);
955 } 1021 }
1316 { 1382 {
1317 if (*ptr != '-') 1383 if (*ptr != '-')
1318 setLang[i++] = th_tolower(*ptr); 1384 setLang[i++] = th_tolower(*ptr);
1319 } 1385 }
1320 setLang[i] = 0; 1386 setLang[i] = 0;
1321 }
1322 1387
1323 #ifdef HAVE_ICONV 1388 #ifdef HAVE_ICONV
1324 // Initialize iconv, check if we have language/charset 1389 // Initialize iconv, check if we have language/charset
1325 setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591"); 1390 setIConvCtx = iconv_open("utf8", "iso88591");
1326 setUseChConv = setChConv != (iconv_t) -1; 1391 setUseOutConv = setIConvCtx != (iconv_t) -1;
1327 #else 1392 #else
1328 setUseChConv = setLang != NULL && strcmp(setLang, SET_DEF_CHARSET) == 0; 1393 // Check if we can use our fallback converter
1394 if (strcmp(setLang, "utf8") == 0)
1395 setOutLang = TH_LANG_UTF8;
1396 else
1397 if (strcmp(setLang, "iso88591") == 0 ||
1398 strcmp(setLang, "cp819") == 0 ||
1399 strcmp(setLang, "latin1") == 0 ||
1400 strcmp(setLang, "cp28591") == 0)
1401 setOutLang = TH_LANG_ISO88591;
1402 else
1403 if (strcmp(setLang, "cp850") == 0)
1404 setOutLang = TH_LANG_CP850;
1405 else
1406 if (strcmp(setLang, "cp437") == 0)
1407 setOutLang = TH_LANG_CP437;
1408 else
1409 setOutLang = TH_LANG_ISO88591;
1410
1411 setUseOutConv = setOutLang != TH_LANG_ISO88591;
1329 #endif 1412 #endif
1413 }
1330 1414
1331 // Parse command line arguments 1415 // Parse command line arguments
1332 if (!th_args_process(argc, argv, optList, optListN, 1416 if (!th_args_process(argc, argv, optList, optListN,
1333 argHandleOpt, NULL, OPTH_ONLY_OPTS)) 1417 argHandleOpt, NULL, OPTH_ONLY_OPTS))
1334 goto out; 1418 goto out;
1335 1419
1336 THMSG(2, "Requested output LANG='%s', use charset conversion=%s\n", 1420 THMSG(2, "Requested output LANG='%s', use charset conversion=%s\n",
1337 setLang, setUseChConv ? "yes" : "no"); 1421 setLang, setUseOutConv ? "yes" : "no");
1338 1422
1339 if (optOneLineFieldSep != NULL) 1423 if (optOneLineFieldSep != NULL)
1340 { 1424 {
1341 // For one-line format, disable parsing and prefixes 1425 // For one-line format, disable parsing and prefixes
1342 optParsable = FALSE; 1426 optParsable = FALSE;
1481 } 1565 }
1482 1566
1483 out: 1567 out:
1484 1568
1485 #ifdef HAVE_ICONV 1569 #ifdef HAVE_ICONV
1486 if (setUseChConv) 1570 if (setUseOutConv)
1487 iconv_close(setChConv); 1571 iconv_close(setIConvCtx);
1488 #endif 1572 #endif
1489 1573
1490 th_free(setLang); 1574 th_free(setLang);
1491 1575
1492 siClearStack(&optFormat); 1576 siClearStack(&optFormat);