Mercurial > hg > sidinfo
comparison sidinfo.c @ 278:d73ccb155878
Implement support for outputting CP850 and CP437 in our simple fallback
character set converter.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 06 Jan 2020 20:28:55 +0200 |
parents | 158f4f613787 |
children | d5ab136cdc97 |
comparison legend: equal, deleted, inserted, replaced
277:a23dae1a4849 | 278:d73ccb155878 |
---|---|
17 | 17 |
18 // | 18 // |
19 // Some constants | 19 // Some constants |
20 // | 20 // |
21 | 21 |
22 // Default character encoding to convert to | |
23 // NOTE! Do not change unless you are using iconv()!! | |
24 // The fallback converter does not handle other encodings. | |
25 #define SET_DEF_CHARSET "utf8" | |
26 | |
27 // HVSC documents directory | 22 // HVSC documents directory |
28 #define SET_HVSC_DOCUMENTS "DOCUMENTS" | 23 #define SET_HVSC_DOCUMENTS "DOCUMENTS" |
29 | 24 |
30 // Songlengths database filename prefix (.md5|.txt appended) | 25 // Songlengths database filename prefix (.md5|.txt appended) |
31 #define SET_SLDB_FILEBASE "Songlengths" | 26 #define SET_SLDB_FILEBASE "Songlengths" |
44 enum | 39 enum |
45 { | 40 { |
46 OTYPE_OTHER = 0, | 41 OTYPE_OTHER = 0, |
47 OTYPE_STR = 1, | 42 OTYPE_STR = 1, |
48 OTYPE_INT = 2, | 43 OTYPE_INT = 2, |
44 }; | |
45 | |
46 | |
47 enum | |
48 { | |
49 TH_LANG_UTF8, | |
50 TH_LANG_ISO88591, | |
51 TH_LANG_CP850, | |
52 TH_LANG_CP437, | |
49 }; | 53 }; |
50 | 54 |
51 | 55 |
52 typedef struct | 56 typedef struct |
53 { | 57 { |
128 | 132 |
129 SIDLibSLDB *sidSLDB = NULL; | 133 SIDLibSLDB *sidSLDB = NULL; |
130 SIDLibSTILDB *sidSTILDB = NULL; | 134 SIDLibSTILDB *sidSTILDB = NULL; |
131 | 135 |
132 | 136 |
133 BOOL setUseChConv; | 137 BOOL setUseOutConv; |
134 #ifdef HAVE_ICONV | 138 #ifdef HAVE_ICONV |
135 iconv_t setChConv; | 139 iconv_t setIConvCtx; |
140 #else | |
141 int setOutLang; | |
136 #endif | 142 #endif |
137 | 143 |
138 | 144 |
139 // Define option arguments | 145 // Define option arguments |
140 static const th_optarg optList[] = | 146 static const th_optarg optList[] = |
380 | 386 |
381 return TRUE; | 387 return TRUE; |
382 } | 388 } |
383 | 389 |
384 | 390 |
391 static const uint8_t si_lang_iso88591_to_cp850[16*6] = { | |
392 0xff, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, | |
393 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, | |
394 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, | |
395 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, | |
396 0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, | |
397 0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6, 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98, | |
398 }; | |
399 | |
400 static const uint8_t si_lang_iso88591_to_cp437[16*6] = { | |
401 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00, | |
402 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8, | |
403 0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
404 0x00, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, 0xe1, | |
405 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, | |
406 0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94, 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, 0x98, | |
407 }; | |
408 | |
409 | |
385 char *siConvertCharset(const char *src) | 410 char *siConvertCharset(const char *src) |
386 { | 411 { |
387 #ifdef HAVE_ICONV | 412 #ifdef HAVE_ICONV |
388 size_t srcLeft = strlen(src) + 1; | 413 size_t srcLeft = strlen(src) + 1; |
389 size_t outLeft = srcLeft * 2; | 414 size_t outLeft = srcLeft * 2; |
393 if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL) | 418 if ((outBuf = outPtr = th_malloc(outLeft + 1)) == NULL) |
394 return NULL; | 419 return NULL; |
395 | 420 |
396 while (srcLeft > 0) | 421 while (srcLeft > 0) |
397 { | 422 { |
398 size_t ret = iconv(setChConv, &srcPtr, &srcLeft, &outPtr, &outLeft); | 423 size_t ret = iconv(setIConvCtx, &srcPtr, &srcLeft, &outPtr, &outLeft); |
399 if (ret == (size_t) -1) | 424 if (ret == (size_t) -1) |
400 break; | 425 break; |
401 } | 426 } |
402 | 427 |
403 #else | 428 #else |
404 // Fallback ISO-8859-1 to UTF-8 conversion | 429 // Fallback conversion of ISO-8859-1 to X |
405 size_t srcSize = strlen(src), | 430 size_t srcSize = strlen(src), outSize, minLeft; |
406 outSize = srcSize * 2 + 1; | |
407 const uint8_t *srcPtr = (const uint8_t *) src; | 431 const uint8_t *srcPtr = (const uint8_t *) src; |
432 const uint8_t *tab; | |
408 uint8_t *outBuf, *outPtr; | 433 uint8_t *outBuf, *outPtr; |
409 if ((outBuf = outPtr = th_malloc(outSize + 1)) == NULL) | 434 |
435 switch (setOutLang) | |
436 { | |
437 case TH_LANG_UTF8: | |
438 outSize = srcSize * 2; | |
439 minLeft = 2; | |
440 break; | |
441 | |
442 default: | |
443 outSize = srcSize; | |
444 minLeft = 1; | |
445 } | |
446 | |
447 if ((outBuf = outPtr = th_malloc(outSize)) == NULL) | |
410 return NULL; | 448 return NULL; |
411 | 449 |
412 while (srcSize > 0 && outSize >= 2) | 450 while (srcSize > 0 && outSize >= minLeft) |
413 { | 451 { |
414 if (*srcPtr < 0x80) | 452 switch (setOutLang) |
415 { | 453 { |
416 *outPtr++ = *srcPtr; | 454 case TH_LANG_UTF8: |
417 outSize--; | 455 // Not 100% correct really, but close enough |
418 } | 456 if (*srcPtr < 0x80) |
419 else | 457 { |
420 if (*srcPtr < 0xBF) | 458 *outPtr++ = *srcPtr; |
421 { | 459 outSize--; |
422 *outPtr++ = 0xC2; | 460 } |
423 *outPtr++ = *srcPtr; | 461 else |
424 outSize -= 2; | 462 if (*srcPtr < 0xBF) |
425 } | 463 { |
426 else | 464 *outPtr++ = 0xC2; |
427 { | 465 *outPtr++ = *srcPtr; |
428 *outPtr++ = 0xC3; | 466 outSize -= 2; |
429 *outPtr++ = (*srcPtr - 0x40); | 467 } |
430 outSize -= 2; | 468 else |
431 } | 469 { |
470 *outPtr++ = 0xC3; | |
471 *outPtr++ = *srcPtr - 0x40; | |
472 outSize -= 2; | |
473 } | |
474 break; | |
475 | |
476 case TH_LANG_ISO88591: | |
477 *outPtr++ = *srcPtr; | |
478 outSize--; | |
479 break; | |
480 | |
481 case TH_LANG_CP850: | |
482 case TH_LANG_CP437: | |
483 // Not 100% correct either, but close enough | |
484 tab = (setOutLang == TH_LANG_CP850) ? si_lang_iso88591_to_cp850 : si_lang_iso88591_to_cp437; | |
485 | |
486 if (*srcPtr < 0x7f) | |
487 *outPtr++ = *srcPtr; | |
488 else | |
489 if (*srcPtr >= 0xA0) | |
490 *outPtr++ = tab[*srcPtr - 0xA0]; | |
491 else | |
492 *outPtr++ = '?'; | |
493 | |
494 outSize--; | |
495 break; | |
496 } | |
497 | |
432 srcPtr++; | 498 srcPtr++; |
433 srcSize--; | 499 srcSize--; |
434 } | 500 } |
435 | 501 |
436 *outPtr++ = 0; | 502 *outPtr++ = 0; |
941 | 1007 |
942 | 1008 |
943 static void siPrintPSIDInfoLine(FILE *outFile, BOOL *shown, | 1009 static void siPrintPSIDInfoLine(FILE *outFile, BOOL *shown, |
944 const char *fmt, const int otype, | 1010 const char *fmt, const int otype, |
945 const char *d_str, const int d_int, | 1011 const char *d_str, const int d_int, |
946 const BOOL useConv) | 1012 const BOOL convert) |
947 { | 1013 { |
948 char *str, *tmp; | 1014 char *str, *tmp; |
949 | 1015 |
950 if (setUseChConv && d_str != NULL && useConv) | 1016 if (setUseOutConv && d_str != NULL && convert) |
951 { | 1017 { |
952 char *tmp2 = siConvertCharset(d_str); | 1018 char *tmp2 = siConvertCharset(d_str); |
953 tmp = siEscapeString(tmp2, optEscapeChars); | 1019 tmp = siEscapeString(tmp2, optEscapeChars); |
954 th_free(tmp2); | 1020 th_free(tmp2); |
955 } | 1021 } |
1316 { | 1382 { |
1317 if (*ptr != '-') | 1383 if (*ptr != '-') |
1318 setLang[i++] = th_tolower(*ptr); | 1384 setLang[i++] = th_tolower(*ptr); |
1319 } | 1385 } |
1320 setLang[i] = 0; | 1386 setLang[i] = 0; |
1321 } | |
1322 | 1387 |
1323 #ifdef HAVE_ICONV | 1388 #ifdef HAVE_ICONV |
1324 // Initialize iconv, check if we have language/charset | 1389 // Initialize iconv, check if we have language/charset |
1325 setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591"); | 1390 setIConvCtx = iconv_open("utf8", "iso88591"); |
1326 setUseChConv = setChConv != (iconv_t) -1; | 1391 setUseOutConv = setIConvCtx != (iconv_t) -1; |
1327 #else | 1392 #else |
1328 setUseChConv = setLang != NULL && strcmp(setLang, SET_DEF_CHARSET) == 0; | 1393 // Check if we can use our fallback converter |
1394 if (strcmp(setLang, "utf8") == 0) | |
1395 setOutLang = TH_LANG_UTF8; | |
1396 else | |
1397 if (strcmp(setLang, "iso88591") == 0 || | |
1398 strcmp(setLang, "cp819") == 0 || | |
1399 strcmp(setLang, "latin1") == 0 || | |
1400 strcmp(setLang, "cp28591") == 0) | |
1401 setOutLang = TH_LANG_ISO88591; | |
1402 else | |
1403 if (strcmp(setLang, "cp850") == 0) | |
1404 setOutLang = TH_LANG_CP850; | |
1405 else | |
1406 if (strcmp(setLang, "cp437") == 0) | |
1407 setOutLang = TH_LANG_CP437; | |
1408 else | |
1409 setOutLang = TH_LANG_ISO88591; | |
1410 | |
1411 setUseOutConv = setOutLang != TH_LANG_ISO88591; | |
1329 #endif | 1412 #endif |
1413 } | |
1330 | 1414 |
1331 // Parse command line arguments | 1415 // Parse command line arguments |
1332 if (!th_args_process(argc, argv, optList, optListN, | 1416 if (!th_args_process(argc, argv, optList, optListN, |
1333 argHandleOpt, NULL, OPTH_ONLY_OPTS)) | 1417 argHandleOpt, NULL, OPTH_ONLY_OPTS)) |
1334 goto out; | 1418 goto out; |
1335 | 1419 |
1336 THMSG(2, "Requested output LANG='%s', use charset conversion=%s\n", | 1420 THMSG(2, "Requested output LANG='%s', use charset conversion=%s\n", |
1337 setLang, setUseChConv ? "yes" : "no"); | 1421 setLang, setUseOutConv ? "yes" : "no"); |
1338 | 1422 |
1339 if (optOneLineFieldSep != NULL) | 1423 if (optOneLineFieldSep != NULL) |
1340 { | 1424 { |
1341 // For one-line format, disable parsing and prefixes | 1425 // For one-line format, disable parsing and prefixes |
1342 optParsable = FALSE; | 1426 optParsable = FALSE; |
1481 } | 1565 } |
1482 | 1566 |
1483 out: | 1567 out: |
1484 | 1568 |
1485 #ifdef HAVE_ICONV | 1569 #ifdef HAVE_ICONV |
1486 if (setUseChConv) | 1570 if (setUseOutConv) |
1487 iconv_close(setChConv); | 1571 iconv_close(setIConvCtx); |
1488 #endif | 1572 #endif |
1489 | 1573 |
1490 th_free(setLang); | 1574 th_free(setLang); |
1491 | 1575 |
1492 siClearStack(&optFormat); | 1576 siClearStack(&optFormat); |