# HG changeset patch # User Matti Hamalainen # Date 1578335335 -7200 # Node ID d73ccb1558787d8f4fb4b3dcdfde0718fb40dc47 # Parent a23dae1a484927fd249c5e012a58e1bc3c004801 Implement support for outputting CP850 and CP437 in our simple fallback character set convertor. diff -r a23dae1a4849 -r d73ccb155878 sidinfo.c --- a/sidinfo.c Mon Jan 06 18:16:25 2020 +0200 +++ b/sidinfo.c Mon Jan 06 20:28:55 2020 +0200 @@ -19,11 +19,6 @@ // Some constants // -// Default character encoding to convert to -// NOTE! Do not change unless you are using iconv()!! -// The fallback converter does not handle other encodings. -#define SET_DEF_CHARSET "utf8" - // HVSC documents directory #define SET_HVSC_DOCUMENTS "DOCUMENTS" @@ -49,6 +44,15 @@ }; +enum +{ + TH_LANG_UTF8, + TH_LANG_ISO88591, + TH_LANG_CP850, + TH_LANG_CP437, +}; + + typedef struct { int cmd; @@ -130,9 +134,11 @@ SIDLibSTILDB *sidSTILDB = NULL; -BOOL setUseChConv; +BOOL setUseOutConv; #ifdef HAVE_ICONV -iconv_t setChConv; +iconv_t setIConvCtx; +#else +int setOutLang; #endif @@ -382,6 +388,25 @@ } +static const uint8_t si_lang_iso88591_to_cp850[16*6] = { +0xff, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, +0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, +0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, +0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, +0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, +0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6, 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98, +}; + +static const uint8_t si_lang_iso88591_to_cp437[16*6] = { +0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00, +0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8, +0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, 0xe1, +0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, +0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94, 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, 0x98, +}; + + char *siConvertCharset(const char *src) { #ifdef HAVE_ICONV @@ -395,40 +420,81 @@ while (srcLeft > 0) { - size_t ret = iconv(setChConv, &srcPtr, &srcLeft, &outPtr, &outLeft); + size_t ret = iconv(setIConvCtx, &srcPtr, &srcLeft, &outPtr, &outLeft); if (ret == (size_t) -1) break; } #else - // Fallback ISO-8859-1 to UTF-8 conversion - size_t srcSize = strlen(src), - outSize = srcSize * 2 + 1; + // Fallback conversion of ISO-8859-1 to X + size_t srcSize = strlen(src), outSize, minLeft; const uint8_t *srcPtr = (const uint8_t *) src; + const uint8_t *tab; uint8_t *outBuf, *outPtr; - if ((outBuf = outPtr = th_malloc(outSize + 1)) == NULL) + + switch (setOutLang) + { + case TH_LANG_UTF8: + outSize = srcSize * 2; + minLeft = 2; + break; + + default: + outSize = srcSize; + minLeft = 1; + } + + if ((outBuf = outPtr = th_malloc(outSize)) == NULL) return NULL; - while (srcSize > 0 && outSize >= 2) + while (srcSize > 0 && outSize >= minLeft) { - if (*srcPtr < 0x80) - { - *outPtr++ = *srcPtr; - outSize--; - } - else - if (*srcPtr < 0xBF) + switch (setOutLang) { - *outPtr++ = 0xC2; - *outPtr++ = *srcPtr; - outSize -= 2; + case TH_LANG_UTF8: + // Not 100% correct really, but close enough + if (*srcPtr < 0x80) + { + *outPtr++ = *srcPtr; + outSize--; + } + else + if (*srcPtr < 0xBF) + { + *outPtr++ = 0xC2; + *outPtr++ = *srcPtr; + outSize -= 2; + } + else + { + *outPtr++ = 0xC3; + *outPtr++ = *srcPtr - 0x40; + outSize -= 2; + } + break; + + case TH_LANG_ISO88591: + *outPtr++ = *srcPtr; + outSize--; + break; + + case TH_LANG_CP850: + case TH_LANG_CP437: + // Not 100% correct either, but close enough + tab = (setOutLang == TH_LANG_CP850) ? si_lang_iso88591_to_cp850 : si_lang_iso88591_to_cp437; + + if (*srcPtr < 0x7f) + *outPtr++ = *srcPtr; + else + if (*srcPtr >= 0xA0) + *outPtr++ = tab[*srcPtr - 0xA0]; + else + *outPtr++ = '?'; + + outSize--; + break; } - else - { - *outPtr++ = 0xC3; - *outPtr++ = (*srcPtr - 0x40); - outSize -= 2; - } + srcPtr++; srcSize--; } @@ -943,11 +1009,11 @@ static void siPrintPSIDInfoLine(FILE *outFile, BOOL *shown, const char *fmt, const int otype, const char *d_str, const int d_int, - const BOOL useConv) + const BOOL convert) { char *str, *tmp; - if (setUseChConv && d_str != NULL && useConv) + if (setUseOutConv && d_str != NULL && convert) { char *tmp2 = siConvertCharset(d_str); tmp = siEscapeString(tmp2, optEscapeChars); @@ -1318,15 +1384,33 @@ setLang[i++] = th_tolower(*ptr); } setLang[i] = 0; - } #ifdef HAVE_ICONV - // Initialize iconv, check if we have language/charset - setChConv = iconv_open(setLang != NULL ? setLang : SET_DEF_CHARSET, "iso88591"); - setUseChConv = setChConv != (iconv_t) -1; + // Initialize iconv, check if we have language/charset + setIConvCtx = iconv_open("utf8", "iso88591"); + setUseOutConv = setIConvCtx != (iconv_t) -1; #else - setUseChConv = setLang != NULL && strcmp(setLang, SET_DEF_CHARSET) == 0; + // Check if we can use our fallback converter + if (strcmp(setLang, "utf8") == 0) + setOutLang = TH_LANG_UTF8; + else + if (strcmp(setLang, "iso88591") == 0 || + strcmp(setLang, "cp819") == 0 || + strcmp(setLang, "latin1") == 0 || + strcmp(setLang, "cp28591") == 0) + setOutLang = TH_LANG_ISO88591; + else + if (strcmp(setLang, "cp850") == 0) + setOutLang = TH_LANG_CP850; + else + if (strcmp(setLang, "cp437") == 0) + setOutLang = TH_LANG_CP437; + else + setOutLang = TH_LANG_ISO88591; + + setUseOutConv = setOutLang != TH_LANG_ISO88591; #endif + } // Parse command line arguments if (!th_args_process(argc, argv, optList, optListN, @@ -1334,7 +1418,7 @@ goto out; THMSG(2, "Requested output LANG='%s', use charset conversion=%s\n", - setLang, setUseChConv ? "yes" : "no"); + setLang, setUseOutConv ? "yes" : "no"); if (optOneLineFieldSep != NULL) { @@ -1483,8 +1567,8 @@ out: #ifdef HAVE_ICONV - if (setUseChConv) - iconv_close(setChConv); + if (setUseOutConv) + iconv_close(setIConvCtx); #endif th_free(setLang);