Mercurial > hg > dmlib
view tools/fanalyze.c @ 2208:90ec1ec89c56
Revamp the palette handling in lib64gfx somewhat, add helper functions to
lib64util for handling external palette file options and add support for
specifying one of the "internal" palettes or external (.act) palette file to
gfxconv and 64vw.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 14 Jun 2019 05:01:12 +0300 |
parents | 1662730053d0 |
children | 837c79747ea4 |
line wrap: on
line source
/*
 * Fanalyze - Analyze similarities between multiple files
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2018 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#include "dmtool.h"
#include "dmlib.h"
#include "dmargs.h"
#include "dmfile.h"


#define SET_MAX_FILES    64
#define SET_MAX_ELEMS    256
#define SET_MAX_VALUES   64


/* Typedefs
 */

// Per-offset comparison data gathered over all input files
typedef struct
{
    Uint8 stats[SET_MAX_ELEMS]; // Number of files having byte value [n] at this offset
    Uint8 variants, data;       // Number of distinct values seen, and the last one found
    int interest[16];           // Accumulated "closeness to file size" score per variant
    int interestF[16];          // Number of files contributing to the score per variant
} DMCompElem;


typedef struct
{
    int count;
    Uint8 value;
} DMStatValue;


typedef struct
{
    DMStatValue cv[SET_MAX_ELEMS];
} DMStats;


typedef struct
{
    char *filename;
    Uint8 *data;
    size_t size; // offset, crop_start, crop_end, doCrop?
    DMStats stats;
} DMSourceFile;


enum
{
    DMGV_UINT8 = 0,
    DMGV_UINT16_LE,
    DMGV_UINT16_BE,
    DMGV_UINT32_LE,
    DMGV_UINT32_BE,
    DMGV_last
};


enum
{
    DMGS_HEX = 0,
    DMGS_DEC,
    DMGS_last
};


typedef struct
{
    int type;
    int disp;
    Uint32 value;
} DMGrepValue;


typedef struct
{
    const char *name;
    Uint32 nmax;
    unsigned int bsize;
} DMGrepType;


// Indexed by DMGV_* value
static const DMGrepType dmGrepTypes[DMGV_last] =
{
    { "8bit (byte)"     , (1ULL <<  8) - 1, 1 },
    { "16bit (word) LE" , (1ULL << 16) - 1, 2 },
    { "16bit (word) BE" , (1ULL << 16) - 1, 2 },
    { "32bit (word) LE" , (1ULL << 32) - 1, 4 },
    { "32bit (word) BE" , (1ULL << 32) - 1, 4 },
};


typedef struct
{
    const char *name;
    const char *fmt;
} DMGrepDisp;


// Indexed by DMGS_* value. The values formatted with these are
// unsigned (Uint32), thus decimal uses 'u' conversion.
static const DMGrepDisp dmGrepDisp[DMGS_last] =
{
    { "hex", "x" },
    { "dec", "u" },
};


enum
{
    FA_ANALYZE,
    FA_GREP,
    FA_OFFSET,
};


/* Global variables
 */
int           setMode = FA_ANALYZE;
int           nsrcFiles = 0;              // Number of source files
DMSourceFile  srcFiles[SET_MAX_FILES];    // Source file names
DMStats       totalStats;
int           nsetGrepValues = 0;
DMGrepValue   setGrepValues[SET_MAX_VALUES];


/* Arguments
 */
static const DMOptArg optList[] =
{
    { 0, '?', "help",       "Show this help", OPT_NONE },
    { 1, 'v', "verbose",    "Be more verbose", OPT_NONE },
    { 2, 'g', "grep",       "Binary grep <val>[,<le|be>[8|16|32]]", OPT_ARGREQ },
    { 3, 'o', "offset",     "Show data in offset <offset>,<le|be>[8|16|32][d|x]]", OPT_ARGREQ },
};

static const int optListN = sizeof(optList) / sizeof(optList[0]);


/* Print program banner and the list of options to stdout.
 */
void argShowHelp(void)
{
    dmPrintBanner(stdout, dmProgName, "[options] <input file #1> <input file #2> [...]");
    dmArgsPrintHelp(stdout, optList, optListN, 0);
}


/* Parse one "<value>[,<spec>]" argument of --grep / --offset and
 * append it to setGrepValues[].
 *
 * The spec is "[le|be](8|16|32)[d|x|h]": optional endianess (defaults
 * to little-endian), value size in bits and optional display format.
 *
 * @param arg   option argument string
 * @param mode  FA_GREP or FA_OFFSET; mixing the two on one command
 *              line is rejected
 * @return DMERR_OK on success, otherwise an error code
 */
int argParseGrepValue(const char *arg, const int mode)
{
    const char *sep = strchr(arg, ',');
    char *vspec, *vstr;
    int vdisp = DMGS_HEX, vtype = -1, ret = DMERR_OK;
    Uint32 vval;

    if (setMode != FA_ANALYZE && setMode != mode)
    {
        dmErrorMsg("Options specifying multiple operating modes can't be used.\n");
        return DMERR_INVALID_ARGS;
    }
    setMode = mode;

    // Do we have spec?
    if (sep != NULL)
    {
        vspec = dm_strdup_trim(sep + 1, DM_TRIM_BOTH);
        vstr = dm_strndup_trim(arg, sep - arg, DM_TRIM_BOTH);
    }
    else
    {
        vspec = NULL;
        vstr = dm_strdup(arg);
    }

    // Parse spec if any
    if (vspec != NULL)
    {
        BOOL littleEndian = TRUE;
        const char *vtmp = vspec;

        // Get endianess specifier, if any
        if (dm_strncasecmp(vtmp, "le", 2) == 0)
        {
            littleEndian = TRUE;
            vtmp += 2;
        }
        else
        if (dm_strncasecmp(vtmp, "be", 2) == 0)
        {
            littleEndian = FALSE;
            vtmp += 2;
        }

        // Get value bit size
        if (strncmp(vtmp, "8", 1) == 0)
        {
            vtype = DMGV_UINT8;
            vtmp += 1;
        }
        else
        if (strncmp(vtmp, "16", 2) == 0)
        {
            vtype = littleEndian ? DMGV_UINT16_LE : DMGV_UINT16_BE;
            vtmp += 2;
        }
        else
        if (strncmp(vtmp, "32", 2) == 0)
        {
            vtype = littleEndian ? DMGV_UINT32_LE : DMGV_UINT32_BE;
            vtmp += 2;
        }
        else
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Invalid grep type '%s'.\n",
                vspec);
            goto out;
        }

        // Get display format, if any. The cast is required as passing
        // a negative plain char to tolower() is undefined.
        switch (tolower((unsigned char) *vtmp))
        {
            case 'd':
                vdisp = DMGS_DEC;
                break;

            case 'x': case 'h':
                vdisp = DMGS_HEX;
                break;

            case 0:
                break;

            default:
                ret = dmError(DMERR_INVALID_ARGS,
                    "Invalid grep view type '%s'.\n",
                    vspec);
                goto out;
        }
    }

    // Get value
    if (!dmGetIntVal(vstr, &vval, NULL))
    {
        ret = dmError(DMERR_INVALID_ARGS,
            "Not a valid integer value '%s'.\n",
            vstr);
        goto out;
    }

    if (mode == FA_GREP)
    {
        // Check if we need to guess size: walk the table from the last
        // valid entry downwards, so that we end up with the smallest
        // index (narrowest type, LE before BE) that can hold the value.
        if (vtype < 0)
        {
            for (int n = DMGV_last - 1; n >= 0; n--)
            {
                if (vval <= dmGrepTypes[n].nmax)
                    vtype = n;
            }
        }

        if (vtype < 0)
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Could not guess value type for '%s'.\n",
                arg);
            goto out;
        }

        // Check range
        if (vval > dmGrepTypes[vtype].nmax)
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Integer value %u out of range 0 - %u for type %s.\n",
                vval, dmGrepTypes[vtype].nmax,
                dmGrepTypes[vtype].name);
            goto out;
        }
    }
    else
    if (mode == FA_OFFSET)
    {
        // In offset mode the value is a file offset, so no range check
        if (vtype < 0)
            vtype = DMGV_UINT8;
    }

    if (nsetGrepValues < SET_MAX_VALUES)
    {
        DMGrepValue *node = &setGrepValues[nsetGrepValues++];
        node->type = vtype;
        node->disp = vdisp;
        node->value = vval;

        dmMsg(1, "Grep value %s : %u / 0x%x\n",
            dmGrepTypes[vtype].name,
            vval, vval);
    }
    else
    {
        ret = dmError(DMERR_BOUNDS,
            "Too many values specified (max %d).\n",
            SET_MAX_VALUES);
    }

out:
    dmFree(vspec);
    dmFree(vstr);
    return ret;
}


/* Option handler callback given to dmArgsProcess().
 * Returns FALSE if the option or its argument was not acceptable.
 */
BOOL argHandleOpt(const int optN, char *optArg, char *currArg)
{
    switch (optN)
    {
        case 0:
            argShowHelp();
            exit(0);
            break;

        case 1:
            dmVerbosity++;
            break;

        case 2:
            return argParseGrepValue(optArg, FA_GREP) == DMERR_OK;

        case 3:
            return argParseGrepValue(optArg, FA_OFFSET) == DMERR_OK;

        default:
            dmErrorMsg("Unknown argument '%s'.\n", currArg);
            return FALSE;
    }

    return TRUE;
}


/* Non-option argument handler callback given to dmArgsProcess().
 * Each non-option argument is taken as an input filename.
 */
BOOL argHandleNonOpt(char *currArg)
{
    if (nsrcFiles < SET_MAX_FILES)
    {
        DMSourceFile *file = &srcFiles[nsrcFiles++];
        file->filename = currArg;
        return TRUE;
    }
    else
    {
        // NOTE(review): TRUE is returned here too, so the error is only
        // reported and the surplus filename gets ignored. Presumably
        // intentional, kept as is -- confirm.
        dmErrorMsg("Maximum number of input files exceeded (%d).\n",
            SET_MAX_FILES);
        return TRUE;
    }
}


/* Reset statistics: zero all counts and set cv[n].value = n.
 */
void dmInitStats(DMStats *stats)
{
    for (size_t n = 0; n < SET_MAX_ELEMS; n++)
    {
        stats->cv[n].count = 0;
        stats->cv[n].value = (Uint8) n;
    }
}


/* qsort() comparison function for DMStatValue, orders the
 * elements by descending count.
 */
int dmCompareStatFunc(const void *va, const void *vb)
{
    const DMStatValue *pa = va, *pb = vb;

    // Compare instead of subtracting, to rule out signed overflow
    return (pb->count > pa->count) - (pb->count < pa->count);
}


/* Print the nmax most frequent byte values with their counts and
 * percentage of 'size'. NOTICE! This sorts stats->cv[] in place, so
 * afterwards cv[n] is no longer indexed by byte value.
 */
void dmPrintStats(DMStats *stats, const int nmax, const size_t size)
{
    const int nprint = nmax > SET_MAX_ELEMS ? SET_MAX_ELEMS : nmax;

    qsort(&stats->cv, SET_MAX_ELEMS, sizeof(DMStatValue), dmCompareStatFunc);

    for (int n = 0; n < nprint; n++)
    {
        const DMStatValue *cv = &stats->cv[n];

        // Avoid division by zero when there is no data at all
        const float percent = size > 0 ?
            ((float) cv->count * 100.0f) / (float) size : 0.0f;

        printf("$%02x (%d = %1.2f%%), ",
            cv->value, cv->count, percent);
    }
    printf("\n\n");
}


/* Read a value of given DMGV_* type from file data at given offset.
 *
 * The value is assembled one byte at a time, so the result does not
 * depend on host endianess or on the alignment of the data.
 *
 * @param type  one of DMGV_* values
 * @param file  source file, data and size must be set
 * @param offs  offset to the file data
 * @param mval  where the result is stored, set to 0 on failure
 * @return TRUE on success, FALSE if the type is not valid or the
 *         value does not fit completely inside the file data
 */
BOOL dmGetData(const int type, const DMSourceFile *file, const size_t offs, Uint32 *mval)
{
    const Uint8 *data;
    size_t bsize;

    *mval = 0;

    if (type < 0 || type >= DMGV_last || file->data == NULL)
        return FALSE;

    // The last valid offset is (size - bsize). The check is written
    // in a way that can't wrap around with large offsets.
    bsize = dmGrepTypes[type].bsize;
    if (bsize > file->size || offs > file->size - bsize)
        return FALSE;

    data = file->data + offs;

    switch (type)
    {
        case DMGV_UINT8:
            *mval = data[0];
            break;

        case DMGV_UINT16_LE:
            *mval = (Uint32) data[0] | ((Uint32) data[1] << 8);
            break;

        case DMGV_UINT16_BE:
            *mval = ((Uint32) data[0] << 8) | (Uint32) data[1];
            break;

        case DMGV_UINT32_LE:
            *mval =
                (Uint32) data[0] |
                ((Uint32) data[1] << 8) |
                ((Uint32) data[2] << 16) |
                ((Uint32) data[3] << 24);
            break;

        case DMGV_UINT32_BE:
            *mval =
                ((Uint32) data[0] << 24) |
                ((Uint32) data[1] << 16) |
                ((Uint32) data[2] << 8) |
                (Uint32) data[3];
            break;

        default:
            return FALSE;
    }

    return TRUE;
}


/* FA_GREP mode: print every offset in each file where one of
 * the specified values is found.
 */
static void dmDoGrep(void)
{
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        const DMSourceFile *file = &srcFiles[nfile];

        dmPrint(0, "\n%s\n", file->filename);

        for (int n = 0; n < nsetGrepValues; n++)
        {
            const DMGrepValue *node = &setGrepValues[n];
            const DMGrepType *def = &dmGrepTypes[node->type];
            Uint32 mval;

            // dmGetData() fails at the first offset where the value
            // no longer fits in the file, which terminates the scan.
            for (size_t offs = 0; dmGetData(node->type, file, offs, &mval); offs++)
            {
                if (mval == node->value)
                {
                    dmPrint(0, "%08" DM_PRIx_SIZE_T " : %s match %u / 0x%x\n",
                        offs, def->name, mval, mval);
                }
            }
        }
    }
}


/* FA_OFFSET mode: print a table with one row per specified offset
 * and one column per file, showing the value found in that offset.
 */
static void dmDoOffsets(void)
{
    const int colWidth = 10;

    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        const DMSourceFile *file = &srcFiles[nfile];
        dmPrint(1, "#%03d: %s\n", nfile + 1, file->filename);
    }

    printf(" offset :");
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
        printf(" %03d ", nfile + 1);
    printf("\n");

    printf("==========");
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
        printf("===========");
    printf("\n");

    for (int n = 0; n < nsetGrepValues; n++)
    {
        const DMGrepValue *node = &setGrepValues[n];
        const DMGrepType *def = &dmGrepTypes[node->type];

        printf("%08x : ", node->value);

        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            const DMSourceFile *file = &srcFiles[nfile];
            Uint32 mval;
            char mstr[32];
            int nwidth, nleft, nright;

            if (dmGetData(node->type, file, node->value, &mval))
            {
                // Build a format like "%04x" from value size and display type
                char mfmt[16];
                snprintf(mfmt, sizeof(mfmt), "%%0%d%s",
                    (int) (def->bsize * 2), dmGrepDisp[node->disp].fmt);

                nwidth = snprintf(mstr, sizeof(mstr), mfmt, mval);
            }
            else
            {
                // No data in this offset for this file
                nwidth = snprintf(mstr, sizeof(mstr), "----");
            }

            // Center the string in the column based on the actual
            // length, as decimal values can be wider than (bsize * 2).
            if (nwidth < 0 || nwidth > colWidth)
                nwidth = colWidth;

            nleft = (colWidth - nwidth) / 2;
            nright = colWidth - nwidth - nleft;

            for (int q = 0; q < nleft; q++)
                fputc(' ', stdout);

            fputs(mstr, stdout);

            for (int q = 0; q < nright; q++)
                fputc(' ', stdout);
        }

        printf(" [%s]\n", dmGrepDisp[node->disp].name);
    }
}


/* FA_ANALYZE mode: compare the files byte by byte over the first
 * compBufSize bytes, look for values that are close to the file
 * size, and print byte value usage statistics.
 *
 * @param compBufSize  number of offsets to compare
 * @param totalSize    sum of sizes of all the files
 * @return TRUE on success, FALSE if memory allocation failed
 */
static BOOL dmDoAnalyze(const size_t compBufSize, const size_t totalSize)
{
    DMCompElem *compBuf;

    // Allocate comparision buffer
    // XXX: integer overflow?
    dmPrint(2, "Allocating %" DM_PRIu_SIZE_T " element (%" DM_PRIu_SIZE_T " bytes) comparision buffer.\n",
        compBufSize, compBufSize * sizeof(DMCompElem));

    // Always ask for at least one element, so that a NULL result
    // can't be a valid answer to a zero-sized request.
    if ((compBuf = dmCalloc(compBufSize > 0 ? compBufSize : 1, sizeof(DMCompElem))) == NULL)
    {
        dmErrorMsg("Out of memory. Could not allocate comparision buffer!\n");
        return FALSE;
    }

    // Begin analyzing ..
    dmPrint(2, "Analyzing ..\n");
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];

        for (size_t offs = 0; offs < file->size; offs++)
        {
            const Uint8 bv = file->data[offs];
            totalStats.cv[bv].count++;
            file->stats.cv[bv].count++;
        }

        // Each file adds at most one to a counter per offset, and
        // there are at most SET_MAX_FILES files, so Uint8 is enough.
        for (size_t offs = 0; offs < compBufSize; offs++)
        {
            const Uint8 data = offs < file->size ? file->data[offs] : 0;
            compBuf[offs].stats[data]++;
        }
    }

    // Count the number of distinct byte values in each offset
    for (size_t offs = 0; offs < compBufSize; offs++)
    {
        DMCompElem *el = &compBuf[offs];
        for (int n = 0; n < SET_MAX_ELEMS; n++)
        {
            if (el->stats[n] > 0)
            {
                el->variants++;
                el->data = (Uint8) n;
            }
        }
    }

    // Display results
    for (size_t offs = 0, n = 0; offs < compBufSize; offs++)
    {
        const DMCompElem *el = &compBuf[offs];

        if (n == 0)
            printf("%08" DM_PRIx_SIZE_T " | ", offs);

        if (el->variants > 1)
            printf("[%2d] ", el->variants);
        else
            printf(" %02x ", el->data);

        if (++n >= 16)
        {
            printf("\n");
            n = 0;
        }
    }

    printf("\n");

    // Attempt further analysis: find 16/32-bit values that
    // are slightly smaller than the size of the file.
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        const DMSourceFile *file = &srcFiles[nfile];
        const size_t len = file->size > compBufSize ? compBufSize : file->size;

        for (size_t offs = 0; offs + sizeof(Uint32) <= len; offs++)
        {
            DMCompElem *elem = &compBuf[offs];

            // Variants: 0 = 16bit LE, 1 = 16bit BE, 2 = 32bit LE, 3 = 32bit BE
            for (int variant = 3; variant >= 0; variant--)
            {
                const size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32);
                Uint32 tmp = 0;

                for (size_t n = 0; n < nmax; n++)
                {
                    // The byte fetched first becomes the most significant
                    // one, so little-endian is read from last byte to first.
                    const size_t boffs = (variant & 1) ? n : nmax - 1 - n;

                    tmp <<= 8;
                    tmp |= file->data[offs + boffs];
                }

                if (tmp <= file->size && file->size - tmp < 32)
                {
                    elem->interest[variant] += (int) (32 - (file->size - tmp));
                    elem->interestF[variant]++;
                }
            }
        }
    }

    printf("\nMore findings:\n");
    for (size_t offs = 0; offs + sizeof(Uint32) <= compBufSize; offs++)
    {
        const DMCompElem *elem = &compBuf[offs];

        for (int variant = 0; variant < 4; variant++)
        {
            if (elem->interestF[variant] > 0)
            {
                printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n",
                    offs, variant,
                    elem->interestF[variant], elem->interest[variant]);
            }
        }
    }

    printf("\nGlobal most used bytes:\n");
    dmPrintStats(&totalStats, 16, totalSize);

    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];

        printf("Most used bytes for '%s':\n", file->filename);
        dmPrintStats(&file->stats, 16, file->size);
    }

    dmFree(compBuf);
    return TRUE;
}


/* Program entry point: parse the arguments, read the input files
 * and run the selected operating mode.
 * Returns 0 on success and 1 if an error occurred.
 */
int main(int argc, char *argv[])
{
    size_t compBufSize = 0, totalSize = 0;
    int res, ret = 1;

    dmInitProg("fanalyze", "File format analyzer", "0.3", NULL, NULL);
    dmVerbosity = 1;

    dmInitStats(&totalStats);

    // Parse arguments
    if (!dmArgsProcess(argc, argv, optList, optListN,
        argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
        exit(1);

    if (nsrcFiles < 1)
    {
        dmErrorMsg("Nothing to do. (try --help)\n");
        goto out;
    }

    // Read input files
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];

        if ((res = dmReadDataFile(NULL, file->filename, &file->data, &file->size)) != DMERR_OK)
        {
            dmErrorMsg("Could not read '%s': %s\n",
                file->filename, dmErrorStr(res));
            goto out;
        }

        dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n",
            nfile + 1, file->filename, file->size);

        // The comparision buffer size is the size of the smallest
        // non-empty file. Zero means "not set yet", thus empty files
        // must not be allowed to reset the value.
        if (file->size > 0 &&
            (compBufSize == 0 || file->size < compBufSize))
            compBufSize = file->size;

        totalSize += file->size;
        dmInitStats(&file->stats);
    }

    //
    // Check what operating mode we are in
    //
    if (setMode == FA_GREP)
    {
        dmDoGrep();
        ret = 0;
    }
    else
    if (setMode == FA_OFFSET)
    {
        dmDoOffsets();
        ret = 0;
    }
    else
    if (setMode == FA_ANALYZE)
    {
        if (dmDoAnalyze(compBufSize, totalSize))
            ret = 0;
    }
    else
    {
        dmErrorMsg("Invalid operating mode?\n");
    }

out:
    // srcFiles[] has static storage, so data of any file that
    // was not read is NULL here.
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        dmFree(file->data);
        file->data = NULL;
    }

    return ret;
}