# HG changeset patch
# User Matti Hamalainen
# Date 1530487612 -10800
# Node ID 4a4c3e15b8c2994560712e10534d079ff8573778
# Parent  ea6337c873c6fdb6dd80d7ef5f03422908142b74
Add more analyzing.

diff -r ea6337c873c6 -r 4a4c3e15b8c2 tools/fanalyze.c
--- a/tools/fanalyze.c	Sun Jul 01 23:09:34 2018 +0300
+++ b/tools/fanalyze.c	Mon Jul 02 02:26:52 2018 +0300
@@ -10,16 +10,40 @@
 #include "dmargs.h"
 #include "dmfile.h"
 
-#define SET_MAX_FILES (8)
+#define SET_MAX_FILES 16
+#define SET_MAX_ELEMS 256
 
 
 /* Typedefs
  */
 typedef struct
 {
+    Uint8 stats[SET_MAX_ELEMS];
+    Uint8 variants, data;
+    int interest[16];
+    int interestF[16];
+} DMCompElem;
+
+
+typedef struct
+{
+    int count;
+    Uint8 value;
+} DMStatValue;
+
+
+typedef struct
+{
+    DMStatValue cv[SET_MAX_ELEMS];
+} DMStats;
+
+
+typedef struct
+{
     char *filename;
     Uint8 *data;
     size_t size; // offset, crop_start, crop_end, doCrop?
+    DMStats stats;
 } DMSourceFile;
 
 
@@ -27,6 +51,7 @@
  */
 int nsrcFiles = 0;              // Number of source files
 DMSourceFile srcFiles[SET_MAX_FILES]; // Source file names
+DMStats totalStats;
 
 
 /* Arguments
@@ -83,28 +108,62 @@
     {
         dmErrorMsg("Maximum number of input files exceeded (%d).\n",
             SET_MAX_FILES);
-        return FALSE;
+        return TRUE;
+    }
+}
+
+
+// Reset every counter to zero and tag each slot with its own byte value,
+// so the value stays attached to its count after the table is sorted.
+void dmInitStats(DMStats *stats)
+{
+    for (size_t n = 0; n < SET_MAX_ELEMS; n++)
+    {
+        stats->cv[n].count = 0;
+        stats->cv[n].value = n;
     }
 }
 
 
-#define SET_MAX_ELEMS 256
-typedef struct
+// qsort() comparator: orders DMStatValue entries by descending count.
+int dmCompareStatFunc(const void *va, const void *vb)
+{
+    const DMStatValue *pa = va, *pb = vb;
+    return pb->count - pa->count;
+}
+
+
+// Sort the statistics table so that the most frequent byte values come first.
+void dmSortStats(DMStats *stats)
 {
-    Uint8 counts[SET_MAX_ELEMS];
-    Uint8 variants, data;
-} DMCompElem;
+    qsort(stats->cv, SET_MAX_ELEMS, sizeof(DMStatValue), dmCompareStatFunc);
+}
+
+
+// Print the first nmax table entries as hex byte value and percentage of size.
+void dmPrintStats(DMStats *stats, const int nmax, const size_t size)
+{
+    for (int n = 0; n < nmax; n++)
+    {
+        printf("$%02x (%1.2f%%), ",
+            stats->cv[n].value,
+            ((float) stats->cv[n].count * 100.0f) / (float) size);
+    }
+    printf("\n\n");
+}
 
 
 int main(int argc, char *argv[])
 {
     DMCompElem *compBuf = NULL;
-    size_t compBufSize = 0;
+    size_t compBufSize = 0, totalSize = 0;
     int res;
 
     dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL);
     dmVerbosity = 1;
 
+    dmInitStats(&totalStats);
+
     // Parse arguments
     if (!dmArgsProcess(argc, argv, optList, optListN,
         argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
@@ -132,6 +191,8 @@
 
         if (!compBufSize || file->size < compBufSize)
             compBufSize = file->size;
+
+        totalSize += file->size;
     }
 
     // Allocate comparision buffer
@@ -150,19 +211,32 @@
     for (int nfile = 0; nfile < nsrcFiles; nfile++)
     {
         DMSourceFile *file = &srcFiles[nfile];
+        dmInitStats(&file->stats);
+
+        for (size_t offs = 0; offs < file->size; offs++)
+        {
+            Uint8 bv = file->data[offs];
+            totalStats.cv[bv].count++;
+            file->stats.cv[bv].count++;
+        }
+
         for (size_t offs = 0; offs < compBufSize; offs++)
         {
             Uint8 data = offs < file->size ? file->data[offs] : 0;
-            compBuf[offs].counts[data]++;
+            compBuf[offs].stats[data]++;
         }
+
+        dmSortStats(&file->stats);
     }
 
+    dmSortStats(&totalStats);
+
     for (size_t offs = 0; offs < compBufSize; offs++)
     {
         DMCompElem *el = &compBuf[offs];
         for (int n = 0; n < SET_MAX_ELEMS; n++)
         {
-            if (el->counts[n] > 0)
+            if (el->stats[n] > 0)
             {
                 el->variants++;
                 el->data = n;
@@ -193,6 +267,63 @@
 
     printf("\n");
 
+    // Attempt further analysis
+    for (int nfile = 0; nfile < nsrcFiles; nfile++)
+    {
+        DMSourceFile *file = &srcFiles[nfile];
+        size_t len = file->size > compBufSize ? compBufSize : file->size;
+        for (size_t offs = 0; offs + 4 < len; offs++)
+        {
+            DMCompElem *elem = &compBuf[offs];
+
+            for (int variant = 3; variant >= 0; variant--)
+            {
+                size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32);
+                Uint32 tmp = 0;
+
+                // Odd variants assemble the value MSB-first, even variants LSB-first
+                for (size_t n = 0; n < nmax; n++)
+                {
+                    size_t boffs = (variant & 1) ? n : nmax - 1 - n;
+
+                    tmp <<= 8;
+                    tmp |= file->data[offs + boffs];
+                }
+
+                // Score values that equal the file size or fall up to 31 bytes short of it
+                if (file->size - tmp < 32)
+                {
+                    elem->interest[variant] += 32 - (file->size - tmp);
+                    elem->interestF[variant]++;
+                }
+            }
+        }
+    }
+
+    printf("\nMore findings:\n");
+    for (size_t offs = 0; offs + 4 < compBufSize; offs++)
+    {
+        DMCompElem *elem = &compBuf[offs];
+
+        for (int variant = 0; variant < 4; variant++)
+        if (elem->interestF[variant] > 0)
+        {
+            printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n",
+                offs, variant,
+                elem->interestF[variant], elem->interest[variant]);
+        }
+    }
+
+    printf("\nGlobal most used bytes:\n");
+    dmPrintStats(&totalStats, 16, totalSize);
+
+    for (int nfile = 0; nfile < nsrcFiles; nfile++)
+    {
+        DMSourceFile *file = &srcFiles[nfile];
+        printf("Most used bytes for '%s':\n", file->filename);
+        dmPrintStats(&file->stats, 16, file->size);
+    }
+
 out:
     for (int nfile = 0; nfile < nsrcFiles; nfile++)
     {