Mercurial > hg > dmlib
view tools/fanalyze.c @ 2011:8e38fa3c4f98
Fix use of qsort().
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sun, 08 Jul 2018 00:47:22 +0300 |
parents | 4a4c3e15b8c2 |
children | 8a9ef75fd3cd |
line wrap: on
line source
/*
 * Fanalyze - Analyze similarities between multiple files
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2018 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#include "dmtool.h"
#include "dmlib.h"
#include "dmargs.h"
#include "dmfile.h"


#define SET_MAX_FILES    16     // Maximum number of input files accepted
#define SET_MAX_ELEMS    256    // Number of distinct byte values


/* Typedefs
 */

// Per-offset comparison record, one for each offset of the comparison buffer.
typedef struct
{
    Uint8 stats[SET_MAX_ELEMS]; // How many input files have byte value [i] at this offset
    Uint8 variants, data;       // Number of distinct values seen / last (highest) value seen
    int interest[16];           // Accumulated "closeness to file size" score per variant (0..3 used)
    int interestF[16];          // Number of files that scored for each variant (0..3 used)
} DMCompElem;


// Occurrence count of a single byte value.
typedef struct
{
    int count;
    Uint8 value;
} DMStatValue;


// Byte value histogram. Indexed by byte value until dmPrintStats() sorts it.
typedef struct
{
    DMStatValue cv[SET_MAX_ELEMS];
} DMStats;


// One input file: name, contents and its own byte histogram.
typedef struct
{
    char *filename;
    Uint8 *data;
    size_t size; // offset, crop_start, crop_end, doCrop?
    DMStats stats;
} DMSourceFile;


/* Global variables
 */
int           nsrcFiles = 0;              // Number of source files
DMSourceFile  srcFiles[SET_MAX_FILES];    // Source files
DMStats       totalStats;                 // Byte histogram over all source files


/* Arguments
 */
static const DMOptArg optList[] =
{
    {  0, '?', "help",     "Show this help", OPT_NONE },
    {  1, 'v', "verbose",  "Be more verbose", OPT_NONE },
};

static const int optListN = sizeof(optList) / sizeof(optList[0]);


/* Print the program banner followed by the option help.
 */
void argShowHelp()
{
    dmPrintBanner(stdout, dmProgName, "[options] <input file #1> <input file #2> [...]");
    dmArgsPrintHelp(stdout, optList, optListN, 0);
}


/* Option callback given to dmArgsProcess().
 * optN is the option id from optList, currArg the argument as typed.
 * Returns FALSE for an unknown option id, TRUE otherwise.
 */
BOOL argHandleOpt(const int optN, char *optArg, char *currArg)
{
    (void) optArg;

    switch (optN)
    {
        case 0:
            argShowHelp();
            exit(0);
            break;

        case 1:
            dmVerbosity++;
            break;

        default:
            dmErrorMsg("Unknown argument '%s'.\n", currArg);
            return FALSE;
    }

    return TRUE;
}


/* Non-option argument callback given to dmArgsProcess(): every such
 * argument is recorded as an input filename (the string is not copied).
 */
BOOL argHandleNonOpt(char *currArg)
{
    if (nsrcFiles < SET_MAX_FILES)
    {
        DMSourceFile *file = &srcFiles[nsrcFiles++];
        file->filename = currArg;
        return TRUE;
    }
    else
    {
        dmErrorMsg("Maximum number of input files exceeded (%d).\n",
            SET_MAX_FILES);
        // NOTE(review): TRUE is returned here as well, so surplus files are
        // reported but otherwise ignored - confirm this is intentional.
        return TRUE;
    }
}


/* Reset a histogram: every count is zeroed and entry [n] is tagged
 * with byte value n.
 */
void dmInitStats(DMStats *stats)
{
    for (size_t n = 0; n < SET_MAX_ELEMS; n++)
    {
        stats->cv[n].count = 0;
        stats->cv[n].value = n;
    }
}


/* qsort() comparator for DMStatValue: orders by descending count.
 */
int dmCompareStatFunc(const void *va, const void *vb)
{
    const DMStatValue *pa = va, *pb = vb;
    return pb->count - pa->count;
}


/* Print the nmax most frequent byte values of a histogram, with each
 * count also shown as a percentage of 'size'.
 *
 * The histogram is sorted in place, so afterwards stats->cv[] is no
 * longer indexed by byte value.
 */
void dmPrintStats(DMStats *stats, const int nmax, const size_t size)
{
    qsort(stats->cv, SET_MAX_ELEMS, sizeof(DMStatValue), dmCompareStatFunc);

    for (int n = 0; n < nmax; n++)
    {
        printf("$%02x (%d = %1.2f%%), ",
            stats->cv[n].value,
            stats->cv[n].count,
            ((float) stats->cv[n].count * 100.0f) / (float) size);
    }
    printf("\n\n");
}


int main(int argc, char *argv[])
{
    DMCompElem *compBuf = NULL;
    size_t compBufSize = 0, totalSize = 0;
    int res;

    dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL);
    dmVerbosity = 1;

    dmInitStats(&totalStats);

    // Parse arguments
    if (!dmArgsProcess(argc, argv, optList, optListN,
        argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
        exit(1);

    if (nsrcFiles < 1)
    {
        dmErrorMsg("Nothing to do. (try --help)\n");
        goto out;
    }

    // Read input files
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];

        if ((res = dmReadDataFile(NULL, file->filename, &file->data, &file->size)) != DMERR_OK)
        {
            dmErrorMsg("Could not read '%s': %s\n",
                file->filename, dmErrorStr(res));
            goto out;
        }

        dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n",
            nfile + 1, file->filename, file->size);

        // The comparison buffer spans the smallest input file
        // (a size of zero counts as "not set yet")
        if (!compBufSize || file->size < compBufSize)
            compBufSize = file->size;

        totalSize += file->size;

        dmInitStats(&file->stats);
    }

    // Allocate comparison buffer
    // XXX: integer overflow?
    // Both values are size_t, so they must not be printed with %d.
    dmPrint(2, "Allocating %" DM_PRIu_SIZE_T " element (%" DM_PRIu_SIZE_T " bytes) comparision buffer.\n",
        compBufSize, compBufSize * sizeof(DMCompElem));

    if ((compBuf = dmCalloc(compBufSize, sizeof(DMCompElem))) == NULL)
    {
        dmErrorMsg("Out of memory. Could not allocate comparision buffer!\n");
        goto out;
    }

    // Begin analyzing ..
    dmPrint(2, "Analyzing ..\n");
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];

        // Byte histograms: global and per-file
        for (size_t offs = 0; offs < file->size; offs++)
        {
            Uint8 bv = file->data[offs];
            totalStats.cv[bv].count++;
            file->stats.cv[bv].count++;
        }

        // Per-offset value counts; offsets past the end of a file count as 0
        for (size_t offs = 0; offs < compBufSize; offs++)
        {
            Uint8 data = offs < file->size ? file->data[offs] : 0;
            compBuf[offs].stats[data]++;
        }
    }

    // Count how many distinct byte values occur at each offset
    for (size_t offs = 0; offs < compBufSize; offs++)
    {
        DMCompElem *el = &compBuf[offs];
        for (int n = 0; n < SET_MAX_ELEMS; n++)
        {
            if (el->stats[n] > 0)
            {
                el->variants++;
                el->data = n;
            }
        }
    }

    // Display results: 16 offsets per row. Offsets where the files differ
    // show the number of variants, the rest show the common byte value.
    for (size_t offs = 0, n = 0; offs < compBufSize; offs++)
    {
        DMCompElem *el = &compBuf[offs];
        BOOL var = el->variants > 1;

        if (n == 0)
            printf("%08" DM_PRIx_SIZE_T " | ", offs);

        if (var)
            printf("[%2d] ", el->variants);
        else
            printf(" %02x ", el->data);

        if (++n >= 16)
        {
            printf("\n");
            n = 0;
        }
    }
    printf("\n");

    // Attempt further analysis: look for 16/32-bit values that are at most
    // the size of the file they occur in, and within 32 of it.
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        size_t len = file->size > compBufSize ? compBufSize : file->size;

        for (size_t offs = 0; offs + 4 < len; offs++)
        {
            DMCompElem *elem = &compBuf[offs];

            // Variants: 0 = 16-bit LE, 1 = 16-bit BE, 2 = 32-bit LE, 3 = 32-bit BE
            for (int variant = 3; variant >= 0; variant--)
            {
                size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32);
                Uint32 tmp = 0;

                for (size_t n = 0; n < nmax; n++)
                {
                    // Bytes are shifted in most significant first, so the
                    // reversed order must run nmax-1 .. 0 to stay within
                    // the nmax bytes that start at offs.
                    size_t boffs = (variant & 1) ? n : nmax - 1 - n;

                    tmp <<= 8;
                    tmp |= file->data[offs + boffs];
                }

                // Unsigned arithmetic: when tmp exceeds the file size the
                // difference wraps to a huge value and the test fails.
                if (file->size - tmp < 32)
                {
                    elem->interest[variant] += 32 - (file->size - tmp);
                    elem->interestF[variant]++;
                }
            }
        }
    }

    printf("\nMore findings:\n");
    for (size_t offs = 0; offs + 4 < compBufSize; offs++)
    {
        DMCompElem *elem = &compBuf[offs];

        for (int variant = 0; variant < 4; variant++)
        {
            if (elem->interestF[variant] > 0)
            {
                printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n",
                    offs, variant,
                    elem->interestF[variant], elem->interest[variant]);
            }
        }
    }

    printf("\nGlobal most used bytes:\n");
    dmPrintStats(&totalStats, 16, totalSize);

    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        printf("Most used bytes for '%s':\n", file->filename);
        dmPrintStats(&file->stats, 16, file->size);
    }

out:
    // Release the comparison buffer (still NULL if it was never allocated)
    dmFree(compBuf);

    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        dmFree(file->data);
    }

    // NOTE(review): the exit status is 0 on the error paths above as well.
    return 0;
}