changeset 1682:2cfb4806cf71

Add simple and naively implemented multi-file bindiff type file analyzer utility 'fanalyze'.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 31 May 2018 15:15:50 +0300
parents bd68c9adc7ca
children 187a9f3c9e88
files Makefile.gen tools/fanalyze.c
diffstat 2 files changed, 205 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.gen	Thu May 31 13:46:23 2018 +0300
+++ b/Makefile.gen	Thu May 31 15:15:50 2018 +0300
@@ -239,7 +239,7 @@
 ### Dependancies
 ifeq ($(DM_BUILD_TOOLS),yes)
 ifeq ($(DM_USE_STDIO),yes)
-TOOL_BINARIES+= objlink data2inc gfxconv gentab
+TOOL_BINARIES+= objlink data2inc gfxconv gentab fanalyze
 ifeq ($(SUP_MODLOAD),yes)
 TOOL_BINARIES+= dumpmod mod2wav
 TESTS_BINARIES+= plrtest
@@ -485,6 +485,10 @@
 	@echo " LINK $+"
 	@$(CC) -o $@ $(filter %.o %.a,$+) $(DM_LDFLAGS) $(TOOL_LDFLAGS)
 
+$(TOOL_BINPATH)fanalyze$(EXEEXT): $(OBJPATH)fanalyze.o $(DMLIB_A)
+	@echo " LINK $+"
+	@$(CC) -o $@ $(filter %.o %.a,$+) $(DM_LDFLAGS) $(TOOL_LDFLAGS)
+
 $(TOOL_BINPATH)gfxconv$(EXEEXT): $(OBJPATH)gfxconv.o $(OBJPATH)lib64gfx.o $(OBJPATH)lib64fmts.o $(OBJPATH)libgfx.o $(DMLIB_A)
 	@echo " LINK $+"
 	@$(CC) -o $@ $(filter %.o %.a,$+) $(DM_LDFLAGS) $(TOOL_LDFLAGS) $(LIBPNG_LDFLAGS) $(ZLIB_LDFLAGS)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fanalyze.c	Thu May 31 15:15:50 2018 +0300
@@ -0,0 +1,200 @@
+/*
+ * Fanalyze - Analyze similarities between multiple files
+ * Programmed and designed by Matti 'ccr' Hamalainen
+ * (C) Copyright 2018 Tecnic Software productions (TNSP)
+ *
+ * Please read file 'COPYING' for information on license and distribution.
+ */
+#include "dmtool.h"
+#include "dmlib.h"
+#include "dmargs.h"
+#include "dmfile.h"
+
+#define SET_MAX_FILES    (8)    // Capacity of srcFiles[]; argHandleNonOpt() rejects further input files
+
+
+/* Typedefs
+ */
+typedef struct
+{
+    char *filename;  // Set from the non-option argument in argHandleNonOpt(); pointer stored as-is
+    Uint8 *data;     // File contents, filled in by dmReadDataFile() in main(), released with dmFree()
+    size_t size;     // Number of bytes in data (TODO fields: offset, crop_start, crop_end, doCrop?)
+} DMSourceFile;
+
+
+/* Global variables
+ */
+int           nsrcFiles = 0;              // Number of entries in use in srcFiles[]
+DMSourceFile  srcFiles[SET_MAX_FILES];    // Input files: names from the command line, contents loaded in main()
+
+
+/* Arguments. The first field of each entry is presumably the index handed to
+ * argHandleOpt() as optN: its switch handles 0 (help) and 1 (verbose). */
+static const DMOptArg optList[] =
+{
+    {  0, '?', "help",        "Show this help", OPT_NONE },
+    {  1, 'v', "verbose",     "Be more verbose", OPT_NONE },
+};
+
+static const int optListN = sizeof(optList) / sizeof(optList[0]);  // Number of entries in optList
+
+// Prints the banner/usage line via dmPrintBanner() and then the option help via dmArgsPrintHelp(), both to stdout.
+void argShowHelp()
+{
+    dmPrintBanner(stdout, dmProgName, "[options] <input file #1> <input file #2> [...]");
+    dmArgsPrintHelp(stdout, optList, optListN, 0);
+}
+
+
+// Option callback given to dmArgsProcess(). optN 0 prints help and exits with
+// status 0; optN 1 increments dmVerbosity and returns TRUE. optArg is unused.
+BOOL argHandleOpt(const int optN, char *optArg, char *currArg)
+{
+    if (optN == 0)
+    {
+        argShowHelp();
+        exit(0);
+    }
+
+    if (optN == 1)
+    {
+        dmVerbosity++;
+        return TRUE;
+    }
+
+    // Any other option index is reported via currArg and rejected
+    dmErrorMsg("Unknown argument '%s'.\n", currArg);
+    return FALSE;
+}
+
+
+// Non-option argument callback given to dmArgsProcess(): stores currArg as the
+// next input filename. Returns FALSE once SET_MAX_FILES entries are in use.
+BOOL argHandleNonOpt(char *currArg)
+{
+    if (nsrcFiles >= SET_MAX_FILES)
+    {
+        dmErrorMsg("Maximum number of input files exceeded (%d).\n",
+            SET_MAX_FILES);
+        return FALSE;
+    }
+
+    // The pointer is kept as-is; the string is not copied
+    srcFiles[nsrcFiles++].filename = currArg;
+    return TRUE;
+}
+
+
+#define SET_MAX_ELEMS 256  // Size of counts[]; main() indexes it with a Uint8 byte value
+typedef struct
+{
+    Uint8 counts[SET_MAX_ELEMS];  // counts[v]: number of input files having byte value v at this offset
+    Uint8 variants, data;         // variants: distinct values seen; data: highest value seen (set in main())
+} DMCompElem;
+
+
+// Entry point: reads the input files, tallies the byte values seen at each
+// offset, and prints a 16-column hex dump in which offsets that differ between
+// files show "[NN]" (distinct value count). Returns 0 on success, 1 on error.
+int main(int argc, char *argv[])
+{
+    DMCompElem *compBuf = NULL;
+    size_t compBufSize = 0;
+    int res, status = 1;
+
+    dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL);
+    dmVerbosity = 1;
+
+    // Parse arguments
+    if (!dmArgsProcess(argc, argv, optList, optListN,
+        argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
+        exit(1);
+
+    if (nsrcFiles < 1)
+    {
+        dmErrorMsg("Nothing to do. (try --help)\n");
+        goto out;
+    }
+
+    // Read input files, tracking the size of the largest one
+    for (int nfile = 0; nfile < nsrcFiles; nfile++)
+    {
+        DMSourceFile *file = &srcFiles[nfile];
+        dmPrint(2, "Input #%d: %s\n", nfile + 1, file->filename);
+        if ((res = dmReadDataFile(NULL, file->filename, &file->data, &file->size)) != DMERR_OK)
+        {
+            dmErrorMsg("Could not read '%s': %s\n",
+                file->filename, dmErrorStr(res));
+            goto out;
+        }
+
+        if (file->size > compBufSize)
+            compBufSize = file->size;
+    }
+
+    // Allocate comparison buffer, one element per offset of the largest file.
+    // size_t is cast to unsigned long for %lu. XXX: byte count may overflow?
+    dmPrint(2, "Allocating %lu element (%lu bytes) comparision buffer.\n",
+        (unsigned long) compBufSize, (unsigned long) (compBufSize * sizeof(DMCompElem)));
+
+    if ((compBuf = dmCalloc(compBufSize, sizeof(DMCompElem))) == NULL)
+    {
+        dmErrorMsg("Out of memory. Could not allocate comparision buffer!\n");
+        goto out;
+    }
+
+    // Pass 1: count how often each byte value occurs at each offset.
+    // Offsets past the end of a shorter file are counted as a zero byte.
+    dmPrint(2, "Analyzing ..\n");
+    for (int nfile = 0; nfile < nsrcFiles; nfile++)
+    {
+        DMSourceFile *file = &srcFiles[nfile];
+        for (size_t offs = 0; offs < compBufSize; offs++)
+            compBuf[offs].counts[offs < file->size ? file->data[offs] : 0]++;
+    }
+
+    // Pass 2: count distinct values per offset; 'data' ends up as the highest
+    // value seen, which is the only value when 'variants' is 1.
+    for (size_t offs = 0; offs < compBufSize; offs++)
+    {
+        DMCompElem *el = &compBuf[offs];
+        for (int n = 0; n < SET_MAX_ELEMS; n++)
+        {
+            if (el->counts[n] > 0)
+            {
+                el->variants++;
+                el->data = n;
+            }
+        }
+    }
+
+    // Display results, 16 offsets per row
+    for (size_t offs = 0, n = 0; offs < compBufSize; offs++)
+    {
+        DMCompElem *el = &compBuf[offs];
+        if (n == 0)
+            printf("%08lx | ", (unsigned long) offs);
+
+        if (el->variants > 1)
+            printf("[%2d] ", el->variants);
+        else
+            printf(" %02x  ", el->data);
+
+        if (++n >= 16)
+        {
+            printf("\n");
+            n = 0;
+        }
+    }
+
+    printf("\n");
+    status = 0;
+
+out:
+    dmFree(compBuf);  // may still be NULL here, as may the data pointers freed below
+    for (int nfile = 0; nfile < nsrcFiles; nfile++)
+        dmFree(srcFiles[nfile].data);
+
+    return status;
+}