Mercurial > hg > dmlib
comparison tools/fanalyze.c @ 1996:4a4c3e15b8c2
Add more analyzing.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 02 Jul 2018 02:26:52 +0300 |
parents | ea6337c873c6 |
children | 8e38fa3c4f98 |
comparison
equal
deleted
inserted
replaced
1995:ea6337c873c6 | 1996:4a4c3e15b8c2 |
---|---|
8 #include "dmtool.h" | 8 #include "dmtool.h" |
9 #include "dmlib.h" | 9 #include "dmlib.h" |
10 #include "dmargs.h" | 10 #include "dmargs.h" |
11 #include "dmfile.h" | 11 #include "dmfile.h" |
12 | 12 |
13 #define SET_MAX_FILES (8) | 13 #define SET_MAX_FILES 16 |
14 #define SET_MAX_ELEMS 256 | |
14 | 15 |
15 | 16 |
16 /* Typedefs | 17 /* Typedefs |
17 */ | 18 */ |
18 typedef struct | 19 typedef struct |
19 { | 20 { |
21 Uint8 stats[SET_MAX_ELEMS]; | |
22 Uint8 variants, data; | |
23 int interest[16]; | |
24 int interestF[16]; | |
25 } DMCompElem; | |
26 | |
27 | |
28 typedef struct | |
29 { | |
30 int count; | |
31 Uint8 value; | |
32 } DMStatValue; | |
33 | |
34 | |
35 typedef struct | |
36 { | |
37 DMStatValue cv[SET_MAX_ELEMS]; | |
38 } DMStats; | |
39 | |
40 | |
41 typedef struct | |
42 { | |
20 char *filename; | 43 char *filename; |
21 Uint8 *data; | 44 Uint8 *data; |
22 size_t size; // offset, crop_start, crop_end, doCrop? | 45 size_t size; // offset, crop_start, crop_end, doCrop? |
46 DMStats stats; | |
23 } DMSourceFile; | 47 } DMSourceFile; |
24 | 48 |
25 | 49 |
26 /* Global variables | 50 /* Global variables |
27 */ | 51 */ |
28 int nsrcFiles = 0; // Number of source files | 52 int nsrcFiles = 0; // Number of source files |
29 DMSourceFile srcFiles[SET_MAX_FILES]; // Source file names | 53 DMSourceFile srcFiles[SET_MAX_FILES]; // Source file names |
54 DMStats totalStats; | |
30 | 55 |
31 | 56 |
32 /* Arguments | 57 /* Arguments |
33 */ | 58 */ |
34 static const DMOptArg optList[] = | 59 static const DMOptArg optList[] = |
81 } | 106 } |
82 else | 107 else |
83 { | 108 { |
84 dmErrorMsg("Maximum number of input files exceeded (%d).\n", | 109 dmErrorMsg("Maximum number of input files exceeded (%d).\n", |
85 SET_MAX_FILES); | 110 SET_MAX_FILES); |
86 return FALSE; | 111 return TRUE; |
87 } | 112 } |
88 } | 113 } |
89 | 114 |
90 | 115 |
91 #define SET_MAX_ELEMS 256 | 116 void dmInitStats(DMStats *stats) |
92 typedef struct | 117 { |
93 { | 118 for (size_t n = 0; n < SET_MAX_ELEMS; n++) |
94 Uint8 counts[SET_MAX_ELEMS]; | 119 { |
95 Uint8 variants, data; | 120 stats->cv[n].count = 0; |
96 } DMCompElem; | 121 stats->cv[n].value = n; |
122 } | |
123 } | |
124 | |
125 | |
126 int dmCompareStatFunc(const void *va, const void *vb) | |
127 { | |
128 const DMStatValue *pa = va, *pb = vb; | |
129 return pb->count - pa->count; | |
130 } | |
131 | |
132 | |
133 void dmSortStats(DMStats *stats) | |
134 { | |
135 qsort(&stats->cv, sizeof(DMStatValue), SET_MAX_ELEMS, dmCompareStatFunc); | |
136 } | |
137 | |
138 | |
139 void dmPrintStats(DMStats *stats, const int nmax, const size_t size) | |
140 { | |
141 for (int n = 0; n < nmax; n++) | |
142 { | |
143 printf("$%02x (%1.2f%%), ", | |
144 stats->cv[n].value, | |
145 ((float) stats->cv[n].count * 100.0f) / (float) size); | |
146 } | |
147 printf("\n\n"); | |
148 } | |
97 | 149 |
98 | 150 |
99 int main(int argc, char *argv[]) | 151 int main(int argc, char *argv[]) |
100 { | 152 { |
101 DMCompElem *compBuf = NULL; | 153 DMCompElem *compBuf = NULL; |
102 size_t compBufSize = 0; | 154 size_t compBufSize = 0, totalSize = 0; |
103 int res; | 155 int res; |
104 | 156 |
105 dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL); | 157 dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL); |
106 dmVerbosity = 1; | 158 dmVerbosity = 1; |
159 | |
160 dmInitStats(&totalStats); | |
107 | 161 |
108 // Parse arguments | 162 // Parse arguments |
109 if (!dmArgsProcess(argc, argv, optList, optListN, | 163 if (!dmArgsProcess(argc, argv, optList, optListN, |
110 argHandleOpt, argHandleNonOpt, OPTH_BAILOUT)) | 164 argHandleOpt, argHandleNonOpt, OPTH_BAILOUT)) |
111 exit(1); | 165 exit(1); |
130 dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n", | 184 dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n", |
131 nfile + 1, file->filename, file->size); | 185 nfile + 1, file->filename, file->size); |
132 | 186 |
133 if (!compBufSize || file->size < compBufSize) | 187 if (!compBufSize || file->size < compBufSize) |
134 compBufSize = file->size; | 188 compBufSize = file->size; |
189 | |
190 totalSize += file->size; | |
135 } | 191 } |
136 | 192 |
137 // Allocate comparision buffer | 193 // Allocate comparision buffer |
138 // XXX: integer overflow? | 194 // XXX: integer overflow? |
139 dmPrint(2, "Allocating %d element (%d bytes) comparision buffer.\n", | 195 dmPrint(2, "Allocating %d element (%d bytes) comparision buffer.\n", |
148 // Begin analyzing .. | 204 // Begin analyzing .. |
149 dmPrint(2, "Analyzing ..\n"); | 205 dmPrint(2, "Analyzing ..\n"); |
150 for (int nfile = 0; nfile < nsrcFiles; nfile++) | 206 for (int nfile = 0; nfile < nsrcFiles; nfile++) |
151 { | 207 { |
152 DMSourceFile *file = &srcFiles[nfile]; | 208 DMSourceFile *file = &srcFiles[nfile]; |
209 dmInitStats(&file->stats); | |
210 | |
211 for (size_t offs = 0; offs < file->size; offs++) | |
212 { | |
213 Uint8 bv = file->data[offs]; | |
214 totalStats.cv[bv].count++; | |
215 file->stats.cv[bv].count++; | |
216 } | |
217 | |
153 for (size_t offs = 0; offs < compBufSize; offs++) | 218 for (size_t offs = 0; offs < compBufSize; offs++) |
154 { | 219 { |
155 Uint8 data = offs < file->size ? file->data[offs] : 0; | 220 Uint8 data = offs < file->size ? file->data[offs] : 0; |
156 compBuf[offs].counts[data]++; | 221 compBuf[offs].stats[data]++; |
157 } | 222 } |
158 } | 223 |
224 dmSortStats(&file->stats); | |
225 } | |
226 dmSortStats(&totalStats); | |
227 | |
159 | 228 |
160 for (size_t offs = 0; offs < compBufSize; offs++) | 229 for (size_t offs = 0; offs < compBufSize; offs++) |
161 { | 230 { |
162 DMCompElem *el = &compBuf[offs]; | 231 DMCompElem *el = &compBuf[offs]; |
163 for (int n = 0; n < SET_MAX_ELEMS; n++) | 232 for (int n = 0; n < SET_MAX_ELEMS; n++) |
164 { | 233 { |
165 if (el->counts[n] > 0) | 234 if (el->stats[n] > 0) |
166 { | 235 { |
167 el->variants++; | 236 el->variants++; |
168 el->data = n; | 237 el->data = n; |
169 } | 238 } |
170 } | 239 } |
191 } | 260 } |
192 } | 261 } |
193 | 262 |
194 printf("\n"); | 263 printf("\n"); |
195 | 264 |
265 // Attempt further analysis | |
266 for (int nfile = 0; nfile < nsrcFiles; nfile++) | |
267 { | |
268 DMSourceFile *file = &srcFiles[nfile]; | |
269 size_t len = file->size > compBufSize ? compBufSize : file->size; | |
270 for (size_t offs = 0; offs + 4 < len; offs++) | |
271 { | |
272 DMCompElem *elem = &compBuf[offs]; | |
273 | |
274 for (int variant = 3; variant >= 0; variant--) | |
275 { | |
276 size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32); | |
277 Uint32 tmp = 0; | |
278 | |
279 for (size_t n = 0; n < nmax; n++) | |
280 { | |
281 size_t boffs = (variant & 1) ? n : nmax - n; | |
282 | |
283 tmp <<= 8; | |
284 tmp |= file->data[offs + boffs]; | |
285 } | |
286 | |
287 if (file->size - tmp < 32) | |
288 { | |
289 elem->interest[variant] += 32 - (file->size - tmp); | |
290 elem->interestF[variant]++; | |
291 } | |
292 } | |
293 } | |
294 } | |
295 | |
296 printf("\nMore findings:\n"); | |
297 for (size_t offs = 0; offs + 4 < compBufSize; offs++) | |
298 { | |
299 DMCompElem *elem = &compBuf[offs]; | |
300 | |
301 for (int variant = 0; variant < 4; variant++) | |
302 if (elem->interestF[variant] > 0) | |
303 { | |
304 printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n", | |
305 offs, variant, | |
306 elem->interestF[variant], elem->interest[variant]); | |
307 } | |
308 } | |
309 | |
310 printf("\nGlobal most used bytes:\n"); | |
311 dmPrintStats(&totalStats, 16, totalSize); | |
312 | |
313 for (int nfile = 0; nfile < nsrcFiles; nfile++) | |
314 { | |
315 DMSourceFile *file = &srcFiles[nfile]; | |
316 printf("Most used bytes for '%s':\n", file->filename); | |
317 dmPrintStats(&file->stats, 16, file->size); | |
318 } | |
319 | |
196 out: | 320 out: |
197 for (int nfile = 0; nfile < nsrcFiles; nfile++) | 321 for (int nfile = 0; nfile < nsrcFiles; nfile++) |
198 { | 322 { |
199 DMSourceFile *file = &srcFiles[nfile]; | 323 DMSourceFile *file = &srcFiles[nfile]; |
200 dmFree(file->data); | 324 dmFree(file->data); |