# HG changeset patch # User Matti Hamalainen # Date 1634164732 -10800 # Node ID 84c0facfc43c10ef8a9cd2f84b663a925a9a7b05 # Parent cfa260872b237a3f2f685dd021eb842f20c142d3 Merge changes from upstream v0.1.4. diff -r cfa260872b23 -r 84c0facfc43c INSTALL --- a/INSTALL Fri Oct 08 02:40:00 2021 +0300 +++ b/INSTALL Thu Oct 14 01:38:52 2021 +0300 @@ -3,7 +3,7 @@ dxa should build out of the box with any even vaguely recent version of gcc. You might need to do some tweakage for non-Unix systems or cc. I have tested -it on AIX, NetBSD and Mac OS X/Darwin. -- Cameron Kaiser +it on AIX, Linux, NetBSD and Mac OS X (PowerPC and x86_64). -- Cameron Kaiser 1. Uncomment LONG_OPTIONS in options.h if you have getopts_long() and want long options (--example). This is purely optional. diff -r cfa260872b23 -r 84c0facfc43c Makefile --- a/Makefile Fri Oct 08 02:40:00 2021 +0300 +++ b/Makefile Thu Oct 14 01:38:52 2021 +0300 @@ -3,7 +3,7 @@ TARGETS = dxa OBJECTS = scan.o vector.o dump.o table.o label.o main.o -DXA_VERSION = 0.1.3++4 +DXA_VERSION = 0.1.4++ # choose the compiler and flags diff -r cfa260872b23 -r 84c0facfc43c dump.c --- a/dump.c Fri Oct 08 02:40:00 2021 +0300 +++ b/dump.c Thu Oct 14 01:38:52 2021 +0300 @@ -1,8 +1,8 @@ /*\ - * dxa v0.1.3 -- symbolic 65xx disassembler + * dxa -- symbolic 65xx disassembler * * Copyright (C) 1993, 1994 Marko M\"akel\"a - * Changes for dxa (C) 2004, 2006 Cameron Kaiser + * Changes for dxa (C) 2004-2019 Cameron Kaiser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,18 +19,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Marko does not maintain dxa, so questions specific to dxa should be - * sent to me at ckaiser@floodgap.com . Otherwise, - * - * Contacting the author: + * sent to me at ckaiser@floodgap.com. * - * Via Internet E-mail: - * - * - * Via Snail Mail: - * Marko M\"akel\"a - * Sillitie 10 A - * FIN-01480 VANTAA - * Finland \*/ #define _DUMP_C_ @@ -116,6 +106,8 @@ fprintf(stdout, "%s\t.word $%04x", lineprefix, StartAddress); fprintf (stdout, "\n%s\t* = $%04x\n\n", lineprefix, StartAddress); + if(BasicHeaderLength) + fprintf(stdout, "; %d byte BASIC header.\n", BasicHeaderLength); for (address = StartAddress; (ADDR_T)(address - StartAddress) < @@ -149,20 +141,21 @@ } if (FindNextEntry (NULL, address, ~0, WRN_INSTR_WRITTEN_TO)) - fprintf (stdout, "%s; Instruction opcode accessed.\n", lineprefix); + fprintf (stdout, "%s; Instruction opcode $%04x accessed.\n", + lineprefix, address); entry = NULL; while ((entry = FindNextEntry (entry, address + counter, 0, 0))) switch (entry->type) { case WRN_PARAM_WRITTEN_TO: - fprintf (stdout, "%s; Instruction parameter accessed.\n", - lineprefix); + fprintf (stdout, "%s; Instruction parameter $%04x accessed.\n", + lineprefix, address + counter); break; case WRN_PARAM_JUMPED_TO: - fprintf (stdout, "%s; Instruction parameter jumped to.\n", - lineprefix); + fprintf (stdout, "%s; Instruction parameter $%04x jumped to.\n", + lineprefix, address + counter); break; } } diff -r cfa260872b23 -r 84c0facfc43c dxa.1 --- a/dxa.1 Fri Oct 08 02:40:00 2021 +0300 +++ b/dxa.1 Thu Oct 14 01:38:52 2021 +0300 @@ -1,4 +1,4 @@ -.TH DXA "1" "31 January 2007" +.TH DXA "1" "31 January 2019" .SH NAME dxa \- 6502/R65C02 disassembler @@ -89,12 +89,15 @@ Specifies an address (in hexadecimal) that is declared to be a valid routine. .B It is strongly recommended that you specify the initial execution address as a routine. -For example, for a Commodore 64 binary with a -.B SYS 2064 -header, add +For example, for a Commodore 64 binary with a BASIC header that performs +.B SYS +.BR 2064 , +specify .B \-r0810 -so that disassembly starts at that location. This may have interactions with -datablock detection +so that disassembly starts at that location (or use the +.B \-U +option, which can automatically do this for you). Note that specifying this +manually may have interactions with datablock detection .RB ( \-d ). .TP .B \--routines filename @@ -225,6 +228,35 @@ .BR \-g , then that address will be used here too. .TP +.B \--no-detect-basic +.TP +.B \-u +.TP +.B \--detect-basic +.TP +.B \-U +If the starting address is recognized as a typical BASIC entry point +(currently supported for Commodore computers), then +.B dxa +will attempt to see if a BASIC header is present, and if so, determine its +length and mark the section as a completely dead +datablock not eligible for further disassembly or referencing. If the +first line is a construct such as +.B 10 SYS +.BR 2061 , +then +.B dxa +will additionally parse the provided address and mark it as a valid routine +if the address is within the boundaries of the disassembled file. +Note that although its heuristics +are designed to be permissive, it may nevertheless misinterpret certain files +with intentionally pathologic line link addresses, and unusual applications +where the linked machine code is designed to actually +.I modify +the BASIC text may not +disassemble correctly with this option. These are highly atypical situations, +so this option will likely become the default in a future release. +.TP .B \--verbose .TP .B \-v @@ -366,16 +398,6 @@ .LP 65816 opcodes are not (yet) supported. .LP -The disassembler can easily be confused by the common idiom of tacking on -BASIC text to call an appended ML routine. There probably should be a special -case option for this. One workaround is to use the -.B \--datablock -option and specify the range as unused (such as in the case of -.B 10 SYS2061 -(Commodore), giving -.B \-b ?0801-080c -to ignore that range as data). -.LP There are a few options Marko created that aren't hooked up to anything (and are not documented here on purpose). I might finish these later. @@ -394,9 +416,9 @@ .B d65 0.2.1 by Marko Mäkelä. Original package (C)1993, 1994, 2000 Marko Mäkelä. Additional changes -(C)2006 Cameron Kaiser. +(C)2006-2019 Cameron Kaiser. .B dxa is maintained independently. -.SHWEBSITE +.SH WEBSITE http://www.floodgap.com/retrotech/xa/ diff -r cfa260872b23 -r 84c0facfc43c label.c --- a/label.c Fri Oct 08 02:40:00 2021 +0300 +++ b/label.c Thu Oct 14 01:38:52 2021 +0300 @@ -1,7 +1,8 @@ /*\ - * dxa v0.1.1 -- symbolic 65xx disassembler + * dxa -- symbolic 65xx disassembler * * Copyright (C) 1993, 1994 Marko M\"akel\"a + * Copyright (C) 2019 Cameron Kaiser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,16 +18,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Contacting the author: - * - * Via Internet E-mail: - * - * - * Via Snail Mail: - * Marko M\"akel\"a - * Sillitie 10 A - * FIN-01480 VANTAA - * Finland \*/ #define _LABEL_C_ @@ -84,10 +75,11 @@ if (!IsLabeled (address)) { // dirty kludge to allow zero page stuff to still work. this sometimes // guesses wrong - if (admode == zp) + if (admode == zp) { snprintf(defaultlabel, sizeof(defaultlabel), "$%02x", address); - else + } else { snprintf(defaultlabel, sizeof(defaultlabel), "$%04x", address); + } return defaultlabel; } diff -r cfa260872b23 -r 84c0facfc43c main.c --- a/main.c Fri Oct 08 02:40:00 2021 +0300 +++ b/main.c Thu Oct 14 01:38:52 2021 +0300 @@ -2,7 +2,8 @@ * dxa -- symbolic 65xx disassembler * * Based on d65 Copyright (C) 1993, 1994 Marko M\"akel\"a - * Changes for dxa (C) 2005, 2006 Cameron Kaiser + * Changes for dxa (C) 2005-2019 Cameron Kaiser + * Modifications for ++ version (c) 2015-2021 Matti 'ccr' Hamalainen * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,18 +20,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Marko does not maintain dxa, so questions specific to dxa should be - * sent to me at ckaiser@floodgap.com . Otherwise, - * - * Contacting the author: + * sent to me at ckaiser@floodgap.com. * - * Via Internet E-mail: - * - * - * Via Snail Mail: - * Marko M\"akel\"a - * Sillitie 10 A - * FIN-01480 VANTAA - * Finland \*/ #define _MAIN_C_ @@ -103,6 +94,8 @@ { "no-word-sa", 0, 0, 'q' }, { "get-sa", 0, 0, 'G' }, { "no-get-sa", 0, 0, 'g' }, + { "detect-basic", 0, 0, 'U' }, + { "no-detect-basic", 0, 0, 'u' }, { NULL, 0, 0, 0 } }; #endif /* LONG_OPTIONS */ @@ -144,11 +137,11 @@ while (!fFinished) #ifdef LONG_OPTIONS switch (getopt_long (argc, argv, - "?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQG", + "?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQGuU", cmd_options, &option_index)) { #else switch (getopt (argc, argv, - "?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQG")){ + "?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQGuU")){ #endif /* LONG_OPTIONS */ case -1: case ':': @@ -403,6 +396,12 @@ case 'Q': Options |= B_SA_WORD; break; + case 'u': + Options &= ~B_DETECT_BASIC; + break; + case 'U': + Options |= B_DETECT_BASIC; + break; case 'G': Options |= B_GET_SA; break; @@ -470,8 +469,8 @@ Usage: fprintf (stderr, "dxa %s -- symbolic 65xx disassembler\n", DXA_VERSION); fprintf (stderr, "Based on d65 copyright (C) 1993-4 Marko M\"akel\"a\n"); - fprintf (stderr, "Changes for dxa copyright (c) 2006-7 Cameron Kaiser\n"); - fprintf (stderr, "Modifications for ++ version (c) 2015 Matti 'ccr' Hamalainen \n\n"); + fprintf (stderr, "Changes for dxa copyright (c) 2006-19 Cameron Kaiser\n\n"); + fprintf (stderr, "Modifications for ++ version (c) 2015-2021 Matti 'ccr' Hamalainen \n\n"); fprintf (stderr, "Usage: %s [options] [filename]\n", prog); return 1; } @@ -487,7 +486,6 @@ StartAddress |= (unsigned)fgetc (file) << 8; } - if (feof (file)) { fprintf (stderr, "%s: Error reading the file.\n", prog); return 3; @@ -513,8 +511,91 @@ fclose (file); + BasicHeaderLength = 0; + if ((Options & B_DETECT_BASIC) && ((EndAddress - StartAddress) > 11) && (0 || + StartAddress == 0x0401 || /* PET */ + StartAddress == 0x0801 || /* C64 */ + StartAddress == 0x1001 || /* VIC, 16, +4 */ + StartAddress == 0x1c01 || /* 128 */ + 0)) { + /* If this file starts at a typical BASIC starting address, try to mark + that as data. Bonus points for turning SYS xxxx into an entry point. + For example, 1010 SYS 2061 comes out like this: + .byt $0b,$08,$0a + .byt $0a,$9e,$32 + .byt $30,$36,$31 + .byt $00,$00,$00 + */ + + /* Heuristic: try to validate the line link address. If it seems sane, + process further. */ + ADDR_T ll = Memory[StartAddress] + (Memory[StartAddress+1] << 8); + if ((ll > StartAddress) && (ll < EndAddress - 2) && Memory[ll] == 0) { + ADDR_T offs = StartAddress + 5; /* byte after presumed first token */ + ADDR_T val = 0; + ADDR_T i = 0; + int ok = 0; + + /* See if there is an encoded SYS address in the first line. */ + if (Memory[StartAddress+4] == 0x9e /* SYS */) { + for(; offs < StartAddress + 11; offs++) { /* stop overrun */ + if (Memory[offs] == 0x00) { + ok = 1; + break; + } + if (Memory[offs] == 32) /* space */ + continue; + if (Memory[offs] < 48 || Memory[offs] > 57) { + ok = 0; + break; + } + if (val > 6553 || (val == 6553 && Memory[offs] > 53)) { + ok = 0; /* imminent overflow */ + break; + } + val = (val * 10) + (Memory[offs] - 48); + } + + if (ok && val > StartAddress && val < EndAddress) { + /* Address validates; mark it as an entry point. */ + AddEntry (val, val, RTN_SURE); + if (fVerbose) +fprintf(stderr, "%s: SYS %d found, marking as entry point\n", prog, val); + } + } + + /* Try to find the end of BASIC text. Three nulls needed. */ + ok = 0; + for(; offs < EndAddress; offs++) { + if (Memory[offs] == 0) { + ok++; + if (ok == 3) { + BasicHeaderLength = (offs - StartAddress) + 1; + + /* Mark entire length of BASIC text as dead. */ + for(i = StartAddress; i < offs; i++) { + SetMemType(i, MEM_UNPROCESSED); + SetMemFlag(i); + } + if (fVerbose) +fprintf(stderr, "%s: BASIC text marked as dead through $%04x\n", prog, offs); + break; + } else + continue; + } + ok = 0; + } + if (ok < 3 && fVerbose) +fprintf(stderr, "%s: warning: couldn't find a valid end of BASIC text\n", prog); + + } else if (fVerbose) +fprintf(stderr, "%s: warning: BASIC starting address $%04x, but invalid line\n", + prog, StartAddress); + + } + if (fVerbose) - fprintf (stderr, "%s: disassembling %X-%X\n", prog, + fprintf (stderr, "%s: disassembling $%04x-$%04x\n", prog, StartAddress, EndAddress); if (ScanSpecified ()) { diff -r cfa260872b23 -r 84c0facfc43c options.h --- a/options.h Fri Oct 08 02:40:00 2021 +0300 +++ b/options.h Thu Oct 14 01:38:52 2021 +0300 @@ -117,6 +117,8 @@ unprocessed in the previous phases */ #define O_DBL_STRICT 3072 /* if a "sure" routine contains illegal code, exit the unassembling process immediately */ +#define B_DETECT_BASIC 524288 /* if SA = $0801, define $0801-... as data */ +#define B_NO_DETECT_BASIC 0 /* ... or don't */ /* IMPROVED DATA BLOCK DETECTION */ #define B_JMP_LOUSE 0 /* `stupid' jumps cause only warnings to the diff -r cfa260872b23 -r 84c0facfc43c scan.c --- a/scan.c Fri Oct 08 02:40:00 2021 +0300 +++ b/scan.c Thu Oct 14 01:38:52 2021 +0300 @@ -1,7 +1,8 @@ /*\ - * dxa v0.1.1 -- symbolic 65xx disassembler + * dxa -- symbolic 65xx disassembler * * Copyright (C) 1993, 1994 Marko M\"akel\"a + * Copyright (C) 2019 Cameron Kaiser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,16 +18,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Contacting the author: - * - * Via Internet E-mail: - * - * - * Via Snail Mail: - * Marko M\"akel\"a - * Sillitie 10 A - * FIN-01480 VANTAA - * Finland \*/ #define _SCAN_C_ @@ -42,6 +33,8 @@ opcodes *instr; unsigned int size, counter; + if (fVerbose) + fprintf(stderr, "\n%s: scanning sure section $%04x", prog, scanstart); for (address = scanstart;; address += size) { if (GetMemFlag (address)) /* rest of routine not valid */ @@ -588,7 +581,7 @@ (unsigned int)entry->address); return 1; } - entry = FindNextEntryType (NULL, ~0, RTN_SURE); + entry = FindNextEntryType (NULL, ~0, RTN_SURE); // valgrind fart DeleteEntry (entry); } @@ -801,7 +794,7 @@ } if (fVerbose) - fprintf (stderr, "\r%s: scanning at %X", prog, address); + fprintf (stderr, "\n%s: scanning at $%04x", prog, address); if (!ScanPotential (address)) { while ((entry = FindNextEntryType (NULL, ~RTN_B_TEMPORARY, diff -r cfa260872b23 -r 84c0facfc43c structures.h --- a/structures.h Fri Oct 08 02:40:00 2021 +0300 +++ b/structures.h Thu Oct 14 01:38:52 2021 +0300 @@ -1,7 +1,8 @@ /*\ - * dxa v0.1.1 -- symbolic 65xx disassembler + * dxa -- symbolic 65xx disassembler * * Copyright (C) 1993, 1994 Marko M\"akel\"a + * Copyright (C) 2006-2019 Cameron Kaiser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,16 +18,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Contacting the author: - * - * Via Internet E-mail: - * - * - * Via Snail Mail: - * Marko M\"akel\"a - * Sillitie 10 A - * FIN-01480 VANTAA - * Finland \*/ /* structures.h - memory structures and related constants and macros */ @@ -124,6 +115,9 @@ static inline void DOPutLabel(unsigned int address, const char *name, const int line) { + (void) name; + (void) line; + MemLabel[((ADDR_T)address) / (8 * sizeof *MemLabel)] |= (1 << (address % (8 * sizeof *MemLabel))); } @@ -135,6 +129,9 @@ static inline void DOPutReference(unsigned int address, const char *name, const int line) { + (void) name; + (void) line; + MemReferenced[address]++; } @@ -245,12 +242,13 @@ #ifndef _MAIN_C_ extern char *prog; -extern ADDR_T StartAddress, EndAddress; +extern ADDR_T StartAddress, EndAddress, BasicHeaderLength; extern int fVerbose; #else char *prog; ADDR_T StartAddress, EndAddress; int fVerbose = FALSE; +ADDR_T BasicHeaderLength = 0; #endif /* _MAIN_C_ */ #ifndef _TABLE_C_