changeset 14:84c0facfc43c

Merge changes from upstream v0.1.4.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 14 Oct 2021 01:38:52 +0300
parents cfa260872b23
children 89183953bddc
files INSTALL Makefile dump.c dxa.1 label.c main.c options.h scan.c structures.h
diffstat 9 files changed, 175 insertions(+), 94 deletions(-) [+]
line wrap: on
line diff
--- a/INSTALL	Fri Oct 08 02:40:00 2021 +0300
+++ b/INSTALL	Thu Oct 14 01:38:52 2021 +0300
@@ -3,7 +3,7 @@
 
 dxa should build out of the box with any even vaguely recent version of gcc.
 You might need to do some tweakage for non-Unix systems or cc. I have tested
-it on AIX, NetBSD and Mac OS X/Darwin. -- Cameron Kaiser
+it on AIX, Linux, NetBSD and Mac OS X (PowerPC and x86_64). -- Cameron Kaiser
 
 1. Uncomment LONG_OPTIONS in options.h if you have getopts_long() and want
    long options (--example). This is purely optional.
--- a/Makefile	Fri Oct 08 02:40:00 2021 +0300
+++ b/Makefile	Thu Oct 14 01:38:52 2021 +0300
@@ -3,7 +3,7 @@
 
 TARGETS = dxa
 OBJECTS = scan.o vector.o dump.o table.o label.o main.o
-DXA_VERSION = 0.1.3++4
+DXA_VERSION = 0.1.4++
 
 # choose the compiler and flags
 
--- a/dump.c	Fri Oct 08 02:40:00 2021 +0300
+++ b/dump.c	Thu Oct 14 01:38:52 2021 +0300
@@ -1,8 +1,8 @@
 /*\
- *  dxa v0.1.3 -- symbolic 65xx disassembler
+ *  dxa -- symbolic 65xx disassembler
  *
  *  Copyright (C) 1993, 1994 Marko M\"akel\"a
- *  Changes for dxa (C) 2004, 2006 Cameron Kaiser
+ *  Changes for dxa (C) 2004-2019 Cameron Kaiser
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,18 +19,8 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  *  Marko does not maintain dxa, so questions specific to dxa should be
- *  sent to me at ckaiser@floodgap.com . Otherwise,
- *
- *  Contacting the author:
+ *  sent to me at ckaiser@floodgap.com.
  *
- *   Via Internet E-mail:
- *      <Marko.Makela@FTP.FUNET.FI>
- *
- *   Via Snail Mail:
- *      Marko M\"akel\"a
- *      Sillitie 10 A
- *      FIN-01480 VANTAA
- *      Finland
 \*/
 
 #define _DUMP_C_
@@ -116,6 +106,8 @@
 	fprintf(stdout, "%s\t.word $%04x", lineprefix, StartAddress);
 
   fprintf (stdout, "\n%s\t* = $%04x\n\n", lineprefix, StartAddress);
+  if(BasicHeaderLength)
+	fprintf(stdout, "; %d byte BASIC header.\n", BasicHeaderLength);
 
 
   for (address = StartAddress; (ADDR_T)(address - StartAddress) <
@@ -149,20 +141,21 @@
 	}
 
         if (FindNextEntry (NULL, address, ~0, WRN_INSTR_WRITTEN_TO))
-          fprintf (stdout, "%s; Instruction opcode accessed.\n", lineprefix);
+          fprintf (stdout, "%s; Instruction opcode $%04x accessed.\n",
+                   lineprefix, address);
 
         entry = NULL;
 
         while ((entry = FindNextEntry (entry, address + counter, 0, 0)))
           switch (entry->type) {
           case WRN_PARAM_WRITTEN_TO:
-            fprintf (stdout, "%s; Instruction parameter accessed.\n",
-		     lineprefix);
+            fprintf (stdout, "%s; Instruction parameter $%04x accessed.\n",
+		     lineprefix, address + counter);
             break;
 
           case WRN_PARAM_JUMPED_TO:
-            fprintf (stdout, "%s; Instruction parameter jumped to.\n",
-		     lineprefix);
+            fprintf (stdout, "%s; Instruction parameter $%04x jumped to.\n",
+		     lineprefix, address + counter);
             break;
           }
       }
--- a/dxa.1	Fri Oct 08 02:40:00 2021 +0300
+++ b/dxa.1	Thu Oct 14 01:38:52 2021 +0300
@@ -1,4 +1,4 @@
-.TH DXA "1" "31 January 2007"
+.TH DXA "1" "31 January 2019"
 
 .SH NAME
 dxa \- 6502/R65C02 disassembler
@@ -89,12 +89,15 @@
 Specifies an address (in hexadecimal) that is declared to be a valid routine.
 .B It is strongly recommended
 that you specify the initial execution address as a routine.
-For example, for a Commodore 64 binary with a
-.B SYS 2064
-header, add
+For example, for a Commodore 64 binary with a BASIC header that performs
+.B SYS
+.BR 2064 ,
+specify
 .B \-r0810
-so that disassembly starts at that location. This may have interactions with
-datablock detection
+so that disassembly starts at that location (or use the
+.B \-U
+option, which can automatically do this for you). Note that specifying this
+manually may have interactions with datablock detection
 .RB ( \-d ).
 .TP
 .B \--routines filename
@@ -225,6 +228,35 @@
 .BR \-g ,
 then that address will be used here too.
 .TP
+.B \--no-detect-basic
+.TP
+.B \-u
+.TP
+.B \--detect-basic
+.TP
+.B \-U
+If the starting address is recognized as a typical BASIC entry point
+(currently supported for Commodore computers), then
+.B dxa
+will attempt to see if a BASIC header is present, and if so, determine its
+length and mark the section as a completely dead
+datablock not eligible for further disassembly or referencing. If the
+first line is a construct such as
+.B 10 SYS
+.BR 2061 ,
+then 
+.B dxa
+will additionally parse the provided address and mark it as a valid routine
+if the address is within the boundaries of the disassembled file.
+Note that although its heuristics
+are designed to be permissive, it may nevertheless misinterpret certain files
+with intentionally pathological line link addresses. Unusual applications
+where the linked machine code is designed to actually
+.I modify
+the BASIC text may also not
+disassemble correctly with this option. These are highly atypical situations,
+so this option will likely become the default in a future release.
+.TP
 .B \--verbose
 .TP
 .B \-v
@@ -366,16 +398,6 @@
 .LP
 65816 opcodes are not (yet) supported.
 .LP
-The disassembler can easily be confused by the common idiom of tacking on
-BASIC text to call an appended ML routine. There probably should be a special
-case option for this. One workaround is to use the
-.B \--datablock
-option and specify the range as unused (such as in the case of
-.B 10 SYS2061
-(Commodore), giving
-.B \-b ?0801-080c
-to ignore that range as data).
-.LP
 There are a few options Marko created that aren't hooked up to anything (and
 are not documented here on purpose). I might finish these later.
 
@@ -394,9 +416,9 @@
 .B d65
 0.2.1 by Marko Mäkelä.
 Original package (C)1993, 1994, 2000 Marko Mäkelä. Additional changes
-(C)2006 Cameron Kaiser.
+(C)2006-2019 Cameron Kaiser.
 .B dxa
 is maintained independently.
 
-.SHWEBSITE
+.SH WEBSITE
 http://www.floodgap.com/retrotech/xa/
--- a/label.c	Fri Oct 08 02:40:00 2021 +0300
+++ b/label.c	Thu Oct 14 01:38:52 2021 +0300
@@ -1,7 +1,8 @@
 /*\
- *  dxa v0.1.1 -- symbolic 65xx disassembler
+ *  dxa -- symbolic 65xx disassembler
  *
  *  Copyright (C) 1993, 1994 Marko M\"akel\"a
+ *  Copyright (C) 2019 Cameron Kaiser
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -17,16 +18,6 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- *  Contacting the author:
- *
- *   Via Internet E-mail:
- *      <Marko.Makela@FTP.FUNET.FI>
- *
- *   Via Snail Mail:
- *      Marko M\"akel\"a
- *      Sillitie 10 A
- *      FIN-01480 VANTAA
- *      Finland
 \*/
 
 #define _LABEL_C_
@@ -84,10 +75,11 @@
   if (!IsLabeled (address)) {
     // dirty kludge to allow zero page stuff to still work. this sometimes
     // guesses wrong
-    if (admode == zp)
+    if (admode == zp) {
       snprintf(defaultlabel, sizeof(defaultlabel), "$%02x", address);
-    else
+    } else {
       snprintf(defaultlabel, sizeof(defaultlabel), "$%04x", address);
+    }
 
     return defaultlabel;
   }
--- a/main.c	Fri Oct 08 02:40:00 2021 +0300
+++ b/main.c	Thu Oct 14 01:38:52 2021 +0300
@@ -2,7 +2,8 @@
  *  dxa -- symbolic 65xx disassembler
  *
  *  Based on d65 Copyright (C) 1993, 1994 Marko M\"akel\"a
- *  Changes for dxa (C) 2005, 2006 Cameron Kaiser
+ *  Changes for dxa (C) 2005-2019 Cameron Kaiser
+ *  Modifications for ++ version (c) 2015-2021 Matti 'ccr' Hamalainen <ccr@tnsp.org>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,18 +20,8 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  *  Marko does not maintain dxa, so questions specific to dxa should be
- *  sent to me at ckaiser@floodgap.com . Otherwise,
- *
- *  Contacting the author:
+ *  sent to me at ckaiser@floodgap.com.
  *
- *   Via Internet E-mail:
- *      <Marko.Makela@FTP.FUNET.FI>
- *
- *   Via Snail Mail:
- *      Marko M\"akel\"a
- *      Sillitie 10 A
- *      FIN-01480 VANTAA
- *      Finland
 \*/
 
 #define _MAIN_C_
@@ -103,6 +94,8 @@
     { "no-word-sa", 0, 0, 'q' },
     { "get-sa", 0, 0, 'G' },
     { "no-get-sa", 0, 0, 'g' },
+    { "detect-basic", 0, 0, 'U' },
+    { "no-detect-basic", 0, 0, 'u' },
     { NULL, 0, 0, 0 }
   };
 #endif /* LONG_OPTIONS */
@@ -144,11 +137,11 @@
   while (!fFinished)
 #ifdef LONG_OPTIONS
     switch (getopt_long (argc, argv,
-			 "?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQG",
+			 "?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQGuU",
                          cmd_options, &option_index)) {
 #else
     switch (getopt (argc, argv,
-	"?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQG")){
+	"?b:B:L:r:R:h:l:a:d:p:g:t:eEnNsSjJoOcCmMvVwWxXqQGuU")){
 #endif /* LONG_OPTIONS */
     case -1:
     case ':':
@@ -403,6 +396,12 @@
     case 'Q':
 	Options |= B_SA_WORD;
 	break;
+    case 'u':
+	Options &= ~B_DETECT_BASIC;
+	break;
+    case 'U':
+	Options |= B_DETECT_BASIC;
+	break;
     case 'G':
 	Options |= B_GET_SA;
 	break;
@@ -470,8 +469,8 @@
   Usage:
     fprintf (stderr, "dxa %s -- symbolic 65xx disassembler\n", DXA_VERSION);
     fprintf (stderr, "Based on d65 copyright (C) 1993-4 Marko M\"akel\"a\n");
-    fprintf (stderr, "Changes for dxa copyright (c) 2006-7 Cameron Kaiser\n");
-    fprintf (stderr, "Modifications for ++ version (c) 2015 Matti 'ccr' Hamalainen <ccr@tnsp.org>\n\n");
+    fprintf (stderr, "Changes for dxa copyright (c) 2006-19 Cameron Kaiser\n\n");
+    fprintf (stderr, "Modifications for ++ version (c) 2015-2021 Matti 'ccr' Hamalainen <ccr@tnsp.org>\n\n");
     fprintf (stderr, "Usage: %s [options] [filename]\n", prog);
     return 1;
   }
@@ -487,7 +486,6 @@
   	StartAddress |= (unsigned)fgetc (file) << 8;
   }
 
-
   if (feof (file)) {
     fprintf (stderr, "%s: Error reading the file.\n", prog);
     return 3;
@@ -513,8 +511,91 @@
 
   fclose (file);
 
+  BasicHeaderLength = 0;
+  if ((Options & B_DETECT_BASIC) && ((EndAddress - StartAddress) > 11) && (0 ||
+	StartAddress == 0x0401 || /* PET */
+	StartAddress == 0x0801 || /* C64 */
+	StartAddress == 0x1001 || /* VIC, 16, +4 */
+	StartAddress == 0x1c01 || /* 128 */
+      0)) {
+    /* If this file starts at a typical BASIC starting address, try to mark
+       that as data. Bonus points for turning SYS xxxx into an entry point.
+       For example, 2570 SYS2061 comes out like this:
+        .byt $0b,$08,$0a
+        .byt $0a,$9e,$32
+        .byt $30,$36,$31
+        .byt $00,$00,$00
+    */
+
+    /* Heuristic: try to validate the line link address. If it seems sane,
+	process further. */
+    ADDR_T ll = Memory[StartAddress] + (Memory[StartAddress+1] << 8);
+    if ((ll > StartAddress) && (ll < EndAddress - 2) && Memory[ll] == 0) {
+        ADDR_T offs = StartAddress + 5; /* byte after presumed first token */
+	ADDR_T val = 0;
+	ADDR_T i = 0;
+	int ok = 0;
+
+    	/* See if there is an encoded SYS address in the first line. */
+    	if (Memory[StartAddress+4] == 0x9e /* SYS */) {
+		for(; offs < StartAddress + 11; offs++) { /* stop overrun */
+    			if (Memory[offs] == 0x00) {
+				ok = 1;
+				break;
+			}
+			if (Memory[offs] == 32) /* space */
+				continue;
+			if (Memory[offs] < 48 || Memory[offs] > 57) {
+				ok = 0;
+				break;
+			}
+			if (val > 6553 || (val == 6553 && Memory[offs] > 53)) {
+				ok = 0; /* imminent overflow */
+				break;
+			}
+			val = (val * 10) + (Memory[offs] - 48); 
+		}
+
+		if (ok && val > StartAddress && val < EndAddress) {
+			/* Address validates; mark it as an entry point. */
+			AddEntry (val, val, RTN_SURE);
+			if (fVerbose)
+fprintf(stderr, "%s: SYS %d found, marking as entry point\n", prog, val);
+		}
+	}
+
+	/* Try to find the end of BASIC text. Three nulls needed. */
+	ok = 0;
+	for(; offs < EndAddress; offs++) {
+		if (Memory[offs] == 0) {
+			ok++;
+			if (ok == 3) {
+				BasicHeaderLength = (offs - StartAddress) + 1;
+
+				/* Mark entire length of BASIC text as dead. */
+				for(i = StartAddress; i < offs; i++) {
+					SetMemType(i, MEM_UNPROCESSED);
+					SetMemFlag(i);
+				}
+				if (fVerbose)
+fprintf(stderr, "%s: BASIC text marked as dead through $%04x\n", prog, offs);
+				break;
+			} else
+				continue;
+		}
+		ok = 0;
+	}
+	if (ok < 3 && fVerbose)
+fprintf(stderr, "%s: warning: couldn't find a valid end of BASIC text\n", prog);
+
+     } else if (fVerbose)
+fprintf(stderr, "%s: warning: BASIC starting address $%04x, but invalid line\n",
+	prog, StartAddress);
+
+  }
+
   if (fVerbose)
-    fprintf (stderr, "%s: disassembling %X-%X\n", prog,
+    fprintf (stderr, "%s: disassembling $%04x-$%04x\n", prog,
              StartAddress, EndAddress);
 
   if (ScanSpecified ()) {
--- a/options.h	Fri Oct 08 02:40:00 2021 +0300
+++ b/options.h	Thu Oct 14 01:38:52 2021 +0300
@@ -117,6 +117,8 @@
                               unprocessed in the previous phases */
 #define O_DBL_STRICT  3072 /* if a "sure" routine contains illegal code,
                               exit the unassembling process immediately */
+#define B_DETECT_BASIC	524288 /* if SA is a BASIC start address, mark BASIC text as data */
+#define B_NO_DETECT_BASIC 0    /* ... or don't */
 
                            /* IMPROVED DATA BLOCK DETECTION */
 #define B_JMP_LOUSE   0    /* `stupid' jumps cause only warnings to the
--- a/scan.c	Fri Oct 08 02:40:00 2021 +0300
+++ b/scan.c	Thu Oct 14 01:38:52 2021 +0300
@@ -1,7 +1,8 @@
 /*\
- *  dxa v0.1.1 -- symbolic 65xx disassembler
+ *  dxa -- symbolic 65xx disassembler
  *
  *  Copyright (C) 1993, 1994 Marko M\"akel\"a
+ *  Copyright (C) 2019 Cameron Kaiser
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -17,16 +18,6 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- *  Contacting the author:
- *
- *   Via Internet E-mail:
- *      <Marko.Makela@FTP.FUNET.FI>
- *
- *   Via Snail Mail:
- *      Marko M\"akel\"a
- *      Sillitie 10 A
- *      FIN-01480 VANTAA
- *      Finland
 \*/
 
 #define _SCAN_C_
@@ -42,6 +33,8 @@
   opcodes *instr;
 
   unsigned int size, counter;
+  if (fVerbose)
+    fprintf(stderr, "\n%s: scanning sure section $%04x", prog, scanstart);
 
   for (address = scanstart;; address += size) {
     if (GetMemFlag (address)) /* rest of routine not valid */
@@ -588,7 +581,7 @@
 			(unsigned int)entry->address);
 		return 1;
 	}
-    entry = FindNextEntryType (NULL, ~0, RTN_SURE);
+    entry = FindNextEntryType (NULL, ~0, RTN_SURE); // valgrind fart
     DeleteEntry (entry);
   }
 
@@ -801,7 +794,7 @@
       }
 
     if (fVerbose)
-      fprintf (stderr, "\r%s: scanning at %X", prog, address);
+      fprintf (stderr, "\n%s: scanning at $%04x", prog, address);
 
     if (!ScanPotential (address)) {
       while ((entry = FindNextEntryType (NULL, ~RTN_B_TEMPORARY,
--- a/structures.h	Fri Oct 08 02:40:00 2021 +0300
+++ b/structures.h	Thu Oct 14 01:38:52 2021 +0300
@@ -1,7 +1,8 @@
 /*\
- *  dxa v0.1.1 -- symbolic 65xx disassembler
+ *  dxa -- symbolic 65xx disassembler
  *
  *  Copyright (C) 1993, 1994 Marko M\"akel\"a
+ *  Copyright (C) 2006-2019 Cameron Kaiser
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -17,16 +18,6 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- *  Contacting the author:
- *
- *   Via Internet E-mail:
- *      <Marko.Makela@FTP.FUNET.FI>
- *
- *   Via Snail Mail:
- *      Marko M\"akel\"a
- *      Sillitie 10 A
- *      FIN-01480 VANTAA
- *      Finland
 \*/
 
 /* structures.h - memory structures and related constants and macros */
@@ -124,6 +115,9 @@
 
 static inline void DOPutLabel(unsigned int address, const char *name, const int line)
 {
+  (void) name;
+  (void) line;
+
   MemLabel[((ADDR_T)address) / (8 * sizeof *MemLabel)] |= (1 << (address % (8 * sizeof *MemLabel)));
 }
 
@@ -135,6 +129,9 @@
 
 static inline void DOPutReference(unsigned int address, const char *name, const int line)
 {
+  (void) name;
+  (void) line;
+
   MemReferenced[address]++;
 }
 
@@ -245,12 +242,13 @@
 
 #ifndef _MAIN_C_
 extern char *prog;
-extern ADDR_T StartAddress, EndAddress;
+extern ADDR_T StartAddress, EndAddress, BasicHeaderLength;
 extern int fVerbose;
 #else
 char *prog;
 ADDR_T StartAddress, EndAddress;
 int fVerbose = FALSE;
+ADDR_T BasicHeaderLength = 0;
 #endif /* _MAIN_C_ */
 
 #ifndef _TABLE_C_