changeset 1505:155c87cda4b7

Cleaned up parser code.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 07 Feb 2015 06:11:24 +0200
parents 521f7f5726dc
children b91105ab4c1f
files liblocfile.c liblocfile.h
diffstat 2 files changed, 246 insertions(+), 293 deletions(-) [+]
line wrap: on
line diff
--- a/liblocfile.c	Sat Feb 07 06:06:56 2015 +0200
+++ b/liblocfile.c	Sat Feb 07 06:11:24 2015 +0200
@@ -139,6 +139,23 @@
 }
 
 
+/* Release the strings a marker holds (names, coders, URI) via th_free(). */
+void locFreeMarkerData(LocMarker *marker)
+{
+    int slot = 0;
+    while (slot < LOC_MAX_NAMES)
+    {
+        th_free(marker->names[slot].name);
+        marker->names[slot].name = NULL;
+        th_free(marker->coders[slot].name);
+        marker->coders[slot].name = NULL;
+        slot++;
+    }
+    th_free(marker->uri);
+    marker->uri = NULL;
+}
+
+
 enum
 {
     PM_IDLE = 0,
@@ -157,6 +174,31 @@
 }
 
 
+/* Save the current mode in prevMode, then update parseMode and nextMode.
+ * Passing -1 for either argument leaves that mode untouched. */
+static void locPMSet(LocFileInfo *f, int parseMode, int nextMode)
+{
+    const int keepMode = -1;
+
+    f->prevMode = f->parseMode;
+    f->parseMode = (parseMode == keepMode) ? f->parseMode : parseMode;
+    f->nextMode = (nextMode == keepMode) ? f->nextMode : nextMode;
+}
+
+
+/* Print "[file:line @ field]: " plus the formatted message to stderr; set PM_ERROR. */
+static void locPMErr(LocFileInfo *f, const char *fmt, ...)
+{
+    va_list ap;
+    /* lineNum is unsigned int in LocFileInfo, so it must be printed with %u. */
+    fprintf(stderr, "[%s:%u @ %d]: ", f->filename, f->lineNum, f->field);
+    f->parseMode = PM_ERROR;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+}
+
+
 static BOOL checkFlag(int flags, int mask, int flag)
 {
     if (mask)
@@ -169,9 +211,9 @@
 static BOOL checkMutex(LocFileInfo *f, int *flags, int mask, int flag)
 {
     if (!checkFlag(*flags, mask, 0) &&
-        !checkFlag(*flags, mask, flag)) {
-        THERR("Invalid flags setting on line #%d in '%s'\n",
-            f->lineNum, f->filename);
+        !checkFlag(*flags, mask, flag))
+    {
+        locPMErr(f, "Invalid flags setting.\n");
         return FALSE;
     }
     else
@@ -216,13 +258,11 @@
         {
         case '\n':
         case '\r':
-            THERR("Unexpected EOL inside text field on line #%d of '%s'.\n",
-                  f->lineNum, f->filename);
+            locPMErr(f, "Unexpected EOL inside text field.\n");
             return NULL;
 
         case EOF:
-            THERR("Unexpected EOF inside text field on line #%d of '%s'.\n",
-                  f->lineNum, f->filename);
+            locPMErr(f, "Unexpected EOF inside text field.\n");
             return NULL;
 
         case '\\':
@@ -230,9 +270,7 @@
             i = locFGetc(f);
             if (i == EOF)
             {
-                THERR
-                    ("Unexpected EOF inside text field on line #%d of '%s'.\n",
-                     f->lineNum, f->filename);
+                locPMErr(f, "Unexpected EOF inside text field.\n");
                 return NULL;
             }
             else if (i == '\n' || i == '\r')
@@ -340,47 +378,190 @@
 }
 
 
+/* Parse one entry of a multi-valued field (desc) into data[], or end the field. */
+static void locParseMultiField(LocFileInfo *f, char *fieldsep, char sep, const char *desc, LocName *data)
 {
+    /* Entering the field: start at slot 0 with this field's separators. */
+    if (f->subfield < 0)
+    {
+        f->subfield = 0;
+        f->fieldsep = fieldsep;
+        f->sep = sep;
+    }
+    /* A different last separator means the list ended; move to next field. */
+    if (f->sep != sep)
+    {
+        f->fieldsep = ";";
+        f->subfield = -1;
+        f->field++;
+        locPMSet(f, PM_FIELD, -1);
+        return;
+    }
+    if (f->subfield >= LOC_MAX_NAMES)
+    {
+        locPMErr(f, "Too many %s (max %d).\n", desc, LOC_MAX_NAMES);
+        return;
+    }
+    th_free(data[f->subfield].name);
+    data[f->subfield++].name = parseFieldString(f, f->fieldsep);
+    locPMSet(f, PM_NEXT, PM_FIELD_SEP);
+    if (strchr(f->fieldsep, f->ch) == NULL)
+        locPMErr(f, "Expected field separator '%s' after %s.\n", f->fieldsep, desc);
+}
 
 
-static BOOL locCheckForEOL(LocFileInfo *f, int *parseMode)
+/* Parse record field number f->field into *marker and advance the parser
+ * state kept in *f. Field 8 ends the record: the marker is handed to
+ * locAddNew() and its strings are released. Errors go through locPMErr(). */
+static void locParseLocField(LocFileInfo *f, MapLocations *l, LocMarker *marker)
 {
+    BOOL res = FALSE;
+    char *tmpStr;
+    int i;
+
     if (f->ch == '\n' || f->ch == '\r')
     {
-        *parseMode = PM_ERROR;
-        THERR("Unexpected EOL on line #%d of '%s'.\n",
-            f->lineNum, f->filename);
-        return FALSE;
+        /* The freeform field (8) may be empty; EOL is an error only before it. */
+        if (f->field > 0 && f->field < 8)
+        {
+            locPMErr(f, "Unexpected end of line.\n");
+            return;
+        }
     }
-    else
-        return TRUE;
+
+    switch (f->field)
+    {
+    case 1:            /* X-coordinate */
+        res = parseFieldInt(f, &marker->x);
+        f->fieldsep = ";";
+        if (res)
+        {
+            f->field++;
+            locPMSet(f, PM_NEXT, PM_FIELD_SEP);
+        }
+        else
+            locPMErr(f, "Error parsing X-coordinate.\n");
+        break;
+
+    case 2:            /* Y-coordinate */
+        if (parseFieldInt(f, &marker->y))
+        {
+            f->field++;
+            locPMSet(f, PM_NEXT, PM_FIELD_SEP);
+        }
+        else
+            locPMErr(f, "Error parsing Y-coordinate.\n");
+        break;
+
+    case 3:            /* Label orientation and flags */
+        if (parseFieldInt(f, &marker->dir) && parseFlags(f, &marker->flags))
+        {
+            f->field++;
+            locPMSet(f, PM_NEXT, PM_FIELD_SEP);
+        }
+        else
+            locPMErr(f, "Error parsing label orientation or flags.\n");
+        break;
+
+    case 4: /* Location name(s) */
+        locParseMultiField(f, "|;", '|', "location names", marker->names);
+        break;
+
+    case 5:            /* Coders */
+        locParseMultiField(f, ",;", ',', "coder names", marker->coders);
+        break;
+
+    case 6:            /* Date */
+        marker->valid = FALSE;
+        tmpStr = parseFieldString(f, f->fieldsep);
+        if (tmpStr && tmpStr[0])
+        {
+            if (sscanf(tmpStr, LOC_TIMEFMT, &marker->added.day, &marker->added.month, &marker->added.year) == 3)
+                marker->valid = TRUE;
+            else
+            {
+                locPMErr(f, "Warning, invalid timestamp '%s' in '%s'.\n",
+                    tmpStr, marker->names[0].name);
+            }
+        }
+        if (!strchr(f->fieldsep, f->ch))
+            locPMErr(f, "Expected field separator '%s' after DATE.\n", f->fieldsep);
+        else
+        {
+            f->field++;
+            locPMSet(f, PM_NEXT, PM_FIELD_SEP);
+        }
+        th_free(tmpStr);
+        break;
+
+    case 7:            /* URI */
+        th_free(marker->uri);
+        marker->uri = parseFieldString(f, f->fieldsep);
+        f->field++;
+        locPMSet(f, PM_NEXT, PM_FIELD_SEP);
+        break;
+
+    case 8:            /* Freeform */
+        tmpStr = parseFieldString(f, "\r\n");
+        /* Check coordinates. NOTE(review): PM_ERROR set here is reset by PM_IDLE below when the location is added - confirm intent. */
+        if (marker->x < 1 || marker->y < 1)
+        {
+            locPMErr(f, "Invalid X or Y coordinate (%d, %d), for location '%s'. Must be > 0.\n",
+                marker->x, marker->y, marker->names[0].name);
+        }
+
+        /* Check if location already exists */
+        marker->x = marker->x + marker->ox - 1;
+        marker->y = marker->y + marker->oy - 1;
+
+        i = locFindByCoords(l, marker->x, marker->y, TRUE);
+        if (i >= 0)
+        {
+            LocMarker *tloc = l->locations[i];
+            locPMErr(f, "Warning, location already in list! (%d,%d) '%s' <-> (%d,%d) '%s'\n",
+                tloc->x, tloc->y, tloc->names[0].name,
+                marker->x, marker->y, marker->names[0].name);
+        }
+        else
+        {
+            /* Add new location to our list */
+            locAddNew(l, marker->x, marker->y, marker->dir, marker->flags,
+                      marker->names, marker->coders, &marker->added,
+                      marker->valid, marker->uri, tmpStr, f);
+            locPMSet(f, PM_IDLE, -1);
+        }
+
+        locFreeMarkerData(marker);
+        th_free(tmpStr);
+        break;
+
+    default:
+        locPMErr(f, "FATAL ERROR! Invalid state=%d!\n", f->parseMode);
+    }
 }
 
 
 BOOL locParseLocStream(LocFileInfo *f, MapLocations *l, int offX, int offY)
 {
-    int parseMode, prevMode, nextMode, field, subfield, sep;
-    int tmpX, tmpY, tmpOrient, tmpFlags, i;
-    char *fieldsep = NULL, *tmpStr, *tmpURI = NULL;
-    char *tmpLocNames[LOC_MAX_NAMES], *tmpCoderNames[LOC_MAX_NAMES];
-    BOOL res, tmpTimeSet = FALSE, versionSet = FALSE;
-    DateStruct tmpTime;
+    int i;
+    LocMarker marker;
 
-    /* Parse data */
-    memset(tmpLocNames, 0, sizeof(tmpLocNames));
-    memset(tmpCoderNames, 0, sizeof(tmpCoderNames));
-    parseMode = PM_IDLE;
-    nextMode = prevMode = PM_ERROR;
-    field = subfield = sep = -1;
-    res = FALSE;
+    memset(&marker, 0, sizeof(marker));
+    marker.ox = offX;
+    marker.oy = offY;
+
+    f->parseMode = PM_IDLE;
+    f->nextMode = f->prevMode = PM_ERROR;
+    f->field = f->subfield = f->sep = -1;
+
     f->ch = locFGetc(f);
     do
     {
-        switch (parseMode)
+        switch (f->parseMode)
         {
         case PM_IDLE:
             if (f->ch == EOF)
-                parseMode = PM_EOF;
+                locPMSet(f, PM_EOF, -1);
             else if (f->ch == '\r')
             {
                 f->lineNum++;
@@ -395,15 +576,13 @@
             }
             else if (f->ch == '#')
             {
-                prevMode = parseMode;
-                parseMode = PM_COMMENT;
-                nextMode = PM_IDLE;
+                locPMSet(f, PM_COMMENT, PM_IDLE);
             }
             else if (isdigit(f->ch))
             {
                 /* Start of a record */
-                parseMode = PM_FIELD;
-                field = 1;
+                locPMSet(f, PM_FIELD, -1);
+                f->field = 1;
             }
             else if (isspace(f->ch))
             {
@@ -412,8 +591,7 @@
             else
             {
                 /* Syntax error */
-                parseMode = PM_ERROR;
-                THERR("Syntax error in '%s' line #%d.\n",
+                locPMErr(f, "Syntax error in '%s' line #%d.\n",
                     f->filename, f->lineNum);
             }
             break;
@@ -426,17 +604,17 @@
                 if (f->ch == '\n')
                     f->ch = locFGetc(f);
                 f->lineNum++;
-                prevMode = parseMode;
-                parseMode = nextMode;
+                f->prevMode = f->parseMode;
+                f->parseMode = f->nextMode;
                 break;
             case '\n':
                 f->ch = locFGetc(f);
                 f->lineNum++;
-                prevMode = parseMode;
-                parseMode = nextMode;
+                f->prevMode = f->parseMode;
+                f->parseMode = f->nextMode;
                 break;
             case EOF:
-                parseMode = PM_EOF;
+                f->parseMode = PM_EOF;
                 break;
             default:
                 f->ch = locFGetc(f);
@@ -444,7 +622,7 @@
                 /* Because loc file identification should be the first
                  * comment line, we check it here.
                  */
-                if (versionSet || !isalpha(f->ch))
+                if (f->versionSet || !isalpha(f->ch))
                     break;
 
                 char *tmp = parseFieldString(f, "(\n\r");
@@ -463,7 +641,7 @@
                                 "Refusing to read due to potential incompatibilities. If you neverthless "
                                 "wish to proceed, change the loc file's version to match internal version.\n",
                                 verMajor, verMinor, LOC_VERSION_MAJOR, LOC_VERSION_MINOR);
-                            parseMode = PM_ERROR;
+                            f->parseMode = PM_ERROR;
                         }
                         else if (verMinor != LOC_VERSION_MINOR)
                         {
@@ -476,7 +654,7 @@
                     {
                         THERR("Invalid or malformed LOC file, version not found (%s).\n",
                              verStr);
-                        parseMode = PM_ERROR;
+                        f->parseMode = PM_ERROR;
                     }
                     th_free(verStr);
                 }
@@ -484,10 +662,10 @@
                 {
                     THERR("Invalid LOC file, the file ID is missing ('# %s (version %d.%d)' should be the first line.)\n",
                         LOC_MAGIC, LOC_VERSION_MAJOR, LOC_VERSION_MINOR);
-                    parseMode = PM_ERROR;
+                    f->parseMode = PM_ERROR;
                 }
                 th_free(tmp);
-                versionSet = TRUE;
+                f->versionSet = TRUE;
                 break;
             }
             break;
@@ -496,9 +674,7 @@
             switch (f->ch)
             {
             case EOF:
-                parseMode = PM_ERROR;
-                THERR("Unexpected end of file on line #%d of '%s'.\n",
-                      f->lineNum, f->filename);
+                locPMErr(f, "Unexpected end of file.\n");
                 break;
             case 32:
             case 9:
@@ -509,9 +685,7 @@
                 i = locFGetc(f);
                 if (i != '\n' && i != '\r')
                 {
-                    parseMode = PM_ERROR;
-                    THERR("Expected EOL on line #%d of '%s'.\n",
-                          f->lineNum, f->filename);
+                    locPMErr(f, "Expected end of line.\n");
                 }
                 else
                 {
@@ -522,264 +696,41 @@
                 }
                 break;
             default:
-                prevMode = parseMode;
-                parseMode = nextMode;
+                f->prevMode = f->parseMode;
+                f->parseMode = f->nextMode;
                 break;
             }
             break;
 
         case PM_FIELD_SEP:
-            if (strchr(fieldsep, f->ch) != NULL)
+            if (strchr(f->fieldsep, f->ch) != NULL)
             {
-                sep = f->ch;
-                prevMode = parseMode;
-                nextMode = PM_FIELD;
-                parseMode = PM_NEXT;
+                f->sep = f->ch;
                 f->ch = locFGetc(f);
+                locPMSet(f, PM_NEXT, PM_FIELD);
             }
             else
             {
-                parseMode = PM_ERROR;
-                THERR("Expected field separator '%s', got '%c' on line #%d of '%s'.\n",
-                    fieldsep, f->ch, f->lineNum, f->filename);
+                locPMErr(f, "Expected field separator '%s', got '%c' (%d).\n",
+                    f->fieldsep, f->ch, f->ch);
             }
             break;
 
         case PM_FIELD:
-            if (field > 0 && field < 8 && !locCheckForEOL(f, &parseMode))
-                break;
-
-            switch (field)
-            {
-            case 1:            /* X-coordinate */
-                res = parseFieldInt(f, &tmpX);
-                fieldsep = ";";
-                if (res)
-                {
-                    field++;
-                    prevMode = parseMode;
-                    nextMode = PM_FIELD_SEP;
-                    parseMode = PM_NEXT;
-                }
-                break;
-
-            case 2:            /* Y-coordinate */
-                res = parseFieldInt(f, &tmpY);
-                if (res)
-                {
-                    field++;
-                    prevMode = parseMode;
-                    nextMode = PM_FIELD_SEP;
-                    parseMode = PM_NEXT;
-                }
-                break;
-
-            case 3:            /* Label orientation and flags */
-                res = parseFieldInt(f, &tmpOrient);
-                if (res)
-                    res = parseFlags(f, &tmpFlags);
-
-                if (res)
-                {
-                    field++;
-                    prevMode = parseMode;
-                    nextMode = PM_FIELD_SEP;
-                    parseMode = PM_NEXT;
-                }
-                break;
-
-            case 4:            /* Location name(s) */
-                if (subfield < 0)
-                {
-                    subfield = 0;
-                    fieldsep = "|;";
-                    sep = '|';
-                }
-                if (sep == '|')
-                {
-                    if (subfield < LOC_MAX_NAMES)
-                    {
-                        th_free(tmpLocNames[subfield]);
-                        tmpLocNames[subfield++] = parseFieldString(f, fieldsep);
-                        prevMode = parseMode;
-                        nextMode = PM_FIELD_SEP;
-                        parseMode = PM_NEXT;
-                        if (!strchr(fieldsep, f->ch))
-                        {
-                            parseMode = PM_ERROR;
-                            THERR("Expected field separator '%s' after LOCNAMES on line #%d of '%s'.\n",
-                                fieldsep, f->lineNum, f->filename);
-                        }
-                    }
-                    else
-                    {
-                        parseMode = PM_ERROR;
-                        THERR("Too many location names (max %d) on line #%d of '%s'.\n",
-                            LOC_MAX_NAMES, f->lineNum, f->filename);
-                    }
-                }
-                else
-                {
-                    fieldsep = ";";
-                    subfield = -1;
-                    field++;
-                    prevMode = parseMode;
-                    parseMode = PM_FIELD;
-                }
-                break;
-
-            case 5:            /* Coders */
-                if (subfield < 0)
-                {
-                    subfield = 0;
-                    fieldsep = ",;";
-                    sep = ',';
-                }
-                if (sep == ',')
-                {
-                    if (subfield < LOC_MAX_NAMES)
-                    {
-                        th_free(tmpCoderNames[subfield]);
-                        tmpCoderNames[subfield++] =
-                            parseFieldString(f, fieldsep);
-                        prevMode = parseMode;
-                        nextMode = PM_FIELD_SEP;
-                        parseMode = PM_NEXT;
-                        if (!strchr(fieldsep, f->ch))
-                        {
-                            parseMode = PM_ERROR;
-                            THERR(
-                                "Expected field separator '%s' after CODERNAMES on line #%d of '%s'.\n",
-                                fieldsep, f->lineNum, f->filename);
-                        }
-                    }
-                    else
-                    {
-                        parseMode = PM_ERROR;
-                        THERR(
-                            "Too many coder names (max %d) on line #%d of '%s'.\n",
-                            LOC_MAX_NAMES, f->lineNum, f->filename);
-                    }
-                }
-                else
-                {
-                    fieldsep = ";";
-                    subfield = -1;
-                    field++;
-                    prevMode = parseMode;
-                    parseMode = PM_FIELD;
-                }
-                break;
-
-            case 6:            /* Date */
-                tmpTimeSet = FALSE;
-                tmpStr = parseFieldString(f, fieldsep);
-                if (tmpStr && tmpStr[0])
-                {
-                    if (sscanf
-                        (tmpStr, LOC_TIMEFMT, &tmpTime.day, &tmpTime.month,
-                         &tmpTime.year) == 3)
-                        tmpTimeSet = TRUE;
-                    else
-                    {
-                        THERR("Warning, invalid timestamp '%s' in '%s' (line #%d in '%s')\n",
-                            tmpStr, tmpLocNames[0], f->lineNum, f->filename);
-                        parseMode = PM_ERROR;
-                    }
-                }
-                if (!strchr(fieldsep, f->ch))
-                {
-                    parseMode = PM_ERROR;
-                    THERR("Expected field separator '%s' after DATE on line #%d of '%s'.\n",
-                        fieldsep, f->lineNum, f->filename);
-                }
-                else
-                {
-                    field++;
-                    prevMode = parseMode;
-                    nextMode = PM_FIELD_SEP;
-                    parseMode = PM_NEXT;
-                }
-                th_free(tmpStr);
-                break;
-
-            case 7:            /* URI */
-                th_free(tmpURI);
-                tmpURI = parseFieldString(f, fieldsep);
-                field++;
-                prevMode = parseMode;
-                nextMode = PM_FIELD_SEP;
-                parseMode = PM_NEXT;
-                break;
-
-            case 8:            /* Freeform */
-                tmpStr = parseFieldString(f, "\r\n");
-
-                /* Check coordinates */
-                if (tmpX < 1 || tmpY < 1)
-                {
-                    THERR("Invalid X or Y coordinate (%d, %d), for location '%s' on line #%d of '%s'. Must be > 0.\n",
-                        tmpX, tmpY, tmpLocNames[0], f->lineNum, f->filename);
-                    parseMode = PM_ERROR;
-                }
-
-                /* Check if location already exists */
-                tmpX = tmpX + offX - 1;
-                tmpY = tmpY + offY - 1;
-
-                i = locFindByCoords(l, tmpX, tmpY, TRUE);
-                if (i >= 0)
-                {
-                    LocMarker *tloc = l->locations[i];
-                    THERR("Warning, location already in list! (%d,%d) '%s' <-> (%d,%d) '%s'\n",
-                        tloc->x, tloc->y, tloc->names[0].name, tmpX, tmpY,
-                        tmpLocNames[0]);
-                    parseMode = PM_ERROR;
-                }
-                else
-                {
-                    /* Add new location to our list */
-                    prevMode = parseMode;
-                    parseMode = PM_IDLE;
-                }
-                for (i = 0; i < LOC_MAX_NAMES; i++)
-                {
-                    th_free(tmpLocNames[i]);
-                    th_free(tmpCoderNames[i]);
-                    tmpLocNames[i] = tmpCoderNames[i] = NULL;
-                }
-                th_free(tmpStr);
-                break;
-
-            default:
-                THERR("FATAL ERROR! Invalid state=%d!\n", parseMode);
-                parseMode = PM_ERROR;
-            }
-            if (!res)
-            {
-                parseMode = PM_ERROR;
-                THERR("Error parsing field %d on line #%d of '%s'.\n",
-                    field, f->lineNum, f->filename);
-            }
+            locParseLocField(f, l, &marker);
             break;
 
         default:
-            THERR("Invalid state in loc-file parser - mode=%d, prev=%d, next=%d, line=%d, file='%s'.\n",
-                parseMode, prevMode, nextMode, f->lineNum, f->filename);
-            parseMode = PM_ERROR;
+            locPMErr(f, "Invalid state in loc-file parser - mode=%d, prev=%d, next=%d.\n",
+                f->parseMode, f->prevMode, f->nextMode);
             break;
         }
     }
-    while (parseMode != PM_ERROR && parseMode != PM_EOF);
+    while (f->parseMode != PM_ERROR && f->parseMode != PM_EOF);
 
-    for (i = 0; i < LOC_MAX_NAMES; i++)
-    {
-        th_free(tmpLocNames[i]);
-        th_free(tmpCoderNames[i]);
-    }
-    th_free(tmpURI);
+    locFreeMarkerData(&marker);
 
-    return (parseMode == PM_EOF);
+    return (f->parseMode == PM_EOF);
 }
 
 
--- a/liblocfile.h	Sat Feb 07 06:06:56 2015 +0200
+++ b/liblocfile.h	Sat Feb 07 06:11:24 2015 +0200
@@ -92,8 +92,10 @@
 
     // File parsing data
     FILE *fp;
-    size_t lineNum;
-    int ch;
+    unsigned int lineNum;
+    BOOL versionSet;
+    int ch, parseMode, prevMode, nextMode, field, subfield, sep;
+    char *fieldsep;
 } LocFileInfo;