view lxmldump.py @ 5:274b2091137c

Some more work on cleaning this up.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 10 May 2021 21:38:11 +0300
parents 60b789dfee32
children 34a89d61dbe7
line wrap: on
line source

#!/usr/bin/python3 -B
# coding=utf-8
###
### lxmldump - Dump ISO/FDIS 1951 XML file data
### Programmed and designed by Matti 'ccr' Hämäläinen <ccr@tnsp.org>
### (C) Copyright 2021 Tecnic Software productions (TNSP)
###
### Python 3.7+ required!
###
import sys
import signal
import re
from pathlib import Path
import xml.etree.ElementTree as xmlET
import unicodedata

assert sys.version_info >= (3, 7)


###
### Default settings
###
pkk_cfg = {
    "dump": False,
    "normalize": False,
}


###
### Misc. helper functions, etc
###
def pkk_cleanup():
    return 0


## Wrapper for print()
def pkk_print(smsg):
    if pkk_cfg["normalize"]:
        sys.stdout.write(unicodedata.normalize("NFC", smsg))
    else:
        sys.stdout.write(smsg)


## Fatal error handler
def pkk_fatal(smsg):
    print(u"ERROR: "+ smsg)
    sys.exit(1)


## Handler for SIGINT signals
def pkk_signal_handler(signal, frame):
    pkk_cleanup()
    print(u"\nQuitting due to SIGINT / Ctrl+C!")
    sys.exit(1)


###
### Main program starts
###
signal.signal(signal.SIGINT, pkk_signal_handler)


### Check if we have arguments
pkk_show_help = False
pkk_filenames = [] 
argc = 1
while argc < len(sys.argv):
    arg = sys.argv[argc]

    needs_param = False
    if argc + 1 < len(sys.argv):
        param = sys.argv[argc + 1]
    else:
        param = None

    # Check for option type arg
    if arg[0:1] == "-":
        oarg = arg
        arg = arg.lstrip("-")

        if arg == "help" or arg == "h":
            pkk_show_help = True
        elif arg == "dump" or arg == "d":
            pkk_cfg["dump"] = True
        elif arg == "normalize" or arg == "n":
            pkk_cfg["normalize"] = True
        else:
            pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))

        if needs_param and param == None:
            pkk_fatal(u"Option '{0}' requires an argument.".format(oarg))
    else:
        # Non-option argument
        pkk_filenames.append(arg)

    if needs_param:
        argc += 2
    else:
        argc += 1


### Show help if requested
if pkk_show_help or len(pkk_filenames) == 0:
    print(u"lxmldump - Dump ISO/FDIS 1951 XML file data")
    print(u"Usage: {0} <options> <input xml file(s)>".
        format(str(Path(sys.argv[0]).name)))
    print(u"")
    print(u"       --help              Show this help")
    print(u"  -d,  --dump              Dump mode")
    print(u"")
    sys.exit(0)


###
### Main
###
def pkk_dump_simple_node(lnode, indent):
    stmp = ""
    if lnode.text != None:
        tmp = str(lnode.text).strip()
        if tmp != "":
            stmp = " \""+ tmp +"\""

    pkk_print("{}{} {}{}".format("    " * indent, lnode.tag, lnode.attrib, stmp))
    for qnode in lnode.findall("./*"):
        pkk_dump_simple_node(qnode, indent + 1)


def pkk_dump_node(dnode):
    wlist = []
    dlist = []
    for wnode in dnode.findall("./HeadwordCtn"):
        for qnode in wnode.findall("./SearchForm"):
            wlist.append(str(qnode.text).strip())

        for qnode in wnode.findall("./Definition"):
            dlist.append(str(qnode.text).strip())

    for wnode in dnode.findall("./SenseGrp"):
        for qnode in wnode.findall("./Definition"):
            dlist.append(str(qnode.text).strip())

    pkk_print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))


for filename in pkk_filenames:
    # Parse XML file into element tree
    try:
        uxml = xmlET.parse(filename)
    except Exception as e:
        pkk_fatal(u"SVG/XML parsing failed: {0}".format(str(e)))

    # Dump output
    try:
        xroot = uxml.getroot()
        for dnode in xroot.findall("./DictionaryEntry"):
            if pkk_cfg["dump"]:
                pkk_dump_simple_node(dnode, 0)
                print("\n\n")
            else:
                pkk_dump_node(dnode)

    except (BrokenPipeError, IOError) as e:
        sys.stderr.close()
        sys.exit(1)

pkk_cleanup()
sys.exit(0)