view lxmldump.py @ 1:0e5f705a895b

Moar.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 29 Apr 2021 14:25:12 +0300
parents bddf1c283e51
children 7ce08dea935b 60b789dfee32
line wrap: on
line source

#!/usr/bin/python3 -B
# coding=utf-8
###
### ISO/FDIS 1951 lxmldump
###
import sys
import signal
import re
from pathlib import Path
import xml.etree.ElementTree as xmlET

assert sys.version_info >= (3, 7)


###
### Default settings
###
# Runtime settings; the command line parsing further down overrides these.
pkk_cfg = dict(
    verbosity=1,    # messages with a level above this are not printed
    dump=False,     # True selects the raw recursive node dump
)


###
### Misc. helper functions, etc
###
def pkk_cleanup():
    """Placeholder cleanup hook; currently does nothing and returns 0."""
    return 0


## Wrapper for print()
def pkk_print(level, smsg):
    """Print smsg when the configured verbosity is at least level."""
    is_enabled = pkk_cfg["verbosity"] >= level
    if is_enabled:
        print(smsg)


## Fatal error handler
def pkk_fatal(smsg):
    """Report a fatal error and terminate the program with exit status 1.

    The message is written to stderr so that it does not get mixed into
    the data this tool prints on stdout (e.g. when output is piped or
    redirected to a file). smsg is formatted rather than concatenated, so
    a non-string value such as an exception object is accepted as well.
    """
    print(u"ERROR: {0}".format(smsg), file=sys.stderr)
    sys.exit(1)


## Handler for SIGINT signals
def pkk_signal_handler(signal, frame):
    """Run cleanup, print a notice and exit with status 1.

    Both arguments are accepted to match the signal handler calling
    convention; neither is used here.
    """
    pkk_cleanup()
    quit_msg = u"\nQuitting due to SIGINT / Ctrl+C!"
    print(quit_msg)
    sys.exit(1)


###
### Main program starts
###
# Route SIGINT (Ctrl+C) to pkk_signal_handler from this point onwards
signal.signal(signal.SIGINT, pkk_signal_handler)


### Check if we have arguments
# Results of command line parsing:
#   pkk_show_help - True when -h / --help was given
#   pkk_filenames - all non-option arguments, in command line order
pkk_show_help = False
pkk_filenames = []
argc = 1
while argc < len(sys.argv):
    arg = sys.argv[argc]
    argc += 1

    # Anything that does not start with a dash is an input filename
    if not arg.startswith("-"):
        pkk_filenames.append(arg)
        continue

    # Option argument: keep the original spelling for error messages,
    # then strip the leading dashes so "-d" and "--dump" compare alike
    oarg = arg
    arg = arg.lstrip("-")

    if arg in ("help", "h"):
        pkk_show_help = True
    elif arg in ("dump", "d"):
        pkk_cfg["dump"] = True
    elif arg in ("v", "verbosity"):
        # This option consumes the following argument as its value
        if argc >= len(sys.argv):
            pkk_fatal(u"Option '{0}' requires an argument.".format(oarg))
        param = sys.argv[argc]
        argc += 1

        # Store the verbosity as an integer: it is compared numerically
        # against message levels, so a raw string would raise TypeError
        try:
            pkk_cfg["verbosity"] = int(param)
        except ValueError:
            pkk_fatal(u"Invalid value '{0}' for option '{1}'.".format(param, oarg))
    else:
        pkk_fatal(u"Invalid option argument '{0}'.".format(oarg))


### Show help if requested
if pkk_show_help or not pkk_filenames:
    # Name the program was invoked as, without any directory part
    progname = Path(sys.argv[0]).name
    help_lines = (
        u"lxmldump - Dump ISO/FDIS 1951 XML file data",
        u"Usage: {0} <options> <input xml file(s)>".format(progname),
        u"",
        u"       --help              Show this help",
        # Verbosity option is intentionally left out of the help for now:
        # u"  -v,  --verbosity <0-3>   Set verbosity",
        u"  -d,  --dump              Dump mode",
        u"",
    )
    for hline in help_lines:
        print(hline)
    sys.exit(0)




###
### Main
###
def pkk_recursive_dump(lnode, indent):
    """Print lnode and all of its descendants, one node per line.

    Each line consists of four spaces per indent level, the node tag, the
    attribute dict and, when the node has non-blank text, that text
    stripped of surrounding whitespace and wrapped in double quotes.
    Child nodes are printed recursively one indent level deeper.
    """
    text = "" if lnode.text is None else str(lnode.text).strip()
    quoted = (" \"" + text + "\"") if text != "" else ""
    prefix = "    " * indent

    print("{}{} {}{}".format(prefix, lnode.tag, lnode.attrib, quoted))

    for child in lnode.findall("./*"):
        pkk_recursive_dump(child, indent + 1)


# Process each input file in the order given on the command line
for filename in pkk_filenames:
    # Parse XML file into element tree
    try:
        uxml = xmlET.parse(filename)
    except Exception as e:
        # Top-level boundary: any failure to read or parse the file is
        # fatal. Name the file so the error is useful with several inputs.
        pkk_fatal(u"XML parsing failed for '{0}': {1}".format(filename, str(e)))

    # Dump output
    try:
        xroot = uxml.getroot()
        for dnode in xroot.findall("./DictionaryEntry"):
            if pkk_cfg["dump"]:
                # Raw dump of the whole entry, followed by blank lines
                pkk_recursive_dump(dnode, 0)
                print("\n\n")
            else:
                # Headwords: SearchForm nodes under each HeadwordCtn.
                # A node without text has .text == None; treat that as an
                # empty string instead of printing the literal "None".
                wlist = [
                    (qnode.text or "").strip()
                    for qnode in dnode.findall("./HeadwordCtn/SearchForm")
                ]

                # Definitions: first those under HeadwordCtn, then those
                # under SenseGrp, each group in document order
                dnodes = (
                    dnode.findall("./HeadwordCtn/Definition")
                    + dnode.findall("./SenseGrp/Definition")
                )
                dlist = [(qnode.text or "").strip() for qnode in dnodes]

                print("{} : {}".format(", ".join(wlist), " ; ".join(dlist)))

    except (BrokenPipeError, IOError):
        # Output could not be written (e.g. the reader of a pipe went
        # away). Closing stderr presumably keeps the interpreter from
        # emitting a further error message while shutting down.
        sys.stderr.close()
        sys.exit(1)

pkk_cleanup()
sys.exit(0)