Mercurial > hg > batmud > ggrtf
view docs/normalizeml.pl @ 1229:2871db57f976
Fix inclusion and expanding of SYSTEM entities.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sat, 29 May 2010 06:52:07 +0000 |
parents | c4a4c72c247a |
children | a3c3591f9a74 |
line wrap: on
line source
#!/usr/bin/perl -w
#
# Utility for "normalizing" XML/SGML files
# Programmed by Matti 'ccr' Hamalainen <ccr@tnsp.org>
# (C) Copyright 2007,2009 Tecnic Software productions (TNSP)
#
# Reads a document from STDIN, collects the <!ENTITY ...> declarations it
# contains (external SYSTEM entities, one-line and multi-line internal
# entities) and prints every other line to STDOUT with the known entity
# references expanded. Declaration lines themselves are not printed.
#
use strict;
use warnings;

# Upper bound on expansion passes per output line, so that entities which
# refer to each other cannot loop forever.
my $max_expansion_passes = 5;


# Substitute positional placeholders in an entity's replacement text.
#
#   $template - replacement text containing $1, $2, ... placeholders
#   $arglist  - argument values separated by single spaces; $N is
#               replaced with the Nth value
#
# Returns the template with every $N placeholder substituted.
sub dorep {
    # Copy the arguments right away: callers pass $1, which any later
    # regex match would otherwise clobber through the @_ alias.
    my ($template, $arglist) = @_;
    my @vals = split(/ /, $arglist);
    $template =~ s/\$(\d+)/$vals[$1 - 1]/eg;
    return $template;
}


# Expand entity references in a piece of text.
#
#   $text     - text possibly containing "&name;" or "&name arg1 arg2;"
#   $entities - hash reference mapping entity name => replacement text
#
# Each pass replaces plain references with the entity value and
# parameterized references with the value after dorep() substitution.
# Passes repeat until no '&' remains or $max_expansion_passes is reached.
#
# Returns the expanded text.
sub expand_entities {
    my ($text, $entities) = @_;

    for my $pass (1 .. $max_expansion_passes) {
        for my $name (keys %$entities) {
            my $value = $entities->{$name};
            $text =~ s/&$name;/$value/g;
            $text =~ s/&$name\s+([A-Za-z0-9 ]+);/dorep($value, $1)/eg;
        }
        # Nothing left that could be an entity reference.
        last unless ($text =~ /&/);
    }

    return $text;
}


my %xmlentities = ();   # entity name => replacement text
my $entMode = 0;        # 1 while inside a multi-line entity declaration
my $entData;            # text collected so far for the multi-line entity
my $entName;            # name of the multi-line entity being collected

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");

while (my $line = <STDIN>) {
    if ($line =~ /<!ENTITY ([A-Za-z][A-Za-z0-9_]+) +SYSTEM +\"([^\"]*)\">/) {
        # Handle external entities: the whole file becomes the value.
        my ($name, $extfname) = ($1, $2);
        # Decode with the same layer as STDIN/STDOUT so that non-ASCII
        # content is not double-encoded when it is printed.
        open(my $infile, "<:utf8", $extfname)
            or die("Could not open entity file '$extfname': $!\n");
        $xmlentities{$name} = do { local $/; <$infile> };
        close($infile);
    }
    elsif ($line =~ /<!ENTITY ([A-Za-z][A-Za-z0-9_]+) \"(.*?)\">/) {
        # One-line entities
        $xmlentities{$1} = $2;
    }
    elsif ($line =~ /<!ENTITY ([A-Za-z][A-Za-z0-9_]+) \"(.*)$/) {
        # Multi-line entities: start collecting. The pattern stops before
        # the line terminator, so put it back; otherwise the first and
        # second line of the value would be glued together.
        $entName = $1;
        $entData = $2 . "\n";
        $entMode = 1;
    }
    elsif ($entMode == 1) {
        if ($line =~ /^(.*)\">/) {
            # Closing line of the declaration: store the finished value.
            $entData .= $1;
            $xmlentities{$entName} = $entData;
            $entMode = 0;
        }
        else {
            $entData .= $line;
        }
    }
    else {
        # Ordinary content line: expand entities and emit it.
        print expand_entities($line, \%xmlentities);
    }
}