#!/usr/bin/perl -w
# Converts OASIS 9401 socats to XML catalogs
# No copyright, no warranty, use as you will
# John Cowan asserts the moral right to be known as the author of this software
#
# Frdric Schtz <schutz@mathgen.ch>
#   16/03/2003: added doctype at start of document (tr9401.dtd)

use strict;
$/ = undef;

my $lines = 1;			# current line number
my $token;			# current token type
my $value;			# current token value
my @commentq;			# queue of comments
my $kw;				# current keyword
my @args;			# arguments to current keyword
my $groups;			# number of groups entered

$_ = <>;
init();
recover:
scan();
while ($token ne "EOF") {
	parse();
	emit();
	dequeue();
	}
fin();

# Scan the next token into $token, putting its value in $value
sub scan {
rescan:
	if (($value) = /^([ \t\r\n]+)/) {	# whitespace
		$_ = $';
		$lines++ while $value =~ s/\n//;
		}
	if (/^--/) {		# comment
		($value) = /^--([^-]+(?:-[^-]+)*)--/;
		$_ = $';
		push @commentq, $value;
		$lines++ while $value =~ s/\n//;
		goto rescan;
		}
	if ($_ eq "") {		# end of input
		$token = "EOF";
		return;
		}
	if (($value) = /^([^"' \t\r\n]+)/) {	# non-string
		$_ = $';
		$token = ($value =~ /[\\\/.<>]/) ? "NONSYM" : "SYM";
		my $uct = uc($value);
		if ($uct eq "OVERRIDE" || $uct eq "SYSTEM"
				|| $uct eq "DELEGATE" || $uct eq "PUBLIC"
				|| $uct eq "DTDDECL" || $uct eq "ENTITY"
				|| $uct eq "DOCTYPE" || $uct eq "LINKTYPE"
				|| $uct eq "NOTATION" || $uct eq "SGMLDECL"
				|| $uct eq "DOCUMENT" || $uct eq "BASE"
				|| $uct eq "CATALOG") {
			$value = $uct;
			$token = "KW";
			}
		return;
		}
	if (($value) = /^"([^"]*)"/) {	# double-quoted string
		$_ = $';
		$token = "LIT";
		return;
		}
	if (($value) = /^'([^']*)'/) {		# single-quoted string
		$_ = $';
		$token = "LIT";
		return;
		}
	die "can't happen";
	}

# Syntax error in input
sub yammer {
	my ($msg) = @_;
	warn "$msg at line $lines\n";
	goto recover;
	}

# Parse tokens into xcatalog entries
sub parse {
	$kw = $value;
	@args = ();
	if ($token eq "SYM") {		# unknown keyword
		while (1) {
			scan();
			last if $token eq "KW" || $token eq "EOF";
			last if $token eq "SYM" && @args != 0;
			push @args, $value;
			}
		return;
		}
	yammer "$value not a valid keyword" unless $token eq "KW";
	scan();
	if ($kw eq "PUBLIC" || $kw eq "DTDDECL") {
		yammer "$value not a public id" unless $token eq "LIT";
		push @args, $value;
		scan();
		push @args, $value;
		scan();
		return;
		}
	if ($kw eq "ENTITY" || $kw eq "DOCTYPE" || $kw eq "LINKTYPE"
			|| $kw eq "NOTATION") {
		push @args, $value;
		scan();
		push @args, $value;
		scan();
		return;
		}
	if ($kw eq "SGMLDECL" || $kw eq "DOCUMENT" || $kw eq "BASE"
			|| $kw eq "CATALOG") {
		push @args, $value;
		scan();
		return;
		}
	if ($kw eq "SYSTEM") {
		yammer "$value not a system id" unless $token eq "LIT";
		push @args, $value;
		scan();
		push @args, $value;
		scan();
		return;
		}
	if ($kw eq "DELEGATE") {
		yammer "$value not a partial public id"
			unless $token eq "LIT";
		push @args, $value;
		scan();
		push @args, $value;
		scan();
		return;
		}
	if ($kw eq "OVERRIDE") {
		$value = uc($value);
		yammer "OVERRIDE requires YES or NO"
			unless $value eq "YES" || $value eq "NO";
		push @args, $value;
		scan();
		return;
		}
	die "can't happen";
	}

# Emit the XML catalog entry
sub emit {
	my $arg;
	foreach $arg (@args) {
		$arg = ($arg =~ /"/) ? "'$arg'" : "\"$arg\"";
		}
	if ($kw eq "SYSTEM") {
		print "<system systemId=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "DELEGATE") {
		print "<delegatePublic publicIdStartString=$args[0] ";
		print "catalog=$args[1]/>\n";
		}
	elsif ($kw eq "OVERRIDE") {
		my $prefer = ($args[0] =~ /YES/) ? "\"public\""
				: "\"system\"";
		print "<group prefer=$prefer>\n";
		$groups++;
		}
	elsif ($kw eq "PUBLIC") {
		print "<public publicId=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "DTDDECL") {
		print "<soc:dtddecl publicId=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "ENTITY") {
		print "<soc:entity name=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "DOCTYPE") {
		print "<soc:doctype name=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "LINKTYPE") {
		print "<soc:linktype name=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "NOTATION") {
		print "<soc:notation name=$args[0] uri=$args[1]/>\n";
		}
	elsif ($kw eq "SGMLDECL") {
		print "<soc:sgmldecl uri=$args[0]/>\n";
		}
	elsif ($kw eq "DOCUMENT") {
		print "<soc:document uri=$args[0]/>\n";
		}
	elsif ($kw eq "BASE") {
		print "<group xml:base=$args[0]>\n";
		$groups++;
		}
	elsif ($kw eq "CATALOG") {
		print "<nextCatalog catalog=$args[0]/>\n";
		}
	else {
		print "<unk:$kw ";
		my $i;
		for ($i = 0; $i <= $#args; $i++) {
			print "   arg$i=$args[$i]\n";
			}
		print "   />\n";
		}
	}

# dequeue comments
sub dequeue {
	my $comment;
	foreach $comment (@commentq) {
		print "<!--$comment-->\n";
		}
	@commentq = ();
	}

# start xml catalog
sub init {

print <<END;
<?xml version='1.0'?>
<!DOCTYPE catalog PUBLIC "-//GlobalTransCorp//DTD XML Catalogs V1.0-Based Extension V1.0//EN"
    "http://globaltranscorp.org/oasis/catalog/xml/tr9401.dtd">

END

	print "<catalog xmlns=\"urn:oasis:names:tc:entity:xmlns:xml:catalog\"\n";
	print "         xmlns:soc=\"urn:oasis:names:tc:entity:xmlns:tr9401:catalog\"\n";
	print "         xmlns:unk=\"urn:oasis:names:tc:entity:xmlns:unknown\"\n";
	print "         >\n";
	}

# wrap up xml catalog
sub fin {
	while ($groups--) {
		print "</group>\n";
		}
	print "</catalog>\n";
	}
