# This script converts one BNC SGML file to XML
#
#     Usage: xmlify ABC OUTDIR [HTMLDIR] [DEBUG]
#
# Resolve the text identifier given as the first argument (e.g. ABC) to its
# path below INDIR. Texts are filed as <1st char>/<first 2 chars>/<id>,
# e.g. A/AB/ABC. Exits with status 42 when no identifier was given or the
# file does not exist.
if [ -z "${1-}" ]; then
  echo "Usage: xmlify ABC OUTDIR [HTMLDIR] [DEBUG]" >&2
  exit 42
fi
echo "Exmlifying $1"
# INDIR is the directory holding BNC text files
export INDIR=/C/BNC-World/Texts/
# printf with a precision truncates the identifier portably; `expr substr`
# is a GNU extension and is not available in every expr implementation.
export FILE="$(printf '%.1s' "$1")/$(printf '%.2s' "$1")/$1"
echo "Input from $INDIR$FILE"
if ! test -f "$INDIR$FILE"; then
  # Diagnostics go to stderr so they are not mixed into normal output.
  echo "No such file as $INDIR$FILE" >&2
  exit 42
fi

# Work on a local copy named after the identifier; the driver file written
# further down refers to the text by this name. Without the copy nothing
# useful can follow, so a failed cp ends the script with the same status
# (42) used for the other fatal conditions.
cp -- "$INDIR$FILE" "$1" || exit 42
#this is a dirty trick needed for some of the BNC text files
#(currently disabled)
#perl -pi -e"s/<\/bncDoc>//" $1

# The second argument names the directory that receives the finished XML.
# It is created (including missing parents) if it does not exist yet.
# The script exits with status 42 when no directory was named or it cannot
# be created. The explicit emptiness check matters: an unquoted, empty
# operand turns `test -d` into a one-argument test that is always true.
export OUTDIR="${2-}"
if [ -z "$OUTDIR" ]; then
  echo "Usage: xmlify ABC OUTDIR [HTMLDIR] [DEBUG]" >&2
  exit 42
fi
test -d "$OUTDIR" || mkdir -p -- "$OUTDIR"
test -d "$OUTDIR" || exit 42

# Write the SGML driver document to ./temp: a DOCTYPE whose internal subset
# pulls in the character entities and declares the local copy of the text
# as the external entity theText, followed by a <bnc> element that
# references that entity.
cat >temp <<EOF
<!DOCTYPE bnc SYSTEM "bnc-fake.dtd" [<!ENTITY % charents SYSTEM "charents.ent"> %charents;
<!ENTITY theText SYSTEM "$1">
]><bnc>&theText;</bnc>
EOF

# Run osx over the driver file using the declaration in bnc.dec. Its
# standard output is thrown away; the XML that the next step picks up is
# expected in <id>.xml (presumably written by osx for the external text
# entity because of -xno-expand-external -- confirm against the osx
# documentation).
osx \
  -xno-nl-in-tag \
  -xno-expand-external \
  -xno-expand-internal \
  bnc.dec temp \
  >/dev/null

# Build temp.xml: a DOCTYPE declaration naming the XML DTD and its character
# entity set, followed by the contents of <id>.xml. The file name is quoted
# so that an identifier containing whitespace or glob characters is passed
# to cat as a single operand.
{
  printf '%s\n' '<!DOCTYPE bncDoc SYSTEM "bnc-xml.dtd" [<!ENTITY % BNCchars SYSTEM "bnc-xml-chars.dtd"> %BNCchars;]>'
  cat -- "$1.xml"
} >temp.xml

# Prettyprint temp.xml with prettyprint.xsl and store the result as
# <id>.xml in the output directory. Success is announced only when xsltproc
# reported success; a failure is reported on stderr instead, and the script
# carries on so that the later steps (including cleanup) still run.
if xsltproc -o "$OUTDIR/$1.xml" prettyprint.xsl temp.xml; then
  echo "Output is in $OUTDIR/$1.xml"
else
  echo "xsltproc failed to write $OUTDIR/$1.xml" >&2
fi

# Optional 3rd argument: an existing directory that receives an HTML
# rendering (via display.xsl) of the prettyprinted XML. The argument is
# quoted so that a missing or empty value means "no HTML wanted"; left
# unquoted, `test -d` with no operand evaluates to true and the transform
# would run with an empty directory name. The location is announced only
# when xsltproc succeeded.
if [ -d "${3-}" ]; then
  if xsltproc -o "$3/$1.html" display.xsl "$OUTDIR/$1.xml"; then
    echo "HTML version is in $3/$1.html"
  else
    echo "xsltproc failed to write $3/$1.html" >&2
  fi
fi

# Remove the working files (the local copy of the text, <id>.xml, temp.xml
# and the driver file temp) unless a non-empty 4th argument was given, in
# which case they are kept for inspection. The argument is tested quoted so
# that values containing spaces or operator-like text still count as "set".
if [ -z "${4-}" ]; then
  # rm -f tolerates files that an earlier failed step never created.
  rm -f -- "$1" "$1.xml" temp.xml temp
fi