#!/bin/sh
# xmlify: convert one BNC SGML file to XML.
#
# Usage: xmlify ABC OUTDIR [HTMLDIR] [DEBUG]
#
#   ABC      identifier of the BNC text; the input file is looked up as
#            <texts dir>/A/AB/ABC (first character, first two characters,
#            full identifier)
#   OUTDIR   directory that receives the pretty-printed ABC.xml
#            (created when it does not exist)
#   HTMLDIR  optional; when it names an existing directory, an HTML
#            rendering ABC.html is written there as well
#   DEBUG    optional; any non-empty value keeps the intermediate files
#
# Environment:
#   BNC_TEXTS_DIR  overrides the default texts directory
#                  (/C/BNC-World/Texts/)
#
# bnc.dec, prettyprint.xsl and display.xsl are referenced by relative
# name, and the scratch files (ABC, ABC.xml, temp, temp.xml) are written
# to the current directory, so run the script from the directory that
# holds those support files.
#
# Exit status: 0 on success, 42 on any failure.

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Map a text identifier to its path below the texts directory:
# ABC -> A/AB/ABC.  printf precision is used instead of the GNU-only
# `expr substr`.
bnc_relpath() {
  printf '%.1s/%.2s/%s\n' "$1" "$1" "$1"
}

# Remove the scratch files created in the current directory.
cleanup() {
  rm -f -- "$doc_id" "$doc_id.xml" temp.xml temp
}

# Entry point; returns instead of exiting so that the status of the last
# command of the script is the script's exit status.
main() {
  if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
    warn "Usage: xmlify ABC OUTDIR [HTMLDIR] [DEBUG]"
    return 42
  fi
  doc_id=$1
  OUTDIR=$2
  html_dir=${3:-}
  debug=${4:-}

  # First turn the first argument (e.g. ABC) into a full file name.
  echo "Exmlifying $doc_id"

  # INDIR is the directory holding BNC text files (always ends in "/").
  INDIR=${BNC_TEXTS_DIR:-/C/BNC-World/Texts/}
  case $INDIR in
    */) ;;
    *) INDIR=$INDIR/ ;;
  esac
  FILE=$(bnc_relpath "$doc_id")
  # The original exported these three variables; they stay exported in
  # case a child tool relies on them (not visible from this script).
  export INDIR FILE OUTDIR

  echo "Input from $INDIR$FILE"
  if [ ! -f "$INDIR$FILE" ]; then
    warn "No such file as $INDIR$FILE"
    return 42
  fi

  # Prepare the output directory before any scratch file is created, so
  # a failure here leaves nothing behind.
  if [ ! -d "$OUTDIR" ]; then
    mkdir -p -- "$OUTDIR" || {
      warn "Cannot create output directory $OUTDIR"
      return 42
    }
  fi

  # Work on a local copy named after the identifier.
  cp -- "$INDIR$FILE" "$doc_id" || {
    warn "Cannot copy $INDIR$FILE to ./$doc_id"
    return 42
  }

  # From here on scratch files exist; remove them on every exit path
  # unless a DEBUG argument was given.
  if [ -z "$debug" ]; then
    trap cleanup EXIT
  fi

  # this is a dirty trick needed for some of the BNC text files
  #perl -pi -e"s/<\/bncDoc>//" "$doc_id"

  # Make a driver file.
  # NOTE(review): in the copy of this script under review, the markup
  # declarations inside the quoted strings below had been stripped
  # (everything from a '<' up to the following '>').  Only the surviving
  # text is reproduced here; the declarations must be restored from the
  # upstream script before this can work.
  {
    # TODO(review): first line lost entirely; presumably the opening of a
    # DOCTYPE declaration (the next line starts its '[' subset) - confirm.
    printf '%s\n' ""
    # TODO(review): text between '[' and ' %charents;' lost; presumably
    # the declaration of the charents parameter entity - confirm.
    printf '%s\n' "[ %charents;"
    # TODO(review): line lost entirely; presumably declares the theText
    # entity referenced below, pointing at ./$doc_id - confirm.
    printf '%s\n' ""
    printf '%s\n' "]>&theText;"
  } > temp

  # Use osx to run the driver file to generate one XML file, discarding
  # other output.
  # NOTE(review): the reviewed copy showed a lone "/" between the second
  # and third option; presumed to be a mangled line continuation and
  # dropped - confirm.
  # osx's exit status is not checked, as in the original; whether it is
  # non-zero for recoverable markup errors is not visible here.  The
  # existence of its result file is checked instead.
  rm -f -- "$doc_id.xml"
  osx -xno-nl-in-tag -xno-expand-external -xno-expand-internal \
    bnc.dec temp > /dev/null
  if [ ! -f "$doc_id.xml" ]; then
    warn "osx did not produce $doc_id.xml"
    return 42
  fi

  # Catenate the XML file with a suitable DTD.
  {
    # TODO(review): first line lost entirely; presumably the opening of
    # the DOCTYPE declaration for the XML output - confirm.
    printf '%s\n' ""
    # TODO(review): text between '[' and ' %BNCchars;' lost; presumably
    # the declaration of the BNCchars parameter entity - confirm.
    printf '%s\n' " [ %BNCchars;]>"
    cat -- "$doc_id.xml"
  } > temp.xml

  # Prettyprint it and save results in output directory.
  xsltproc -o "$OUTDIR/$doc_id.xml" prettyprint.xsl temp.xml || {
    warn "xsltproc failed to write $OUTDIR/$doc_id.xml"
    return 42
  }
  echo "Output is in $OUTDIR/$doc_id.xml"

  # Optional 3rd argument is a directory to hold an HTML version.  The
  # emptiness check matters: an unquoted `test -d` with a missing
  # argument is true, which made the original run this step without
  # HTMLDIR and write to /ABC.html.
  if [ -n "$html_dir" ]; then
    if [ -d "$html_dir" ]; then
      xsltproc -o "$html_dir/$doc_id.html" display.xsl \
        "$OUTDIR/$doc_id.xml" || {
        warn "xsltproc failed to write $html_dir/$doc_id.html"
        return 42
      }
      echo "HTML version is in $html_dir/$doc_id.html"
    else
      warn "HTMLDIR $html_dir is not a directory; skipping HTML output"
    fi
  fi

  return 0
}

main "$@"