#
# ExpConv::ExpConv.pm
#

use strict;

package ExpConv;

use vars qw(@EXPORT @ISA %entities);
use XML::Handler::CanonXMLWriter;
use Text::ParseWords;

require Exporter;

@ISA = qw(Exporter XML::Handler::CanonXMLWriter);
@EXPORT = qw (start_element);

sub entity_decl {
    my $self = shift; my $entity = shift;
    $entities{$entity->{Name}} = $entity;
}

sub start_element {
    my $self = shift; my $element = shift;

    if ($element->{Name} eq "xptr" || $element->{Name} eq "xref") {
        my $key; my $from;
        my $attrs = $element->{Attributes};
        my $doc;

        if ($attrs->{"doc"}) {
            $doc = $entities{$attrs->{"doc"}};
            if (! $doc) {
                print STDERR "Warning: doc attribute specified on element"
                    . " $element->{Name} but "
                    . $attrs->{"doc"}
                    . " is not a valid entity reference; using this document"
                    . " as the base document.\n";
            }
        }

        my $xp = $self->generateXpAttr
            ($attrs->{"from"}, $attrs->{"to"}, $doc);
        my $xfrom = $self->generateXfromAttr($attrs->{"from"}, $doc);
        my $xto = $self->generateXtoAttr($attrs->{"to"}, $xfrom, $doc);

        if ($xp ne "") {
            $self->setNewAttr("xp", $xp, $attrs, $element);
        }
        if ($xfrom ne "") {
            $self->setNewAttr("xfrom", $xfrom, $attrs, $element);
        }
        if ($xto ne "") {
            $self->setNewAttr("xto", $xto, $attrs, $element);
        }
    }

    $self->SUPER::start_element($element);
}

sub setNewAttr {
    my $self = shift; my $name = shift; my $value = shift; my $attrs = shift;
    my $element = shift;

    if ($attrs->{$name}) {
        print STDERR "Warning: found $name attribute on $element->{Name}, which will be overwritten.\n";
    }

    $attrs->{$name} = $value;

}


# If "to" is not specified, it defaults to the duplicate of "from".
# If "from" is not specified, error.
sub generateXpAttr {
    my $self = shift; my $from = shift; my $to = shift; my $doc = shift;

    if (! $from) {
        if ($doc) {
            return $self->generateUsingDoc($doc);
        }
        print STDERR "Warning: from attribute not specified; xp will not be generated.\n";
        return;
    }

    if (! $to) {
        return $self->generateXfromAttr ($from, $doc, 1);
    }

    my $xfrom = $self->generateXfromAttr ($from, 0, 0);
    my $xto = $self->generateXtoAttr ($to, $xfrom, 0, 0);
    my $result = "xpointer($xfrom/range-to($xto))";

# If "doc" was specified, generate a URI like so: doclocation.xml/xpointer().
# Note that TEI extended pointers allow no way to specify "from" one doc
# "to" another.
    if ($doc) {
        return $self->generateUsingDoc($doc, $result);
    } else {
        return $result;
    }

}

sub generateXfromAttr {
    my $self = shift; my $val = shift; my $doc = shift; my $wrapResult = shift;

    if ($val eq "") {
        return "";
    }

    if ($wrapResult eq "") {
        # default to true
        $wrapResult = 1;
    }

    my $result;

    if ($self->containsForeignSchemes($val)) {
        $result = $self->convert ($val, 1, "");
    } else {
        $result = $self->convert ($val, 0, "");
    }

    # Note that if doc is specified, we ignore $wrapResult.
    if ($doc) {
        if (! $doc->{SystemId}) {
            print STDERR "Warning: entity used in \"doc\" attribute "
                . "has no system id: $doc->{Name}\n";
        }
        return $doc->{SystemId} . "/xpointer($result)";
    }

    if ($wrapResult) {
        return "xpointer($result)";
    } else {
        return $result;
    }
}

sub generateXtoAttr {
    my $self = shift; my $val = shift; my $xfrom = shift; my $doc = shift;
    my $wrapResult = shift;

    if ($val eq "") {
        return "";
    }

    if ($wrapResult eq "") {
        # default to true
        $wrapResult = 1;
    }

    my $result;

    if ($self->containsForeignSchemes($val)) {
        $result = $self->convert ($val, 1, $xfrom);
    } else {
        $result = $self->convert ($val, 0, $xfrom);
    }

    # Note that if doc is specified, we ignore $wrapResult.
    if ($doc) {
        if (! $doc->{SystemId}) {
            print STDERR "Warning: entity used in \"doc\" attribute "
                . "has no system id: $doc->{Name}\n";
        }
        return $doc->{SystemId} . "/xpointer($result)";
    }

    if ($wrapResult) {
        return "xpointer($result)";
    } else {
        return $result;
    }
}

sub generateUsingDoc {
    my $self = shift; my $doc = shift; my $result = shift;
    if (! $doc->{SystemId}) {
        print STDERR "Warning: entity used in \"doc\" attribute "
            . "has no system id: $doc->{Name}\n";
    } else {
        if ($result) {
            return $doc->{SystemId} . "/$result";
        } else {
            return $doc->{SystemId};
        }
# FIXME what does it mean to have a relative pointer into a remote doc?
    }
}

sub containsForeignSchemes {
    my $self = shift; my $val = shift;

    # do any tokens in $val contain "foreign" or "hyq"?
    return (grep /(^foreign$)|(^hyq$)/i, split /([\s\(\)])/, $val != -1);
}

sub convert {
    my $self = shift; my $val = shift; my $wrapIndividually = shift;
    my $xfrom = shift;

    if ($val eq "") {
        return;
    }

    my $token;
    my $lastToken;
    my $lastCommand;
    my $inParen = 0;
    my $stepsNum = 0;
    my @steps;
    my $out;

    my @tokens = split /([\s\(\)])/, $val;
    foreach $token (@tokens) {
        if ($token eq "(") {
            if ($inParen > 0) {
                $steps[$stepsNum - 1] .= $token;
            } else {
                $stepsNum++;
            }

            $inParen++;
            $lastToken = $token;
            next;
        }

        if ($token eq ")") {
            $inParen--;
            if ($inParen != 0) {
                $steps[$stepsNum - 1] .= $token;
            }
            $lastToken = $token;
            next;
        }

        if ($inParen > 0) {
            $steps[$stepsNum - 1] .= $token;
            $lastToken = $token;
            next;
        }

        if ($self->isCommand ($token)) {
            if ($lastCommand ne "") {
                $out = $self->doCommand($out, $wrapIndividually, $xfrom,
                                        $lastCommand, @steps);
            }
            @steps = ();
            $lastCommand = $token;
            $stepsNum = 0;
            $lastToken = $token;
            next;
        }

        # don't reset $lastToken for these tokens
        if ($token eq "" || $token eq " ") {
            next;
        }
    }

    # handle final command
    if ($lastCommand ne "") {
        $out = $self->doCommand ($out, $wrapIndividually, $xfrom,
                                 $lastCommand, @steps);
    }

    return $out;
}

sub doCommand {
    my $self = shift; my $in = shift; my $wrapIndividually = shift;
    my $xfrom = shift;
    my $command = shift; my @steps = @_;
    my $step; my $out;

# Notes:
# 1. We need to get passed $out (rather than just passing back a
# string to be appended to it) so that in the case of ROOT we can
# throw away everything preceding it.
# 2. If $wrapIndividually is true, wrap the string "xpointer()" around
# xpointer schema parts. If not, don't (assuming the caller wants to
# wrap one big "xpointer" around the whole result, or put it in a
# range-to()).

    # save original $in for use later
    $out = $in;

    if ($out ne "" and $out ne "/" and ! $wrapIndividually) {
        $out .= "/";
    }

    if ($command =~ /^ROOT$/i) {
        $out = "/";
    }

    elsif ($command =~ /^DESCENDANT$/i) {
        $out .= "descendant::" . $self->convSteps (@steps);
    }

    elsif ($command =~ /^PREVIOUS$/i) {
        $out .= "preceding-sibling::" . $self->convSteps (@steps);
    }

    elsif ($command =~ /^PRECEDING$/i) {
        $out .= "preceding::" . $self->convSteps (@steps);
    }

    elsif ($command =~ /^CHILD$/i) {
        # child:: is the default axis -- but just in case some other axis
        # had been previously specified, let's specify it here
        $out .= "child::" . $self->convSteps (@steps);
    }

    elsif ($command =~ /^ANCESTOR$/i) {
        $out .= "ancestor::" . $self->convSteps (@steps);
    }

    elsif ($command =~ /^NEXT$/i) {
        $out .= "following-sibling::" . $self->convSteps (@steps);
    }

    elsif ($command =~ /^FOLLOWING$/i) {
        $out .= "following::" . $self->convSteps (@steps);
    }

# | 'HERE' // location of the xptr
# Note: throw away previous location terms; this one overrides them.
# (Alternatively, we could have chosen to error.)
    elsif ($command =~ /^HERE$/i) {
        $out = ".";
    }

# | 'ID' '(' NAME ')' // only one ID allowed.
    elsif ($command =~ /^ID$/i) {
        $out .= "id('$steps[0]')";
    }

# | 'REF' '(' characters ')' // only one ref allowed
    elsif ($command =~ /^REF$/i) {
        $out .= "UNSUPPORTED_REF(" . $steps[0] . ")";
    }

# | 'TOKEN' '(' range ')'
    elsif ($command =~ /^TOKEN$/i) {
        $out .= "UNSUPPORTED_TOKEN(" . $steps[0] . ")";
    }

# | 'STR' '(' range ')'
    elsif ($command =~ /^STR$/i) {
        $out = "substring($in, " . join (", ", split (/ /, $steps[0])) . ")";
    }

# | 'SPACE' '(' NAME ')' pointpair
    elsif ($command =~ /^SPACE$/i) {
        $out .= "UNSUPPORTED_SPACE(" . $steps[0] . ")("
            . $steps[1] . ")(" . $steps[2] . ")";
    }

    elsif ($command =~ /^PATTERN$/i) {
        $out .= "UNSUPPORTED_PATTERN(" . $steps[0] . ")";
    }

# | 'FOREIGN' parms
# Translate into XPointer scheme
    elsif ($command =~ /^FOREIGN$/i) {
        my $schemeName = shift @steps;
        $out .= $schemeName . "(" . join (" ", @steps) . ")";
    }

# | 'HYQ' parms
# Translate into XPointer scheme
    elsif ($command =~ /^HYQ$/i) {
        $out .= "HYQ(" . join (" ", @steps) . ")";
    }

# | 'DITTO' // valid only in TO att.
    # Note: we produce no error if this is used in 'from'.
    elsif ($command =~ /^DITTO$/i) {
        # Ditto is only valid as the first location term. If there have been
        # others, throw them away.
        $out = $xfrom;
    }

    else {
        print STDERR "Unknown locterm: $command; skipping\n";
    }

    if ($wrapIndividually and $command !~ /^FOREIGN$/i and
        $command !~ /^HYQ$/i) {

        $out = "xpointer($out)";
    }

    return $out;
}

sub convSteps {
    my $self = shift; my @steps = @_;
    my $result; my $step;
    my $first = 1;

    foreach $step (@steps) {
        if ($first) {
            $result .= $self->convStep($step);
            $first = 0;
        } else {
            $result .= "/" . $self->convStep($step);
        }
    }
    return $result;
}


# step     ::=  instance
#          |    instance element
#          |    instance element avspecs

# avspecs  ::=  attribute value
#          |    avspecs attribute value

# instance ::=  'ALL'
#          |    signed

# signed   ::=  NUMBER                        // default sign is +
#          |    '+' NUMBER
#          |    '-' NUMBER

# element  ::=  NAME
#          |    '#CDATA'
#          |    '*'
#          |    '(' regular ')'
sub convStep {
    my $self = shift; my $step = shift;
    my $instance; my $element; my @avspecs; my $result;

    ($instance, $element, @avspecs) = split /\s/, $step;

    if ($element eq "") {
        $element = "*";
    } elsif ($element eq "#CDATA") {
        $element = "text()";
    } elsif ($element =~ /^\(/) {
        $element = "UNSUPPORTED_REGEXP$element";
    }

    $result = $element;

    # avspecs: attrname attrval attrname attrval
    my $avspec;
    my $attr = 1;
    foreach $avspec (@avspecs) {
        if ($attr) {
            $result .= "[@" . $avspec . "='";
            $attr = 0;
        } else {
            $result .= "$avspec']";
            $attr = 1;
        }
    }

    if ($self->isSigned($instance)) {
        $result .= "[$instance]";
    }

    return $result;
}

sub isSigned {
    my $self = shift; my $token = shift;

    return $token =~ /^[+-]?\d$/;
}

sub isCommand {
    my $self = shift; my $token = shift;

    if ($token =~ /^ROOT$/i ||
        $token =~ /^HERE$/i ||
        $token =~ /^DESCENDANT$/i ||
        $token =~ /^ANCESTOR$/i ||
        $token =~ /^PREVIOUS$/i ||
        $token =~ /^NEXT$/i ||
        $token =~ /^CHILD$/i ||
        $token =~ /^ID$/i ||
        $token =~ /^REF$/i ||
        $token =~ /^PRECEDING$/i ||
        $token =~ /^FOLLOWING$/i ||
        $token =~ /^PATTERN$/i ||
        $token =~ /^SPACE$/i ||
        $token =~ /^HYQ$/i ||
        $token =~ /^TOKEN$/i ||
        $token =~ /^STR$/i ||
        $token =~ /^DITTO$/i ||
        $token =~ /^FOREIGN$/i) {
        return 1;
    } else {
        return 0;
    }
}


return 1;

