package Class::XPath;
use 5.006;
use strict;
use warnings;
our $VERSION = '1.4';
use Carp qw(croak);
use constant DEBUG => 0;
our $NAME = qr/[\w:]+/;
sub foreach_node (&@);
sub import {
my $pkg = shift;
return unless @_;
my $target = (caller())[0];
$pkg->add_methods(@_, target => $target, from_import => 1);
}
{
my %required = map { ($_,1) }
qw(get_name get_parent get_children
get_attr_names get_attr_value get_content
get_root call_match call_xpath);
sub add_methods {
my $pkg = shift;
my %args = (call_match => 'match',
call_xpath => 'xpath',
@_);
my $from_import = delete $args{from_import};
my $target = delete $args{target};
croak("Missing 'target' parameter to ${pkg}->add_methods()")
unless defined $target;
local $_;
for (keys %args) {
croak("Unrecognized parameter '$_' " .
($from_import ? " on 'use $pkg' line. " :
"passed to ${pkg}->add_methods()"))
unless $required{$_};
}
for (keys %required) {
croak("Missing required parameter '$_' " .
($from_import ? " on 'use $pkg' line. " :
"in call to ${pkg}->add_methods()"))
unless exists $args{$_};
}
for (grep { /^get_/ } keys %args) {
next if ref $args{$_} and ref $args{$_} eq 'CODE';
$args{$_} = eval "sub { shift->$args{$_}(\@_) };";
croak("Unable to compile sub for '$_' : $@") if $@;
}
{
no strict 'refs';
*{"${target}::$args{call_match}"} =
sub { $pkg->match($_[0], \%args, $_[1]) };
*{"${target}::$args{call_xpath}"} =
sub { $pkg->xpath($_[0], \%args) }
}
}}
sub match {
my ($pkg, $self, $args, $xpath) = @_;
my ($get_root, $get_parent, $get_children, $get_name) =
@{$args}{qw(get_root get_parent get_children get_name)};
croak("Bad call to $args->{call_match}: missing xpath argument.")
unless defined $xpath;
print STDERR "match('$xpath') called.\n" if DEBUG;
return $get_root->($self) if $xpath eq '/';
return $self if $xpath eq '.';
my (@patterns, @targets);
if ($xpath =~ m!^//(.*)$!) {
$xpath = $1;
foreach_node { push(@targets, $_) } $get_root->($self), $get_children;
} elsif ($xpath =~ m!^/(.*)$!) {
$xpath = $1;
@targets = ($get_root->($self));
} elsif ($xpath =~ m!^\.\./(.*)$!) {
$xpath = $1;
@targets = ($get_parent->($self));
} elsif ($xpath =~ m!^\./(.*)$!) {
$xpath = $1;
@targets = ($self);
} else {
@targets = ($self);
}
my @parts = split('/', $xpath);
my $count = 0;
for (@parts) {
$count++;
if (/^$NAME$/) {
push(@patterns, { name => $_ });
} elsif (/^($NAME)\[(-?\d+)\]$/o) {
push(@patterns, { name => $1, index => $2 });
} elsif (/^($NAME)\[\@($NAME)\s*=\s*"([^"]+)"\]$/o or
/^($NAME)\[\@($NAME)\s*=\s*'([^']+)'\]$/o) {
push(@patterns, { name => $1, attr => $2, value => $3 });
} elsif (/^($NAME)\[\@($NAME)\s*(=|>|<|<=|>=|!=)\s*(\d+)\]$/o) {
push(@patterns, { name => $1, attr => $2, op => $3, value => $4 });
} elsif (/^($NAME)\[($NAME|\.)\s*=\s*"([^"]+)"\]$/o or
/^($NAME)\[($NAME|\.)\s*=\s*'([^']+)'\]$/o) {
push(@patterns, { name => $1, child => $2, value => $3 });
} elsif (/^($NAME)\[($NAME|\.)\s*(=|>|<|<=|>=|!=)\s*(\d+)\]$/) {
push(@patterns, { name => $1, child => $2, op => $3, value => $4 });
} elsif (/^\@($NAME)$/) {
push(@patterns, { attr => $1 });
croak("Bad call to $args->{call_match}: '$xpath' contains an attribute selector in the middle of the expression.")
if $count != @parts;
} else {
croak("Bad call to $args->{call_match}: '$xpath' contains unknown token '$_'");
}
}
croak("Bad call to $args->{call_match}: '$xpath' contains no search tokens.")
unless @patterns;
my @results = map { $pkg->_do_match($_, $args, @patterns) } @targets;
return @results;
}
sub _do_match {
my ($pkg, $self, $args, @patterns) = @_;
my ($get_parent, $get_children, $get_name, $get_attr_value, $get_attr_names, $get_content) =
@{$args}{qw(get_parent get_children get_name get_attr_value get_attr_names get_content)};
local $_;
print STDERR "_do_match(" . $get_name->($self) . " => " .
join(', ', map { '{' . join(',', %$_) . '}' } @patterns) .
") called.\n"
if DEBUG;
my $pat = shift @patterns;
my @results;
my @kids;
{ no warnings 'uninitialized';
@kids = grep { $get_name->($_) eq $pat->{name} } $get_children->($self);
}
if (defined $pat->{index}) {
push @results, $kids[$pat->{index}]
if (abs($pat->{index}) <= $#kids);
} elsif (defined $pat->{attr}) {
if (defined $pat->{name}) {
my $op = $pat->{op} || 'eq';
foreach my $kid (@kids) {
my $value = $get_attr_value->($kid, $pat->{attr});
push(@results, $kid)
if ($op eq 'eq' and $value eq $pat->{value}) or
($op eq '=' and $value == $pat->{value}) or
($op eq '!=' and $value != $pat->{value}) or
($op eq '>' and $value > $pat->{value}) or
($op eq '<' and $value < $pat->{value}) or
($op eq '>=' and $value >= $pat->{value}) or
($op eq '<=' and $value <= $pat->{value});
}
}
else {
my $attr = $pat->{attr};
push(@results, $get_attr_value->($self, $attr))
if grep { $_ eq $attr } $get_attr_names->($self);
}
} elsif (defined $pat->{child}) {
croak("Can't process child pattern without name")
unless defined $pat->{name};
my $op = $pat->{op} || 'eq';
foreach my $kid (@kids) {
foreach (
$pat->{child} eq "." ? $kid
: grep {$get_name->($_) eq $pat->{child}} $get_children->($kid)
) {
my $value;
foreach_node {
my $txt = $get_content->($_);
$value .= $txt if defined $txt;
} $_, $get_children;
next unless defined $value;
push(@results, $kid)
if ($op eq 'eq' and $value eq $pat->{value}) or
($op eq '=' and $value == $pat->{value}) or
($op eq '!=' and $value != $pat->{value}) or
($op eq '>' and $value > $pat->{value}) or
($op eq '<' and $value < $pat->{value}) or
($op eq '>=' and $value >= $pat->{value}) or
($op eq '<=' and $value <= $pat->{value});
}
}
} else {
push @results, @kids;
}
return @results unless @patterns;
return map { $pkg->_do_match($_, $args, @patterns) } @results;
}
sub xpath {
my ($pkg, $self, $args) = @_;
my ($get_parent, $get_children, $get_name) =
@{$args}{qw(get_parent get_children get_name)};
my $parent = $get_parent->($self);
return '/' unless defined $parent;
my $name = $get_name->($self);
my $count = 0;
for my $kid ($get_children->($parent)) {
last if $kid == $self;
$count++ if $get_name->($kid) eq $name;
}
return $pkg->xpath($parent, $args) .
($get_parent->($parent) ? '/' : '') .
$name . '[' . $count . ']';
}
sub foreach_node (&@) {
my ($code, $node, $get_children) = @_;
my @stack = ($node);
while (@stack) {
local $_ = shift(@stack);
$code->();
push(@stack, $get_children->($_));
}
}
1;
__END__