summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordakkar <dakkar@sardina.(none)>2009-06-27 13:41:41 +0200
committerdakkar <dakkar@sardina.(none)>2009-06-27 13:41:41 +0200
commit742a86af2a7e15cebbed6e2ca4f56b87170b9f7d (patch)
tree454d8417975ce972a66308a1e0c93fb11dc8d4a5
downloadClass-XPath-master.tar.gz
Class-XPath-master.tar.bz2
Class-XPath-master.zip
version 1.4, from CPANHEADmaster
-rw-r--r--Changes22
-rw-r--r--MANIFEST11
-rw-r--r--Makefile.PL13
-rw-r--r--README41
-rw-r--r--XPath.pm619
-rw-r--r--t/00load.t11
-rw-r--r--t/01simple.t162
-rw-r--r--t/02html.html146
-rw-r--r--t/02html.t54
-rw-r--r--t/Simple.pm31
10 files changed, 1110 insertions, 0 deletions
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..63c3552
--- /dev/null
+++ b/Changes
@@ -0,0 +1,22 @@
+Revision history for Perl extension Class::XPath.
+
+1.4 Sun Feb 29 17:58:02 EST 2004
+ - Added ':' as an allowed character in names for nodes and
+ attributes. (Timothy Appnel)
+
+1.3 Thu Nov 13 12:00:00 2003
+ - Added support for matching child content. (ex. /Books[title="Foo"])
+ (Mark Addison)
+
+1.2 Tue Aug 19 12:00:00 2003
+ - Added attribute selectors (/page/@title) (Tim Peoples)
+ - Added self matcher (./page) (Tim Peoples)
+
+1.1 Mon May 05 12:00:00 2003
+ - Added attibute matchers (page[@number>1], page[@header="Title Page"])
+ - Added parent target (../child[1])
+ - Fixed a bug in the way named methods were handled
+
+1.0 Sat Mar 29 12:00:00 2003
+ - First release.
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..06e2eae
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,11 @@
+Changes
+Makefile.PL
+MANIFEST
+README
+XPath.pm
+t/00load.t
+t/01simple.t
+t/02html.t
+t/02html.html
+t/Simple.pm
+
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..e0b3355
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,13 @@
+use 5.006;
+use ExtUtils::MakeMaker;
+
+WriteMakefile(
+ NAME => 'Class::XPath',
+ VERSION_FROM => 'XPath.pm',
+ PREREQ_PM => {
+ Test::More => "0.47",
+ Carp => 0,
+ },
+ ABSTRACT_FROM => 'XPath.pm',
+ AUTHOR => 'Sam Tregar <sam@tregar.com>',
+ );
diff --git a/README b/README
new file mode 100644
index 0000000..01d0b8e
--- /dev/null
+++ b/README
@@ -0,0 +1,41 @@
+Class::XPath 1.4
+
+This module adds XPath-style matching to your object trees. This means
+that you can find nodes using an XPath-esque query with "match()" from
+anywhere in the tree. Also, the "xpath()" method returns a unqique path
+to a given node which can be used as an identifier.
+
+NOTE: This module is not yet a complete XPath implementation. Over
+time I expect the subset of XPath supported to grow. See the SYNTAX
+documentation in the module POD for details on the current level of
+support.
+
+CHANGES
+
+ - Added ':' as an allowed character in names for nodes and
+ attributes. (Timothy Appnel)
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+DEPENDENCIES
+
+This module requires Perl 5.6.0. This module also requires these other
+modules and libraries:
+
+ Test::More
+ Carp
+
+
+COPYRIGHT AND LICENSE
+
+Copyright (C) 2002 Sam Tregar
+
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl 5 itself.
diff --git a/XPath.pm b/XPath.pm
new file mode 100644
index 0000000..0c442d0
--- /dev/null
+++ b/XPath.pm
@@ -0,0 +1,619 @@
+package Class::XPath;
+
+use 5.006;
+use strict;
+use warnings;
+
+our $VERSION = '1.4';
+use Carp qw(croak);
+use constant DEBUG => 0;
+
+# regex fragment for names in XPath expressions
+our $NAME = qr/[\w:]+/;
+
+# declare prototypes
+sub foreach_node (&@);
+
+# handle request to build methods from 'use Class::XPath'.
+sub import {
+ my $pkg = shift;
+ return unless @_;
+ my $target = (caller())[0];
+ # hand off to add_methods
+ $pkg->add_methods(@_, target => $target, from_import => 1);
+}
+
+{
+ # setup lists of required params
+ my %required = map { ($_,1) }
+ qw(get_name get_parent get_children
+ get_attr_names get_attr_value get_content
+ get_root call_match call_xpath);
+
+# add the xpath and match methods to
+sub add_methods {
+ my $pkg = shift;
+ my %args = (call_match => 'match',
+ call_xpath => 'xpath',
+ @_);
+ my $from_import = delete $args{from_import};
+ my $target = delete $args{target};
+ croak("Missing 'target' parameter to ${pkg}->add_methods()")
+ unless defined $target;
+
+ # check args
+ local $_;
+ for (keys %args) {
+ croak("Unrecognized parameter '$_' " .
+ ($from_import ? " on 'use $pkg' line. " :
+ "passed to ${pkg}->add_methods()"))
+ unless $required{$_};
+ }
+ for (keys %required) {
+ croak("Missing required parameter '$_' " .
+ ($from_import ? " on 'use $pkg' line. " :
+ "in call to ${pkg}->add_methods()"))
+ unless exists $args{$_};
+ }
+
+ # translate get_* method names to sub-refs
+ for (grep { /^get_/ } keys %args) {
+ next if ref $args{$_} and ref $args{$_} eq 'CODE';
+ $args{$_} = eval "sub { shift->$args{$_}(\@_) };";
+ croak("Unable to compile sub for '$_' : $@") if $@;
+ }
+
+ # install code into requested names to call real match/xpath with
+ # supplied %args
+ {
+ no strict 'refs';
+ *{"${target}::$args{call_match}"} =
+ sub { $pkg->match($_[0], \%args, $_[1]) };
+ *{"${target}::$args{call_xpath}"} =
+ sub { $pkg->xpath($_[0], \%args) }
+ }
+}}
+
+sub match {
+ my ($pkg, $self, $args, $xpath) = @_;
+ my ($get_root, $get_parent, $get_children, $get_name) =
+ @{$args}{qw(get_root get_parent get_children get_name)};
+
+ croak("Bad call to $args->{call_match}: missing xpath argument.")
+ unless defined $xpath;
+
+ print STDERR "match('$xpath') called.\n" if DEBUG;
+
+ # / is the root. This should probably work as part of the
+ # algorithm, but it doesn't.
+ return $get_root->($self) if $xpath eq '/';
+
+ # . is self. This should also work as part of the algorithm,
+ # but it doesn't.
+ return $self if $xpath eq '.';
+
+ # break up an incoming xpath into a set of @patterns to match
+ # against a list of @target elements
+ my (@patterns, @targets);
+
+ # target aquisition
+ if ($xpath =~ m!^//(.*)$!) {
+ $xpath = $1;
+ # this is a match-anywhere pattern, which should be tried on
+ # all nodes
+ foreach_node { push(@targets, $_) } $get_root->($self), $get_children;
+ } elsif ($xpath =~ m!^/(.*)$!) {
+ $xpath = $1;
+ # this match starts at the root
+ @targets = ($get_root->($self));
+ } elsif ($xpath =~ m!^\.\./(.*)$!) {
+ $xpath = $1;
+ # this match starts at the parent
+ @targets = ($get_parent->($self));
+ } elsif ($xpath =~ m!^\./(.*)$!) {
+ $xpath = $1;
+ @targets = ($self);
+ } else {
+ # this match starts here
+ @targets = ($self);
+ }
+
+ # pattern breakdown
+ my @parts = split('/', $xpath);
+ my $count = 0;
+ for (@parts) {
+ $count++;
+ if (/^$NAME$/) {
+ # it's a straight name match
+ push(@patterns, { name => $_ });
+ } elsif (/^($NAME)\[(-?\d+)\]$/o) {
+ # it's an indexed name
+ push(@patterns, { name => $1, index => $2 });
+ } elsif (/^($NAME)\[\@($NAME)\s*=\s*"([^"]+)"\]$/o or
+ /^($NAME)\[\@($NAME)\s*=\s*'([^']+)'\]$/o) {
+ # it's a string attribute match
+ push(@patterns, { name => $1, attr => $2, value => $3 });
+ } elsif (/^($NAME)\[\@($NAME)\s*(=|>|<|<=|>=|!=)\s*(\d+)\]$/o) {
+ # it's a numeric attribute match
+ push(@patterns, { name => $1, attr => $2, op => $3, value => $4 });
+ } elsif (/^($NAME)\[($NAME|\.)\s*=\s*"([^"]+)"\]$/o or
+ /^($NAME)\[($NAME|\.)\s*=\s*'([^']+)'\]$/o) {
+ # it's a string child match
+ push(@patterns, { name => $1, child => $2, value => $3 });
+ } elsif (/^($NAME)\[($NAME|\.)\s*(=|>|<|<=|>=|!=)\s*(\d+)\]$/) {
+ # it's a numeric child match
+ push(@patterns, { name => $1, child => $2, op => $3, value => $4 });
+ } elsif (/^\@($NAME)$/) {
+ # it's an attribute name
+ push(@patterns, { attr => $1 });
+
+ # it better be last
+ croak("Bad call to $args->{call_match}: '$xpath' contains an attribute selector in the middle of the expression.")
+ if $count != @parts;
+ } else {
+ # unrecognized token
+ croak("Bad call to $args->{call_match}: '$xpath' contains unknown token '$_'");
+ }
+ }
+
+ croak("Bad call to $args->{call_match}: '$xpath' contains no search tokens.")
+ unless @patterns;
+
+ # apply the patterns to all available targets and collect results
+ my @results = map { $pkg->_do_match($_, $args, @patterns) } @targets;
+
+ return @results;
+}
+
+# the underlying match engine. this takes a list of patterns and
+# applies them to child elements
+sub _do_match {
+ my ($pkg, $self, $args, @patterns) = @_;
+ my ($get_parent, $get_children, $get_name, $get_attr_value, $get_attr_names, $get_content) =
+ @{$args}{qw(get_parent get_children get_name get_attr_value get_attr_names get_content)};
+ local $_;
+
+ print STDERR "_do_match(" . $get_name->($self) . " => " .
+ join(', ', map { '{' . join(',', %$_) . '}' } @patterns) .
+ ") called.\n"
+ if DEBUG;
+
+ # get pattern to apply to direct descendants
+ my $pat = shift @patterns;
+
+ # find matches and put in @results
+ my @results;
+ my @kids;
+
+ { no warnings 'uninitialized';
+ @kids = grep { $get_name->($_) eq $pat->{name} } $get_children->($self);
+ }
+
+ if (defined $pat->{index}) {
+ # get a child by index
+ push @results, $kids[$pat->{index}]
+ if (abs($pat->{index}) <= $#kids);
+ } elsif (defined $pat->{attr}) {
+ if (defined $pat->{name}) {
+ # default op is 'eq' for string matching
+ my $op = $pat->{op} || 'eq';
+
+ # do attribute matching
+ foreach my $kid (@kids) {
+ my $value = $get_attr_value->($kid, $pat->{attr});
+ push(@results, $kid)
+ if ($op eq 'eq' and $value eq $pat->{value}) or
+ ($op eq '=' and $value == $pat->{value}) or
+ ($op eq '!=' and $value != $pat->{value}) or
+ ($op eq '>' and $value > $pat->{value}) or
+ ($op eq '<' and $value < $pat->{value}) or
+ ($op eq '>=' and $value >= $pat->{value}) or
+ ($op eq '<=' and $value <= $pat->{value});
+ }
+ }
+ else {
+ my $attr = $pat->{attr};
+ push(@results, $get_attr_value->($self, $attr))
+ if grep { $_ eq $attr } $get_attr_names->($self);
+ }
+ } elsif (defined $pat->{child}) {
+ croak("Can't process child pattern without name")
+ unless defined $pat->{name};
+ # default op is 'eq' for string matching
+ my $op = $pat->{op} || 'eq';
+ # do attribute matching
+ foreach my $kid (@kids) {
+ foreach (
+ $pat->{child} eq "." ? $kid
+ : grep {$get_name->($_) eq $pat->{child}} $get_children->($kid)
+ ) {
+ my $value;
+ foreach_node {
+ my $txt = $get_content->($_);
+ $value .= $txt if defined $txt;
+ } $_, $get_children;
+ next unless defined $value;
+ push(@results, $kid)
+ if ($op eq 'eq' and $value eq $pat->{value}) or
+ ($op eq '=' and $value == $pat->{value}) or
+ ($op eq '!=' and $value != $pat->{value}) or
+ ($op eq '>' and $value > $pat->{value}) or
+ ($op eq '<' and $value < $pat->{value}) or
+ ($op eq '>=' and $value >= $pat->{value}) or
+ ($op eq '<=' and $value <= $pat->{value});
+ }
+ }
+ } else {
+ push @results, @kids;
+ }
+
+ # all done?
+ return @results unless @patterns;
+
+ # apply remaining patterns on matching kids
+ return map { $pkg->_do_match($_, $args, @patterns) } @results;
+}
+
+
+sub xpath {
+ my ($pkg, $self, $args) = @_;
+ my ($get_parent, $get_children, $get_name) =
+ @{$args}{qw(get_parent get_children get_name)};
+
+ my $parent = $get_parent->($self);
+ return '/' unless defined $parent; # root's xpath is /
+
+ # get order within same-named nodes in the parent
+ my $name = $get_name->($self);
+ my $count = 0;
+ for my $kid ($get_children->($parent)) {
+ last if $kid == $self;
+ $count++ if $get_name->($kid) eq $name;
+ }
+
+ # construct xpath using parent's xpath and our name and count
+ return $pkg->xpath($parent, $args) .
+ ($get_parent->($parent) ? '/' : '') .
+ $name . '[' . $count . ']';
+}
+
+
+# does a depth first traversal in a stack
+sub foreach_node (&@) {
+ my ($code, $node, $get_children) = @_;
+ my @stack = ($node);
+ while (@stack) {
+ local $_ = shift(@stack);
+ $code->();
+ push(@stack, $get_children->($_));
+ }
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Class::XPath - adds xpath matching to object trees
+
+=head1 SYNOPSIS
+
+In your node class, use Class::XPath:
+
+ # generate xpath() and match() using Class::XPath
+ use Class::XPath
+
+ get_name => 'name', # get the node name with the 'name' method
+
+ get_parent => 'parent', # get parent with the 'parent' method
+
+ get_root => \&get_root, # call get_root($node) to get the root
+
+ get_children => 'kids', # get children with the 'kids' method
+
+ get_attr_names => 'param', # get names and values of attributes
+ get_attr_value => 'param', # from param
+
+ get_content => 'data', # get content from the 'data' method
+
+ ;
+
+Now your objects support XPath-esque matching:
+
+ # find all pages, anywhere in the tree
+ @nodes = $node->match('//page');
+
+ # returns an XPath like "/page[1]/paragraph[2]"
+ $xpath = $node->xpath();
+
+=head1 DESCRIPTION
+
+This module adds XPath-style matching to your object trees. This
+means that you can find nodes using an XPath-esque query with
+C<match()> from anywhere in the tree. Also, the C<xpath()> method
+returns a unique path to a given node which can be used as an
+identifier.
+
+To use this module you must already have an OO implementation of a
+tree. The tree must be a true tree - all nodes have a single parent
+and the tree must have a single root node. Also, the order of
+children within a node must be stable.
+
+B<NOTE:> This module is not yet a complete XPath implementation. Over
+time I expect the subset of XPath supported to grow. See the SYNTAX
+documentation for details on the current level of support.
+
+=head1 USAGE
+
+This module is used by providing it with information about how your
+class works. Class::XPath uses this information to build the
+C<match()> and C<xpath()> methods for your class. The parameters
+passed to 'use Class::XPath' may be set with strings, indicating
+method names, or subroutine references. They are:
+
+=over
+
+=item get_name (required)
+
+Returns the name of this node. This will be used as the element name
+when evaluating an XPath match. The value returned must matches
+/^[\w:]+$/.
+
+=item get_parent (required)
+
+Returns the parent of this node. The root node must return undef from
+the get_parent method.
+
+=item get_children (required)
+
+Returns a list of child nodes, in order.
+
+=item get_attr_names (required)
+
+Returns a list of available attribute names. The values returned must
+match /^[\w:]+$/).
+
+=item get_attr_value (required)
+
+Called with a single parameter, the name of the attribute. Returns
+the value associated with that attribute. The value returned must be
+C<undef> if no value exists for the attribute.
+
+=item get_content (required)
+
+Returns the contents of the node. In XML this is text between start
+and end tags.
+
+=item get_root (required)
+
+Returns the root node of this tree.
+
+=item call_match (optional)
+
+Set this to the name of the C<match()> method to generate. Defaults
+to 'match'.
+
+=item call_xpath (optional)
+
+Set this to the name of the C<xpath()> method to generate. Defaults
+to 'xpath'.
+
+=back
+
+=head2 ALTERNATE USAGE
+
+If you're using someone else's OO tree module, and you don't want to
+subclass it, you can still use Class::XPath to add XPath matching to
+it. This is done by calling C<Class::XPath->add_methods()> with all
+the options usually passed to C<use> and one extra one, C<target>.
+For example, to add xpath() and match() to HTML::Element (the node
+class for HTML::TreeBuilder):
+
+ # add Class::XPath routines to HTML::Element
+ Class::XPath->add_methods(target => 'HTML::Element',
+ get_parent => 'parent',
+ get_name => 'tag',
+ get_attr_names =>
+ sub { my %attr = shift->all_external_attr;
+ return keys %attr; },
+ get_attr_value =>
+ sub { my %attr = shift->all_external_attr;
+ return $attr{$_[0]}; },
+ get_children =>
+ sub { grep { ref $_ } shift->content_list },
+ get_content =>
+ sub { grep { not ref $_ } shift->content_list },
+ get_root =>
+ sub { local $_=shift;
+ while($_->parent) { $_ = $_->parent }
+ return $_; });
+
+
+Now you can load up an HTML file and do XPath matching on it:
+
+ my $root = HTML::TreeBuilder->new;
+ $root->parse_file("foo.html");1
+
+ # get a list of all paragraphs
+ my @paragraphs = $root->match('//p');
+
+ # get the title element
+ my ($title) = $root->match('/head/title');
+
+=head1 GENERATED METHODS
+
+This module generates two public methods for your class:
+
+=over
+
+=item C<< @results = $node->match('/xpath/expression') >>
+
+This method performs an XPath match against the tree to which this
+node belongs. See the SYNTAX documentation for the range of supported
+expressions. The return value is either a list of node objects, a list
+of values (when retrieving specific attributes) or an empty list if no
+matches could be found. If your XPath expression cannot be parsed then
+the method will die.
+
+You can change the name of this method with the 'call_match' option
+described above.
+
+=item C<< $xpath = $node->xpath() >>
+
+Get an xpath to uniquely identify this node. Can be used with match()
+to find the element later. The xpath returned is guaranteed to be
+unqiue within the element tree. For example, the third node named
+"paragraph" inside node named "page" has the xpath
+"/page[1]/paragraph[2]".
+
+You can change the name of this method with the 'call_xpath' option
+described above.
+
+=back
+
+=head1 SYNTAX
+
+This module supports a small subset of XPath at the moment. Here is a
+list of the type of expressions it knows about:
+
+=over
+
+=item .
+
+Selects and returns the current node.
+
+=item name
+
+=item ./name
+
+Selects a list of nodes called 'name' in the tree below the current
+node.
+
+=item /name
+
+Selects a list of nodes called 'name' directly below the root of the
+tree.
+
+=item //name
+
+Selects all nodes with a matching name, anywhere in the tree.
+
+=item parent/child/grandchild
+
+Selects a list of grandchildren for all children of all parents.
+
+=item parent[1]/child[2]
+
+Selects a single child by indexing into the children lists.
+
+=item parent[-1]/child[0]
+
+Selects the first child of the last parent. In the real XPath they
+spell this 'parent[last()]/child[0]' but supporting the Perl syntax is
+practically free here. Eventually I'll support the XPath style too.
+
+=item ../child[2]
+
+Selects the second child from the parent of the current node.
+Currently .. only works at the start of an XPath, mostly because I
+can't imagine using it anywhere else.
+
+=item child[@id=10]
+
+Selects the child node with an 'id' attribute of 10.
+
+=item child[@id>10]
+
+Selects all the child nodes with an 'id' attribute greater than 10.
+Other supported operators are '<', '<=', '>=' and '!='.
+
+=item child[@category="sports"]
+
+Selects the child with an 'category' attribute of "sports". The value
+must be a quoted string (single or double) and no escaping is allowed.
+
+=item child[title="Hello World"]
+
+Selects the child with a 'title' child element whose content is "Hello World".
+The value must be a quoted string (single or double) and no escaping is allowed.
+e.g.
+
+ <child>
+ <title>Hello World</title>
+ </child>
+
+=item //title[.="Hello World"]
+
+Selects all 'title' elements whose content is "Hello World".
+
+=item child/@attr
+
+Returns the list of values for all attributes "attr" within each child.
+
+=item //@attr
+
+Returns the list of values for all attributes "attr" within each node.
+
+=back
+
+B<NOTE:> this module has no support for Unicode. If this is a problem
+for you please consider sending me a patch. I'm certain that I don't
+know enough about Unicode to do it right myself.
+
+=head1 BUGS
+
+I know of no bugs in this module. If you find one, please file a bug
+report at:
+
+ http://rt.cpan.org
+
+Alternately you can email me directly at sam@tregar.com. Please
+include the version of the module and a complete test case that
+demonstrates the bug.
+
+=head1 TODO
+
+Planned future work:
+
+=over
+
+=item *
+
+Support more of XPath!
+
+=item *
+
+Do more to detect broken get_* functions. Maybe use Carp::Assert and
+a special mode for use during development?
+
+=back
+
+=head1 ACKNOWLEDGMENTS
+
+I would like to thank the creators of XPath for their fine work and
+the W3C for supporting them in their efforts.
+
+The following people have sent me patches and/or suggestions:
+
+ Tim Peoples
+ Mark Addison
+ Timothy Appnel
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2002 Sam Tregar
+
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl 5 itself.
+
+=head1 AUTHOR
+
+Sam Tregar <sam@tregar.com>
+
+=head1 SEE ALSO
+
+The XPath W3C Recommendation:
+
+ http://www.w3.org/TR/xpath
+
+=cut
diff --git a/t/00load.t b/t/00load.t
new file mode 100644
index 0000000..814784e
--- /dev/null
+++ b/t/00load.t
@@ -0,0 +1,11 @@
+use Test::More qw(no_plan);
+BEGIN { use_ok('Class::XPath',
+ get_name => 'name',
+ get_parent => 'parent',
+ get_root => 'root',
+ get_children => 'kids',
+ get_attr_names => 'param',
+ get_attr_value => 'param',
+ get_content => 'data',
+ ); }
+
diff --git a/t/01simple.t b/t/01simple.t
new file mode 100644
index 0000000..e73b9f9
--- /dev/null
+++ b/t/01simple.t
@@ -0,0 +1,162 @@
+use Test::More tests => 218;
+use strict;
+use warnings;
+use lib 't/';
+BEGIN { use_ok('Simple') };
+
+# construct a small tree
+my $root = Simple->new_root(name => 'root');
+isa_ok($root, 'Simple');
+can_ok($root, 'match', 'xpath');
+$root->add_kid(
+ name => 'some:page', foo => 10, bar => 'bif')->add_kid(
+ name => 'kidfoo', data => 10);
+$root->add_kid(
+ name => 'some:page', foo => 20, bar => 'bof')->add_kid(
+ name => 'kidfoo', data => 20);
+$root->add_kid(
+ name => 'some:page', foo => 30, bar => 'bongo')->add_kid(
+ name => 'kidfoo', data => 30);
+my @pages = $root->kids;
+for my $page (@pages) {
+ isa_ok($page, 'Simple');
+ can_ok($page, 'match', 'xpath');
+ for (0 .. 9) {
+ $page->add_kid(name => 'paragraph', data => "$page->{bar}$_" );
+ $page->add_kid(name => 'image') if $_ % 2;
+ }
+}
+#use Data::Dumper;
+#warn "tree:",Dumper($root),"\n";
+
+# root's xpath should be /
+is($root->xpath(), '/');
+
+# page xpath tests
+is($pages[0]->xpath, '/some:page[0]');
+is($pages[1]->xpath, '/some:page[1]');
+is($pages[2]->xpath, '/some:page[2]');
+
+# paragraph xpath tests
+foreach my $page (@pages) {
+ my @para = grep { $_->name eq 'paragraph' } $page->kids;
+ for (my $x = 0; $x < $#para; $x++) {
+ is($para[$x]->xpath, $page->xpath . "/paragraph[$x]");
+ }
+ my @images = grep { $_->name eq 'image' } $page->kids;
+ for (my $x = 0; $x < $#images; $x++) {
+ is($images[$x]->xpath, $page->xpath . "/image[$x]");
+ }
+}
+
+# test match against returned xpaths
+is($root->match($pages[0]->xpath), 1);
+is(($root->match($pages[0]->xpath))[0], $pages[0]);
+is($root->match($pages[1]->xpath), 1);
+is(($root->match($pages[1]->xpath))[0], $pages[1]);
+is($root->match($pages[2]->xpath), 1);
+is(($root->match($pages[2]->xpath))[0], $pages[2]);
+
+# test paragraph xpath matching, both from the page and the root
+foreach my $page (@pages) {
+ my @para = grep { $_->name eq 'paragraph' } $page->kids;
+ for (my $x = 0; $x < $#para; $x++) {
+ is($para[$x]->match($page->xpath), 1);
+ is(($para[$x]->match($page->xpath))[0], $page);
+ is(($root->match($page->xpath))[0], $page);
+ }
+}
+
+# test local name query
+is($root->match('some:page'), 3);
+is(($root->match('some:page'))[0]->match('paragraph'), 10);
+
+# test global name query
+is($root->match('//paragraph'), 30);
+
+# test parent context
+foreach my $page (@pages) {
+ my @para = grep { $_->name eq 'paragraph' } $page->kids;
+ for (my $x = 0; $x < $#para; $x++) {
+ is(($para[$x]->match("../paragraph[$x]"))[0], $para[$x]);
+ }
+}
+
+# test string attribute matching
+is($root->match('some:page[@bar="bif"]'), 1);
+is(($root->match('some:page[@bar="bif"]'))[0], $pages[0]);
+is($root->match('some:page[@bar="bof"]'), 1);
+is(($root->match('some:page[@bar="bof"]'))[0], $pages[1]);
+is($root->match("some:page[\@bar='bongo']"), 1);
+is(($root->match("some:page[\@bar='bongo']"))[0], $pages[2]);
+
+# test numeric attribute matching
+is($root->match('some:page[@foo=10]'), 1);
+is(($root->match('some:page[@foo=10]'))[0], $pages[0]);
+is($root->match('some:page[@foo=20]'), 1);
+is(($root->match('some:page[@foo=20]'))[0], $pages[1]);
+is($root->match('some:page[@foo=30]'), 1);
+is(($root->match('some:page[@foo=30]'))[0], $pages[2]);
+
+is($root->match('some:page[@foo>10]'), 2);
+is(($root->match('some:page[@foo>10]'))[0], $pages[1]);
+is(($root->match('some:page[@foo>10]'))[1], $pages[2]);
+
+is($root->match('some:page[@foo<10]'), 0);
+
+is($root->match('some:page[@foo!=10]'), 2);
+
+is($root->match('some:page[@foo<=10]'), 1);
+
+is($root->match('some:page[@foo>=10]'), 3);
+
+# test attribute value retrieval
+is($root->match('/some:page[0]/@foo'), 1);
+eq_array([$root->match('/some:page/@foo')], [qw( 10 20 30 )]);
+is(($root->match('/some:page[-1]/@bar'))[0], 'bongo');
+eq_array([$root->match('/some:page/@bar')], [qw( bif bof bongo )]);
+
+# make sure bad use of @foo is caught
+eval { $root->match('/some:page[0]/@foo/bar'); };
+like($@, qr/Bad call.*contains an attribute selector in the middle of the expression/);
+
+# test string child matching
+is($root->match('some:page[paragraph="bif0"]'), 1, "Child node string match");
+is(($root->match('some:page[paragraph="bif0"]'))[0], $pages[0]);
+is($root->match('some:page[paragraph="bif3"]'), 1, "Child node string match");
+is(($root->match('some:page[paragraph="bif3"]'))[0], $pages[0]);
+
+is($root->match('some:page[paragraph="bof0"]'), 1, "Child node string match");
+is(($root->match('some:page[paragraph="bof0"]'))[0], $pages[1]);
+is($root->match('some:page[paragraph="bof3"]'), 1, "Child node string match");
+is(($root->match('some:page[paragraph="bof3"]'))[0], $pages[1]);
+
+is($root->match('some:page[paragraph="bongo0"]'), 1, "Child node string match");
+is(($root->match('some:page[paragraph="bongo0"]'))[0], $pages[2]);
+is($root->match('some:page[paragraph="bongo3"]'), 1, "Child node string match");
+is(($root->match('some:page[paragraph="bongo3"]'))[0], $pages[2]);
+
+# test numeric child matching
+is($root->match('some:page[kidfoo=10]'), 1, "Child node = match");
+is(($root->match('some:page[kidfoo=10]'))[0], $pages[0]);
+is($root->match('some:page[kidfoo=20]'), 1, "Child node = match");
+is(($root->match('some:page[kidfoo=20]'))[0], $pages[1]);
+is($root->match('some:page[kidfoo=30]'), 1, "Child node = match");
+is(($root->match('some:page[kidfoo=30]'))[0], $pages[2]);
+
+is($root->match('some:page[kidfoo>10]'), 2, "Child node > match");
+is(($root->match('some:page[kidfoo>10]'))[0], $pages[1]);
+is(($root->match('some:page[kidfoo>10]'))[1], $pages[2]);
+
+is($root->match('some:page[kidfoo<10]'), 0, "Child node < match");
+
+is($root->match('some:page[kidfoo!=10]'), 2, "Child node != match");
+
+is($root->match('some:page[kidfoo<=10]'), 1, "Child node <= match");
+
+is($root->match('some:page[kidfoo>=10]'), 3, "Child node >= match");
+
+is($root->match('some:page[.="10bif0bif1bif2bif3bif4bif5bif6bif7bif8bif9"]'), 1,
+"Complex child node string match");
+is(($root->match('some:page[.="10bif0bif1bif2bif3bif4bif5bif6bif7bif8bif9"]'))[0], $pages[0]);
+
diff --git a/t/02html.html b/t/02html.html
new file mode 100644
index 0000000..7a86dd5
--- /dev/null
+++ b/t/02html.html
@@ -0,0 +1,146 @@
+<html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+ <link rel="stylesheet" href="/s/style.css" type="text/css">
+ <title>search.cpan.org: The CPAN Search Site</title>
+ </head>
+ <body >
+
+<table width="100%"><tr><td rowspan=2 width="5%">
+<a href="/"><img src="/s/img/cpan_banner.png" alt="The CPAN Search Site"></a>
+</td>
+<td>
+<div class=menubar>&nbsp;
+ <a class=m href="/">Home</a> &nbsp;&nbsp;
+ <a class=m href="/author/">Authors</a> &nbsp;&nbsp;
+ <a class=m href="/recent">Recent</a> &nbsp;&nbsp;
+
+ <a class=m href="/site.html">About</a> &nbsp;&nbsp;
+ <a class=m href="/mirror">Mirrors</a> &nbsp;&nbsp;
+ <a class=m href="/faq.html">FAQ</a> &nbsp;&nbsp;
+ <a class=m href="/feedback">Feedback</a>
+&nbsp;</div>
+
+</td></tr><tr><td>
+
+<form method=get action="/search" name=f><table><tr><td>
+<input type="text" name="query" value="tree" size=35 ></td></tr><tr><td>in <select name="mode">
+ <option value="all">All</option>
+ <option value="module" >Modules</option>
+ <option value="dist" >Distributions</option>
+ <option value="author" >Authors</option>
+
+</select>&nbsp;<input type="submit" value="CPAN Search">
+</td></tr></table>
+</form>
+
+</td></tr></table>
+
+
+
+
+<br><div class=t4>
+<small>
+Results <b>1</b> - <b>10</b> of
+<b>1467</b> Found</small></div>
+
+<div class=pages><table width="100%"><tr>
+<td><small> <b>1</b> &middot;
+ <a href="/search?m=all&q=tree&s=11">2</a> &middot;
+ <a href="/search?m=all&q=tree&s=21">3</a> &middot;
+ <a href="/search?m=all&q=tree&s=31">4</a> &middot;
+
+ <a href="/search?m=all&q=tree&s=41">5</a> &middot;
+ <a href="/search?m=all&q=tree&s=51">6</a> &middot;
+ <a href="/search?m=all&q=tree&s=11">Next &gt;&gt;</a>
+</small></td>
+<td align=right><small><b>Page Size</b>:
+<a href="/search?m=all&q=tree&s=1&n=10">10</a>&nbsp;
+<a href="/search?m=all&q=tree&s=1&n=20">20</a>&nbsp;
+
+<a href="/search?m=all&q=tree&s=1&n=50">50</a>&nbsp;
+<a href="/search?m=all&q=tree&s=1&n=100">100</a>&nbsp;
+</small></td></tr></table>
+</div>
+ <p><a href="/author/DERHAAG/FramesReady-1.014/lib/HTTP/Response/Tree.pm"><b>HTTP::Response::Tree</b></a>
+<br /><small>Class for a tree of multiple HTTP::Response objects</small>
+<br /><small> <a href="/author/DERHAAG/FramesReady-1.014/">FramesReady-1.014</a> -
+ <span class=date>28 Apr 2002</span> -
+ <a href="/author/DERHAAG/">Alan E Derhaag</a>
+
+</small>
+ <p><a href="/author/MLEHMANN/Tree-M-0.03/M.pm"><b>Tree::M</b></a>
+<br /><small>implement M-trees for efficient &quot;metric/multimedia-searches&quot;</small>
+<br /><small> <a href="/author/MLEHMANN/Tree-M-0.03/">Tree-M-0.03</a> -
+ <span class=date>27 Jul 2001</span> -
+ <a href="/author/MLEHMANN/">Marc Lehmann</a>
+</small>
+
+ <p><a href="/author/RSAVAGE/Class-Tree-1.23/Tree.pm"><b>Class::Tree</b></a>
+<br /><small>Build and print hierarchical information such as directory trees and C++ classes.</small>
+<br /><small> <a href="/author/RSAVAGE/Class-Tree-1.23/">Class-Tree-1.23</a> -
+ <span class=date>22 Mar 2003</span> -
+ <a href="/author/RSAVAGE/">Ron Savage</a>
+</small>
+ <p><a href="/author/SBURKE/HTML-Tree-3.17/lib/HTML/Tree.pm"><b>HTML::Tree</b></a>
+
+<br /><small>overview of HTML::TreeBuilder et al</small>
+<br /><small> <a href="/author/SBURKE/HTML-Tree-3.17/">HTML-Tree-3.17</a> -
+ <span class=date>18 Jan 2003</span> -
+ <a href="/author/SBURKE/">Sean M. Burke</a>
+</small>
+ <p><a href="/author/BJEPS/DBIx-Tree-1.9/Tree.pm"><b>DBIx::Tree</b></a>
+<br /><small>Perl module for generating a tree from a self-referential table</small>
+
+<br /><small> <a href="/author/BJEPS/DBIx-Tree-1.9/">DBIx-Tree-1.9</a> -
+ <span class=date>14 Feb 2003</span> -
+ <a href="/author/BJEPS/">Brian Jepson</a>
+</small>
+ <p><a href="/author/SWMCD/Pod-Tree-1.10/Tree.pm"><b>Pod::Tree</b></a>
+<br /><small>Create a static syntax tree for a POD</small>
+<br /><small> <a href="/author/SWMCD/Pod-Tree-1.10/">Pod-Tree-1.10</a> -
+ <span class=date>29 Jan 2003</span> -
+ <a href="/author/SWMCD/">Steven McDougall</a>
+
+</small>
+ <p><a href="/author/BIRNEY/bioperl-1.2/Bio/Tree/Tree.pm"><b>Bio::Tree::Tree</b></a>
+<br /><small>An Implementation of TreeI interface.</small>
+<br /><small> <a href="/author/BIRNEY/bioperl-1.2/">bioperl-1.2</a> -
+ <span class=date>31 Dec 2002</span> -
+ <a href="/author/BIRNEY/">Ewan Birney</a>
+</small>
+ <p><a href="/author/BIRNEY/bioperl-1.2/Bio/Tree/RandomFactory.pm"><b>Bio::Tree::RandomFactory</b></a>
+
+<br /><small>TreeFactory for generating Random Trees</small>
+<br /><small> <a href="/author/BIRNEY/bioperl-1.2/">bioperl-1.2</a> -
+ <span class=date>31 Dec 2002</span> -
+ <a href="/author/BIRNEY/">Ewan Birney</a>
+</small>
+ <p><a href="/author/BIRNEY/bioperl-1.2/Bio/Tree/TreeI.pm"><b>Bio::Tree::TreeI</b></a>
+<br /><small>A Tree object suitable for lots of things, designed originally for Phylogenetic Trees.</small>
+
+<br /><small> <a href="/author/BIRNEY/bioperl-1.2/">bioperl-1.2</a> -
+ <span class=date>31 Dec 2002</span> -
+ <a href="/author/BIRNEY/">Ewan Birney</a>
+</small>
+ <p><a href="/author/BIRNEY/bioperl-1.2/Bio/Taxonomy/Tree.pm"><b>Bio::Taxonomy::Tree</b></a>
+<br /><small>An Organism Level Implementation of TreeI interface.</small>
+<br /><small> <a href="/author/BIRNEY/bioperl-1.2/">bioperl-1.2</a> -
+ <span class=date>31 Dec 2002</span> -
+ <a href="/author/BIRNEY/">Ewan Birney</a>
+
+</small>
+<br>
+
+<center> <b>1</b> &middot;
+ <a href="/search?m=all&q=tree&s=11">2</a> &middot;
+ <a href="/search?m=all&q=tree&s=21">3</a> &middot;
+ <a href="/search?m=all&q=tree&s=31">4</a> &middot;
+
+ <a href="/search?m=all&q=tree&s=41">5</a> &middot;
+ <a href="/search?m=all&q=tree&s=51">6</a> &middot;
+ <a href="/search?m=all&q=tree&s=11">Next &gt;&gt;</a>
+</center>
+
+ </body>
+</html>
diff --git a/t/02html.t b/t/02html.t
new file mode 100644
index 0000000..93e0eef
--- /dev/null
+++ b/t/02html.t
@@ -0,0 +1,54 @@
+# make sure HTML::TreeBuilder is available
+BEGIN {
+ eval { require HTML::TreeBuilder; };
+ if ($@) {
+ eval "use Test::More skip_all => q{02html.t requires HTML::TreeBuilder.};";
+ exit;
+ }
+}
+
+use Test::More qw(no_plan);
+use strict;
+use warnings;
+use Class::XPath;
+use HTML::TreeBuilder;
+
+# build a tree from some HTML
+my $root = HTML::TreeBuilder->new;
+isa_ok($root, 'HTML::TreeBuilder');
+$root->parse_file("t/02html.html");
+isa_ok($root, 'HTML::Element');
+
+# add Class::XPath routines to HTML::Element
+Class::XPath->add_methods(target => 'HTML::Element',
+ call_match => 'xpath_match',
+ call_xpath => 'xpath_id',
+ get_parent => 'parent',
+ get_name => 'tag',
+ get_attr_names =>
+ sub { my %attr = shift->all_external_attr;
+ return keys %attr; },
+ get_attr_value =>
+ sub { my %attr = shift->all_external_attr;
+ return $attr{$_[0]}; },
+ get_children =>
+ sub { grep { ref $_ } shift->content_list },
+ get_content =>
+ sub { grep { not ref $_ } shift->content_list },
+ get_root =>
+ sub { local $_=shift;
+ while($_->parent) { $_ = $_->parent }
+ return $_; });
+
+# do some matching tests against the HTML
+is($root->xpath_match('//table'), 3);
+is($root->xpath_match('/head/title'), 1);
+is($root->xpath_match('//head/title'), 1);
+is(($root->xpath_match('/head/title'))[0]->xpath_id, '/head[0]/title[0]');
+is(($root->xpath_match('/head/title'))[0]->parent->xpath_id, '/head[0]');
+is(($root->xpath_match('/head/title'))[0]->parent->parent->xpath_id, '/');
+is($root->xpath_match('//a'), 54);
+
+my ($head) = $root->xpath_match('/head');
+is($head->xpath_match('title'), 1);
+is($head->xpath_match('/title'), 0);
diff --git a/t/Simple.pm b/t/Simple.pm
new file mode 100644
index 0000000..1b417df
--- /dev/null
+++ b/t/Simple.pm
@@ -0,0 +1,31 @@
+package Simple;
+use strict;
+use warnings;
+
+use Class::XPath
+ get_name => 'name',
+ get_parent => 'parent',
+ get_root => 'root',
+ get_children => 'kids',
+ get_attr_names => 'param',
+ get_attr_value => 'param',
+ get_content => 'data';
+
+
+sub name { shift->{name} }
+sub parent { shift->{parent} }
+sub root { local $_=shift;
+ while($_->{parent}) { $_ = $_->{parent} }
+ return $_; }
+sub param { if (@_ == 2) { return $_[0]->{$_[1]} }
+ else { return qw(foo bar baz) } }
+sub data { shift->{data} }
+sub kids { @{shift->{kids}} }
+
+sub new_root { my $pkg = shift; bless({kids => [], @_}, $pkg); }
+sub add_kid { my $self = shift;
+ push(@{$self->{kids}},
+ bless({kids => [], @_, parent => $self }, ref $self));
+ $self->{kids}[-1]; }
+
+1;