package PPIx::XPath; use strict; use warnings; use PPI; use Carp; use Scalar::Util qw(reftype blessed); use Tree::XPathEngine; use 5.006; # VERSION # ABSTRACT: an XPath implementation for the PDOM =head1 SYNOPSIS use PPI; use PPIx::XPath; use Tree::XPathEngine; my $pdom = PPI::Document->new('some_code.pl'); my $xpath = Tree::XPathEngine->new(); my @subs = $xpath->findnodes('//Statement-Sub',$pdom); my @vars = $xpath->findnodes('//Token-Symbol',$pdom); Deprecated interface, backward-compatible with C version 1: use PPIx::XPath; my $pxp = PPIx::XPath->new("some_code.pl"); my @subs = $pxp->match("//Statement::Sub"); my $vars = $pxp->match("//Token::Symbol"); =head1 DESCRIPTION This module augments L's classes with the methods required by L, allowing you to perform complex XPath matches against any PDOM tree. See L for details about its methods. =head2 Mapping the PDOM to the XPath data model =begin :list * Each node in the PDOM is an element as seen by XPath * The name of the element is the class name of the node, minus the initial C, with C<::> replaced by C<->. That is: ($xpath_name = substr($pdom_node->class,5)) =~ s/::/-/g; * Only "significant" nodes are seen by XPath * all scalar-valued accessors of PDOM nodes are visible as attributes * "here-docs" contents are I mapped =end :list =method C my $pxp = PPIx::XPath->new("some_code.pl"); my $pxp = PPIx::XPath->new($pdom); Only useful for the backward-compatible, and deprecated, interface. Returns an instance of C tied to the given document. =cut sub new { my ($class,$source) = @_; croak "PPIx::XPath->new needs a source document" unless defined($source); my $doc; if (blessed($source) && $source->isa('PPI::Node')) { $doc = $source; } elsif (reftype($source) eq 'SCALAR' or (!ref($source) && -f $source)) { $doc = PPI::Document->new($source); } else { croak "PPIx::XPath expects either a PPI::Node or a file" . " got a: [" .( ref($source) || $source ). ']'; } return bless {doc=>$doc},$class; } =method C my $new_xpath_expr = $pxp->clean_xpath_expr($old_xpath_expr); C version 1.0.0 allowed the use of partial package names (like C) as element names: this collides with the axis specification of proper XPath. For this reason, in newer version of C, the element name is the class name of the PDOM node, minus the initial C, with C<::> replaced by C<-> (like C). This method replaces all occurrences of PPI package names in the given string with the new names. =cut { my $legacy_names_rx;my %new_name_for; sub clean_xpath_expr { my (undef,$expr)=@_; $expr =~ s{$legacy_names_rx}{$new_name_for{$1}}ge; return $expr; } my @PPI_Packs; # taken from Devel::Symdump # breadth-first search of packages under C my @packages=('PPI'); while (my $pack=shift(@packages)) { my %pack_symbols = do { no strict 'refs'; ## no critic(ProhibitNoStrict) %{*{"$pack\::"}} }; while (my ($key,$val)=each(%pack_symbols)) { # that {HASH} lookup is special for typeglobs, so we have # to alias a local typeglob to make it work local *ENTRY=$val; # does this symbol table entry look like a sub-package? if (defined $val && defined *ENTRY{HASH} && $key=~/::$/ && $key !~ /^::/ && $key ne 'main::' && $key ne '::') { # add it to the search list my $p = "$pack\::$key";$p =~ s{::$}{}; push @packages,$p; # and add it to the map of names $p =~ s{^PPI::}{}; next unless $p=~/::/; my $newname=$p; $newname =~ s{::}{-}g; push @PPI_Packs,$p; $new_name_for{$p}=$newname; } } } # @PPI_Packs now contains all the old-style names, build a regex # to match them (the sort is important, we want to match longer # names first!) $legacy_names_rx='\b('.join(q{|}, sort {length($b) <=> length($a)} @PPI_Packs ).')\b'; $legacy_names_rx=qr{$legacy_names_rx}; } =method C my @subs = $pxp->match("//Statement::Sub"); my $vars = $pxp->match("//Token::Symbol"); Only useful for the backward-compatible, and deprecated, interface. From the document this instance was built against, returns the nodes that match the given XPath expression. You should not use this method, you should call L<< C|Tree::XPathEngine/findnodes >> instead: my $xpath = Tree::XPathEngine->new(); my @subs = $xpath->findnodes('//Statement-Sub',$pdom); my @vars = $xpath->findnodes('//Token-Symbol',$pdom); =cut sub match { my ($self,$expr) = @_; $expr=$self->clean_xpath_expr($expr); Tree::XPathEngine->new()->findnodes($expr,$self->{doc}); } =head1 BUGS and LIMITATIONS =for :list * "here-docs" contents are I mapped * node ordering is slow, because I could not find a way in PPI to compare two nodes for document order; suggestions are most welcome =head1 SEE ALSO L L L (the XPath specification) =head1 AUTHORS Dan Brook original author Gianni Ceccarelli Tree::XPathEngine-based re-implementation =cut package PPI::Element; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_name { my $pack_name=substr($_[0]->class,5); $pack_name =~ s/::/-/g; $pack_name } sub xpath_get_next_sibling { $_[0]->snext_sibling } sub xpath_get_previous_sibling { $_[0]->sprevious_sibling } sub xpath_get_root_node { $_[0]->top } sub xpath_get_parent_node { $_[0]->parent } sub xpath_is_element_node { 1 } sub xpath_is_attribute_node { 0 } sub xpath_is_document_node { 0 } sub xpath_get_attributes { return PPIx::XPath::Attr->new($_[0],'significant'), PPIx::XPath::Attr->new($_[0],'content'), } sub xpath_to_literal { "$_[0]" } sub xpath_get_value { "$_[0]" } sub xpath_string_value { "$_[0]" } sub xpath_cmp { my( $a, $b)= @_; if ( UNIVERSAL::isa( $b, 'PPIx::XPath::Attr')) { # elt <=> att, compare the elt to the att->{elt} # if the elt is the att->{elt} (cmp return 0) then -1, elt is before att return ($a->_xpath_elt_cmp( $b->{parent}) ) || -1 ; } elsif ( UNIVERSAL::isa( $b, 'PPI::Document')) { # elt <=> document, elt is after document return 1; } else { # 2 elts, compare them return $a->_xpath_elt_cmp( $b); } } sub _xpath_elt_cmp { my ($a,$b)=@_; # easy cases return 0 if( $a == $b); return 1 if( $a->_xpath_in($b)); # a starts after b return -1 if( $b->_xpath_in($a)); # a starts before b # ancestors does not include the element itself my @a_pile= ($a, $a->_xpath_ancestors); my @b_pile= ($b, $b->_xpath_ancestors); # the 2 elements are not in the same twig return undef unless( $a_pile[-1] == $b_pile[-1]); # find the first non common ancestors (they are siblings) my $a_anc= pop @a_pile; my $b_anc= pop @b_pile; while( $a_anc == $b_anc) { $a_anc= pop @a_pile; $b_anc= pop @b_pile; } # from there move left and right and figure out the order my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc); while () { $a_prev= $a_prev->sprevious_sibling || return( -1); return 1 if( $a_prev == $b_next); $a_next= $a_next->snext_sibling || return( 1); return -1 if( $a_next == $b_prev); $b_prev= $b_prev->sprevious_sibling || return( 1); return -1 if( $b_prev == $a_next); $b_next= $b_next->snext_sibling || return( -1); return 1 if( $b_next == $a_prev); } } sub _xpath_in { my ($self, $ancestor)= @_; while ( $self= $self->parent) { return $self if ( $self == $ancestor); } } sub _xpath_ancestors { my( $self)= @_; my @ancestors; while ( $self= $self->parent) { push @ancestors, $self; } return @ancestors; } package PPI::Token; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_child_nodes { return } package PPI::Token::Quote::Double; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'interpolations'), } package PPI::Token::Number; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'base'), } package PPI::Token::Word; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'method-call'), } package PPI::Token::Comment; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'line'), } package PPI::Token::HereDoc; ## no critic(ProhibitMultiplePackages) use strict; use warnings; # TODO: add access to the contents of the heredoc (->heredoc method) sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'terminator'), } package PPI::Token::Prototype; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_to_literal { $_[0]->prototype } sub xpath_get_value { $_[0]->prototype } sub xpath_string_value { $_[0]->prototype } package PPI::Node; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_child_nodes { $_[0]->schildren } sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'scope'), } package PPI::Token::Attribute; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'identifier'), PPIx::XPath::Attr->new($_[0],'parameters'), } package PPI::Token::Symbol; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'symbol'), PPIx::XPath::Attr->new($_[0],'canonical'), PPIx::XPath::Attr->new($_[0],'raw_type'), PPIx::XPath::Attr->new($_[0],'symbol_typel'), } package PPI::Statement; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'label'), PPIx::XPath::Attr->new($_[0],'stable'), PPIx::XPath::Attr->new($_[0],'type'), } package PPI::Statement::Sub; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'name'), PPIx::XPath::Attr->new($_[0],'prototype'), PPIx::XPath::Attr->new($_[0],'forward'), PPIx::XPath::Attr->new($_[0],'reserved'), } package PPI::Statement::Package; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'namespace'), PPIx::XPath::Attr->new($_[0],'file-scoped'), } package PPI::Statement::Include; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'module'), PPIx::XPath::Attr->new($_[0],'module-version'), PPIx::XPath::Attr->new($_[0],'version'), PPIx::XPath::Attr->new($_[0],'version-literal'), PPIx::XPath::Attr->new($_[0],'pragma'), } package PPI::Structure; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'start'), PPIx::XPath::Attr->new($_[0],'finish'), PPIx::XPath::Attr->new($_[0],'braces'), } package PPI::Document; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_root_node { $_[0] } sub xpath_get_parent_node { return } sub xpath_is_attribute_node { 0 } sub xpath_is_document_node { 1 } package PPIx::XPath::Attr; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub new { my ($class,$parent,$name)=@_; my $meth=$parent->can($name); return unless $meth; my $value; eval {$value=$meth->($parent);1} or return; return unless defined $value; return bless {parent=>$parent,name=>$name,value=>$value},$class; } sub xpath_get_name { $_[0]->{name} } sub xpath_get_root_node { $_[0]->{parent}->top } sub xpath_get_parent_node { $_[0]->{parent} } sub xpath_is_element_node { 0 } sub xpath_is_attribute_node { 1 } sub xpath_is_document_node { 0 } sub xpath_to_literal { $_[0]->{value} } sub xpath_get_value { $_[0]->{value} } sub xpath_string_value { $_[0]->{value} } sub xpath_to_number { Tree::XPathEngine::Number->new($_[0]->{value}) } sub xpath_cmp { my( $a, $b)= @_; if ( UNIVERSAL::isa( $b, 'PPIx::XPath::Attr')) { # 2 attributes, compare their elements, then their name return ($a->{parent}->_xpath_elt_cmp( $b->{parent}) ) || ($a->{name} cmp $b->{name}); } elsif ( UNIVERSAL::isa( $b, 'PPI::Document')) { # att <=> document, att is after document return 1; } else { # att <=> elt : compare the att->elt and the elt # if att->elt is the elt (cmp returns 0) then 1 (elt is before att) return ($a->{parent}->_xpath_elt_cmp( $b) ) || 1 ; } } 1;