package PPIx::XPath; use strict; use warnings; use PPI; use Carp; use Scalar::Util qw(reftype blessed); use Tree::XPathEngine; use 5.006; # VERSION # ABSTRACT: an XPath implementation for the PDOM =head1 SYNOPSIS use PPI; use PPIx::XPath; use Tree::XPathEngine; my $pdom = PPI::Document->new('some_code.pl'); my $xpath = Tree::XPathEngine->new(); my @subs = $xpath->findnodes('//Statement-Sub',$pdom); my @vars = $xpath->findnodes('//Token-Symbol',$pdom); Deprecated interface, backward-compatible with C version 1: use PPIx::XPath; my $pxp = PPIx::XPath->new("some_code.pl"); my @subs = $pxp->match("//Statement::Sub"); my $vars = $pxp->match("//Token::Symbol"); =head1 DESCRIPTION This module augments L's classes with the methods required by L, allowing you to perform complex XPath matches against any PDOM tree. See L for details about its methods. =head2 Mapping the PDOM to the XPath data model =begin :list * Each node in the PDOM is an element as seen by XPath * The name of the element is the class name of the node, minus the initial C, with C<::> replaced by C<->. That is: ($xpath_name = substr($pdom_node->class,5)) =~ s/::/-/g; * Only "significant" nodes are seen by XPath * all scalar-valued accessors of PDOM nodes are visible as attributes * "here-docs" contents are I mapped =end :list =cut sub new { my ($class,$source) = @_; croak "PPIx::XPath->new needs a source document" unless defined($source); my $doc; if (blessed($source) && $source->isa('PPI::Node')) { $doc = $source; } elsif (reftype($source) eq 'SCALAR' or (!ref($source) && -f $source)) { $doc = PPI::Document->new($source); } else { croak "PPIx::XPath expects either a PPI::Node or a file" . " got a: [" .( ref($source) || $source ). ']'; } return bless {doc=>$doc},$class; } # PPIx::XPath 1.0.0 allowed the use of partial package names as node names; # this collides with the axis specification of proper XPath. # Here we change the "old-style" node names into the new names { my $legacy_names_rx;my %new_name_for; sub clean_xpath_expr { my (undef,$expr)=@_; $expr =~ s{$legacy_names_rx}{$new_name_for{$1}}ge; return $expr; } my @PPI_Packs; # taken from Devel::Symdump my @packages=('PPI'); while (my $pack=shift(@packages)) { my %pack_symbols = do { no strict 'refs'; ## no critic(ProhibitNoStrict) %{*{"$pack\::"}} }; while (my ($key,$val)=each(%pack_symbols)) { local *ENTRY=$val; if (defined $val && defined *ENTRY{HASH} && $key=~/::$/ && $key !~ /^::/ && $key ne 'main::' && $key ne '::') { my $p = "$pack\::$key";$p =~ s{::$}{}; push @packages,$p; $p =~ s{^PPI::}{}; next unless $p=~/::/; my $newname=$p; $newname =~ s{::}{-}g; push @PPI_Packs,$p; $new_name_for{$p}=$newname; } } } $legacy_names_rx='\b('.join(q{|}, sort {length($b) <=> length($a)} @PPI_Packs ).')\b'; $legacy_names_rx=qr{$legacy_names_rx}; } sub match { my ($self,$expr) = @_; $expr=$self->clean_xpath_expr($expr); Tree::XPathEngine->new()->findnodes($expr,$self->{doc}); } =head1 BUGS and LIMITATIONS =for :list * "here-docs" contents are I mapped * node ordering is slow, because I could not find a way in PPI to compare two nodes for document order; suggestions are most welcome =head1 SEE ALSO L L L (the XPath specification) =head1 AUTHORS Dan Brook original author Gianni Ceccarelli Tree::XPathEngine-based re-implementation =cut package PPI::Element; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_name { my $pack_name=substr($_[0]->class,5); $pack_name =~ s/::/-/g; $pack_name } sub xpath_get_next_sibling { $_[0]->snext_sibling } sub xpath_get_previous_sibling { $_[0]->sprevious_sibling } sub xpath_get_root_node { $_[0]->top } sub xpath_get_parent_node { $_[0]->parent } sub xpath_is_element_node { 1 } sub xpath_is_attribute_node { 0 } sub xpath_is_document_node { 0 } sub xpath_get_attributes { return PPIx::XPath::Attr->new($_[0],'significant'), PPIx::XPath::Attr->new($_[0],'content'), } sub xpath_to_literal { "$_[0]" } sub xpath_get_value { "$_[0]" } sub xpath_string_value { "$_[0]" } sub xpath_cmp { my( $a, $b)= @_; if ( UNIVERSAL::isa( $b, 'PPIx::XPath::Attr')) { # elt <=> att, compare the elt to the att->{elt} # if the elt is the att->{elt} (cmp return 0) then -1, elt is before att return ($a->_xpath_elt_cmp( $b->{parent}) ) || -1 ; } elsif ( UNIVERSAL::isa( $b, 'PPI::Document')) { # elt <=> document, elt is after document return 1; } else { # 2 elts, compare them return $a->_xpath_elt_cmp( $b); } } sub _xpath_elt_cmp { my ($a,$b)=@_; # easy cases return 0 if( $a == $b); return 1 if( $a->_xpath_in($b)); # a starts after b return -1 if( $b->_xpath_in($a)); # a starts before b # ancestors does not include the element itself my @a_pile= ($a, $a->_xpath_ancestors); my @b_pile= ($b, $b->_xpath_ancestors); # the 2 elements are not in the same twig return undef unless( $a_pile[-1] == $b_pile[-1]); # find the first non common ancestors (they are siblings) my $a_anc= pop @a_pile; my $b_anc= pop @b_pile; while( $a_anc == $b_anc) { $a_anc= pop @a_pile; $b_anc= pop @b_pile; } # from there move left and right and figure out the order my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc); while () { $a_prev= $a_prev->sprevious_sibling || return( -1); return 1 if( $a_prev == $b_next); $a_next= $a_next->snext_sibling || return( 1); return -1 if( $a_next == $b_prev); $b_prev= $b_prev->sprevious_sibling || return( 1); return -1 if( $b_prev == $a_next); $b_next= $b_next->snext_sibling || return( -1); return 1 if( $b_next == $a_prev); } } sub _xpath_in { my ($self, $ancestor)= @_; while ( $self= $self->parent) { return $self if ( $self == $ancestor); } } sub _xpath_ancestors { my( $self)= @_; my @ancestors; while ( $self= $self->parent) { push @ancestors, $self; } return @ancestors; } package PPI::Token; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_child_nodes { return } package PPI::Token::Quote::Double; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'interpolations'), } package PPI::Token::Number; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'base'), } package PPI::Token::Word; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'method-call'), } package PPI::Token::Comment; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'line'), } package PPI::Token::HereDoc; ## no critic(ProhibitMultiplePackages) use strict; use warnings; # TODO: add access to the contents of the heredoc (->heredoc method) sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'terminator'), } package PPI::Token::Prototype; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_to_literal { $_[0]->prototype } sub xpath_get_value { $_[0]->prototype } sub xpath_string_value { $_[0]->prototype } package PPI::Node; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_child_nodes { $_[0]->schildren } sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'scope'), } package PPI::Token::Attribute; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'identifier'), PPIx::XPath::Attr->new($_[0],'parameters'), } package PPI::Token::Symbol; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'symbol'), PPIx::XPath::Attr->new($_[0],'canonical'), PPIx::XPath::Attr->new($_[0],'raw_type'), PPIx::XPath::Attr->new($_[0],'symbol_typel'), } package PPI::Statement; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'label'), PPIx::XPath::Attr->new($_[0],'stable'), PPIx::XPath::Attr->new($_[0],'type'), } package PPI::Statement::Sub; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'name'), PPIx::XPath::Attr->new($_[0],'prototype'), PPIx::XPath::Attr->new($_[0],'forward'), PPIx::XPath::Attr->new($_[0],'reserved'), } package PPI::Statement::Package; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'namespace'), PPIx::XPath::Attr->new($_[0],'file-scoped'), } package PPI::Statement::Include; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'module'), PPIx::XPath::Attr->new($_[0],'module-version'), PPIx::XPath::Attr->new($_[0],'version'), PPIx::XPath::Attr->new($_[0],'version-literal'), PPIx::XPath::Attr->new($_[0],'pragma'), } package PPI::Structure; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_attributes { return $_[0]->SUPER::xpath_get_attributes, PPIx::XPath::Attr->new($_[0],'start'), PPIx::XPath::Attr->new($_[0],'finish'), PPIx::XPath::Attr->new($_[0],'braces'), } package PPI::Document; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub xpath_get_root_node { $_[0] } sub xpath_get_parent_node { return } sub xpath_is_attribute_node { 0 } sub xpath_is_document_node { 1 } package PPIx::XPath::Attr; ## no critic(ProhibitMultiplePackages) use strict; use warnings; sub new { my ($class,$parent,$name)=@_; my $meth=$parent->can($name); return unless $meth; my $value; eval {$value=$meth->($parent);1} or return; return unless defined $value; return bless {parent=>$parent,name=>$name,value=>$value},$class; } sub xpath_get_name { $_[0]->{name} } sub xpath_get_root_node { $_[0]->{parent}->top } sub xpath_get_parent_node { $_[0]->{parent} } sub xpath_is_element_node { 0 } sub xpath_is_attribute_node { 1 } sub xpath_is_document_node { 0 } sub xpath_to_literal { $_[0]->{value} } sub xpath_get_value { $_[0]->{value} } sub xpath_string_value { $_[0]->{value} } sub xpath_to_number { Tree::XPathEngine::Number->new($_[0]->{value}) } sub xpath_cmp { my( $a, $b)= @_; if ( UNIVERSAL::isa( $b, 'PPIx::XPath::Attr')) { # 2 attributes, compare their elements, then their name return ($a->{parent}->_xpath_elt_cmp( $b->{parent}) ) || ($a->{name} cmp $b->{name}); } elsif ( UNIVERSAL::isa( $b, 'PPI::Document')) { # att <=> document, att is after document return 1; } else { # att <=> elt : compare the att->elt and the elt # if att->elt is the elt (cmp returns 0) then 1 (elt is before att) return ($a->{parent}->_xpath_elt_cmp( $b) ) || 1 ; } } 1;