package WebCoso::Common; use strict; use warnings; use File::Next; use Path::Class; use XML::LibXML::XPathContext; use DateTime; use DateTime::Format::DateParse; use Log::Log4perl ':easy'; use List::MoreUtils 'uniq'; use List::Util 'first'; our $SRCPATH='src'; our $DSTPATH='dst'; our $DSTBASEURL='/'; our @TMPLPATH=('common/'); our $VERBOSITY=0; sub setpaths { for my $path ($SRCPATH,$DSTPATH,@TMPLPATH) { $path=file($path)->absolute()->cleanup->stringify; } return; } my $xpath=XML::LibXML::XPathContext->new(); $xpath->registerNs('x', 'http://www.w3.org/1999/xhtml'); sub langOf { my ($name)=@_; $name=~m{(^|/)(?:document|feed)\.([^.]+)(\.|$)} and return $2; return; } sub typeOf { my ($name)=@_; $name=~m{(^|/)(?:document|feed)\.[^.]+\.((?:[^.]+\.)*[^.]+)$} and return $2; return; } sub typedAs { my ($name,$newtype)=@_; $name=~s{(^|/)((?:document|feed)\.[^.]+\.)((?:[^.]+\.)*[^.]+)$}{$1$2$newtype}; return $name; } sub dstUriFor { my ($name,$path,$short)=@_; $short=1 unless defined $short; $path||=''; DEBUG("dstUriFor($name,$path,$short)"); if ($short) { $name=~s{/[^/]+$}{/}; } else { $name=typedAs($name,'html'); } if ($path) { $name=file($name)->absolute(file($path)->parent)->relative($SRCPATH); $name=dir("$SRCPATH/$name/")->cleanup->stringify . '/'; $name=~ s{/(?:[^/]+)/\.\.}{}g; } $name=~s{^\Q$SRCPATH\E/}{$DSTBASEURL}; DEBUG("dstUriFor -> $name"); return $name; } sub srcXMLFor { my ($lang,$path,$name)=@_; my $doc_name="$name"; # force it to a string, since it might be an XML Node $doc_name=~s{\.html$}{.du.xml}; $doc_name=~s{/$}{/document.$lang.du.xml}; if ($doc_name=~m{^\Q$DSTBASEURL\E}) { $doc_name=~s{^\Q$DSTBASEURL\E}{$SRCPATH/}; } else { $doc_name=file($doc_name)->absolute(file($path)->parent)->cleanup->stringify; $doc_name=~ s{/(?:[^/]+)/\.\.}{}g; } return $doc_name; } sub isLang { my ($lang,$name)=@_; DEBUG("isLang($lang,$name)"); return 1 if $name=~m{/$}; # assume that MultiViews on the server will work return 1 if langOf($name) eq $lang; return; } sub getTitleFor { my ($fc,$lang,$path,$name)=@_; DEBUG("getTitleFor($lang,$path,$name)"); my ($doc_name,$doc); if (ref($lang)) { # assume it's an array for my $l (@$lang) { $doc_name=srcXMLFor($l,$path,$name); DEBUG("getTitleFor -> try $doc_name"); $doc=$fc->get($doc_name); last if $doc; } } else { $doc_name=srcXMLFor($lang,$path,$name); DEBUG("getTitleFor -> $doc_name"); $doc=$fc->get($doc_name); } unless ($doc) { LOGWARN("No document for <$doc_name>, returning <$name>"); return "$name"; # force it to a string, since it might be an XML Node } my $title=$xpath->findnodes( q{/document/title}, $doc); DEBUG("getTitleFor <- $title"); return $title; } sub getAtomIdFor { my ($fc,$lang,$path,$name)=@_; DEBUG("getAtomIdFor($lang,$path,$name)"); my $doc_name=srcXMLFor($lang,$path,$name); DEBUG("getAtomIdFor -> $doc_name"); my $doc=$fc->get($doc_name); unless ($doc) { LOGWARN("No document for <$doc_name>, returning ''"); return ''; } my $atomId=$xpath->findnodes( q{/document/docinfo/field[field_name='Id']/field_body}, $doc); unless (defined $atomId) { LOGWARN("No Id field for <$doc_name>, returning ''"); return ''; } $atomId=~s{(?:\A\s+)|(?:\s+\z)}{}smgx; return $atomId; } { my $zero=DateTime->from_epoch(epoch=>0); sub getDatesFor { my ($fc,$lang,$path,$name)=@_; DEBUG("getDatesFor($lang,$path,$name)"); my $doc_name=srcXMLFor($lang,$path,$name); my $doc=$fc->get($doc_name); unless ($doc) { LOGWARN("No document for <$doc_name>, returning <0>"); return {creation=>$zero,last_change=>$zero}; } my ($creation_date)=map {$_->textContent} $xpath->findnodes( q{/document/docinfo/field[field_name='CreationDate']/field_body}, $doc); $creation_date=DateTime::Format::DateParse->parse_datetime($creation_date)||$zero; my $earliest_name=earliestFor($lang,$path,$name); my $last_change=DateTime->from_epoch(epoch=>$fc->stat($earliest_name)->mtime); return {creation=>$creation_date, last_change=>$last_change}; } } sub getTagsFor { my ($fc,$lang,$path,$name)=@_; DEBUG("getTagsFor($lang,$path,$name)"); my $doc_name=srcXMLFor($lang,$path,$name); my $doc=$fc->get($doc_name); unless ($doc) { LOGWARN("No document for <$doc_name>, returning <>"); return; } my @tags=map {$_->textContent} $xpath->findnodes( q{/document/docinfo/field[field_name='tags']/field_body/*/list_item|/document/docinfo/field[field_name='tags']/field_body/paragraph}, $doc); chomp for @tags; return @tags; } sub getDates { my ($fc,@docs)=@_; my %dates; for my $doc_name (uniq(@docs)) { my $xml_name=typedAs($doc_name,'du.xml'); my $doc=$fc->get($xml_name); my ($creation_date)=map {$_->textContent} $xpath->findnodes( q{/document/docinfo/field[field_name='CreationDate']/field_body}, $doc); $creation_date=DateTime::Format::DateParse->parse_datetime($creation_date); my $earliest_name=earliestFor(langOf($doc_name),$doc_name,'./'); my $last_change=DateTime->from_epoch(epoch=> $fc->stat($earliest_name)->mtime); $dates{$doc_name}={creation=>$creation_date, last_change=>$last_change}; } return \%dates; } sub getTags { my ($fc,@docs)=@_; my %tagged; for my $doc_name (uniq(@docs)) { my $xml_name=typedAs($doc_name,'du.xml'); my $doc=$fc->get($xml_name); my @tags=map {$_->textContent} $xpath->findnodes( q{/document/docinfo/field[field_name='tags']/field_body/*/list_item|/document/docinfo/field[field_name='tags']/field_body/paragraph}, $doc); chomp for @tags; push @{$tagged{$_}},$doc_name for @tags; } return \%tagged; } sub fromTo { my ($base,$opts)=@_; DEBUG("fromTo: $base,{@{[ %$opts ]}}"); my $iter=File::Next::files( { file_filter=>$opts->{files}, descend_filter=>$opts->{dirs}, }, $base); my (@ret,$file); if (defined $opts->{transform}) { push @ret,$opts->{transform}->($file) while $file=$iter->(); } else { push @ret,$file while $file=$iter->(); } DEBUG("fromTo: @ret"); return uniq(@ret); } { my %order=( 'tt'=>0, 'rest.tt'=>0, 'rest.txt'=>1, 'xml'=>2, 'du.xml'=>2, ); sub earliest { my ($a,$b)=@_; return $a unless $b; return $order{$a} < $order{$b} ? $a : $b; } sub keepEarliest { my %dirs; for my $f (@_) { my $c=file($f); my $lang=langOf($c->basename); my $type=typeOf($c->basename); if (!defined $lang or !defined $type) { die "Weird document name <$f>"; } $dirs{$c->parent}->{$lang}=earliest($type,$dirs{$c->parent}->{$lang}); } my @ret; while (my ($d,$langs)=each %dirs) { while (my ($lang,$type)=each %$langs) { push @ret,file($d,"document.$lang.$type")->stringify; } } return uniq(@ret); } sub earliestFor { my ($lang,$path,$name)=@_; my $doc_name="$name"; # force it to a string, since it might be an XML Node $doc_name=~s{\.[^/]+$}{.du.xml}; $doc_name=~s{/$}{/document.$lang.}; if ($doc_name=~m{^\Q$DSTBASEURL\E}) { $doc_name=~s{^\Q$DSTBASEURL\E}{$SRCPATH/}; } else { $doc_name=file($doc_name)->absolute(file($path)->parent)->cleanup->stringify; $doc_name=~ s{/(?:[^/]+)/\.\.}{}g; } for my $ext (sort { $order{$a} <=> $order{$b} } keys %order) { return "$doc_name$ext" if -e "$doc_name$ext"; } return; } } our %docfiles=( files=>sub{m{^document\.(?:[^.]+\.)+(?:tt|rest\.txt|xml)$}}, dirs=>sub{!m{^(tags$|_|\.)}}); our %feedfiles=( files=>sub{m{^feed\.(?:[^.]+\.)+tt$}}, dirs=>sub{!m{^[_.]}}); 1;