diff options
Diffstat (limited to 'lib/WebCoso/Step')
-rw-r--r-- | lib/WebCoso/Step/Base.pm | 7 | ||||
-rw-r--r-- | lib/WebCoso/Step/ReST/SplitLang.pm | 87 | ||||
-rw-r--r-- | lib/WebCoso/Step/ReST/ToXml.pm | 139 |
3 files changed, 0 insertions, 233 deletions
diff --git a/lib/WebCoso/Step/Base.pm b/lib/WebCoso/Step/Base.pm deleted file mode 100644 index 8e73705..0000000 --- a/lib/WebCoso/Step/Base.pm +++ /dev/null @@ -1,7 +0,0 @@ -package WebCoso::Step::Base; -use strict; -use warnings; -use Class::Std; -use WebCoso::X; - -1; diff --git a/lib/WebCoso/Step/ReST/SplitLang.pm b/lib/WebCoso/Step/ReST/SplitLang.pm deleted file mode 100644 index cdbc109..0000000 --- a/lib/WebCoso/Step/ReST/SplitLang.pm +++ /dev/null @@ -1,87 +0,0 @@ -package WebCoso::Step::ReST::SplitLang; -use strict; -use warnings; -use base 'WebCoso::Step'; -use Class::Std; - -{ - -=head2 Che fa - -Prende il sorgente da {filename=>'sperosiaunosolo'}->datastream, cerca -righe della forma - - ^\s*.. lang:: (\w*) - -raccoglie tutti i C<$1>, e quelle sono le lingue (C<''> sta per 'tutte -le lingue') - -Splitta poi in {language=>'$1'}->rstdoc (stringhe) - -Fa tutto alla prima passata - -=cut - -my %srckey_of :ATTR(:init_arg<from> :get<srckey> :default<datastream>); -my %dstkey_of :ATTR(:init_arg<to> :get<dstkey> :default<rstdoc>); - -my $lang_re=qr{^\s*\.\.\s+lang::(?:\s+(\w+))?\s*$}; - -sub process { - my ($self,$resource,$stage)=@_; - - return unless $stage eq 'meta'; - - my $srckey=$self->get_srckey(); - my $dstkey=$self->get_dstkey(); - - my $fh=$resource->get_property_fh($srckey); - if (!defined $fh) { - my ($filename)=$resource->get_axis_values('filename'); - $fh=$resource->get_property_fh({filename=>$filename},$srckey); - } - - binmode $fh,':utf8'; - - # raccolgo le lingue usate - my %langs=(''=>undef); - seek $fh,0,0; - while (my $line=<$fh>) { - if ($line =~ m{$lang_re}) { - $langs{$1||''}=undef; - } - } - delete $langs{''}; - seek $fh,0,0; - - if (%langs) { # multilingua: split! - my $curlang='';my %docs=(); - while (my $line=<$fh>) { - if ($line =~ m{$lang_re}) { - $curlang=$1||''; - next; - } - if ($curlang) { - $docs{$curlang}.=$line; - } - else { # 'any', per cui scrivo su tutti - $docs{$_}.=$line for keys %langs; - } - } - # salvo - $resource->set_property({language=>$_},$dstkey=>$docs{$_}) - for keys %langs; - } - else { # monolingua: passa il filehandle - $resource->set_property($dstkey=>$fh); - } - - # rimetto a posto il filehandle - seek $fh,0,0; - - return; -} - -} - -1; diff --git a/lib/WebCoso/Step/ReST/ToXml.pm b/lib/WebCoso/Step/ReST/ToXml.pm deleted file mode 100644 index f5cf5a9..0000000 --- a/lib/WebCoso/Step/ReST/ToXml.pm +++ /dev/null @@ -1,139 +0,0 @@ -package WebCoso::Step::ReST::ToXml; -use strict; -use warnings; -use base 'WebCoso::Step'; -use Class::Std; -use Inline 'Python'; -use XML::LibXML; -use Encode; - -{ - -=head2 Che fa - -prende i {language=>'quelchece'}->rstdoc, li passa a docutils, prende -l'xml, lo passa al parser, e salva il dom in -{language=>'quelchece'}->xmldom - -suppone venga tutto da un solo file - -Fa tutto alla prima passata, e raccatta un po' di meta - -=cut - -my %srckey_of :ATTR(:init_arg<from> :get<srckey> :default<rstdoc>); -my %dstkey_of :ATTR(:init_arg<to> :get<dstkey> :default<xmldom>); - -my $xml_parser=XML::LibXML->new(); -$xml_parser->load_ext_dtd(0); -$xml_parser->clean_namespaces(1); - -sub process { - my ($self,$resource,$stage)=@_; - - return unless $stage eq 'meta'; - - my ($src_path)=$resource->get_axis_values('filename'); - - my $srckey=$self->get_srckey(); - my $dstkey=$self->get_dstkey(); - - my ($rst_doc,$xml_dom); - $rst_doc=$resource->get_property_string($srckey); - if (defined $rst_doc) { # monolingua - my $dom=rst2xml($rst_doc,$src_path); - $resource->set_property( - $dstkey, - $dom, - ); - $self->_set_meta($resource,$dom); - } - else { # multilingua - my @langs=$resource->get_axis_values('language'); - for my $cur_lang (@langs) { - $rst_doc=$resource->get_property_string({language=>$cur_lang},$srckey); - my $dom=rst2xml($rst_doc,$src_path,$cur_lang); - $resource->set_property( - {language=>$cur_lang}, - $dstkey, - $dom, - ); - $self->_set_meta($resource,$cur_lang,$dom); - } - } - - return; -} - -{ -my %docinfo_fields=( - title => '/document/title', - subtitle => '/document/subtitle', - author => '/document/docinfo/author|/document/docinfo/authors/author', - version => '/document/docinfo/version', - status => '/document/docinfo/status', - date => '/document/docinfo/date', - creation_date => '/document/docinfo/field[field_name="CreationDate"]/field_body', -); -my $collections='/document/docinfo/field[field_name="Collection"]/field_body|/document/docinfo/field[field_name="Collections"]/field_body//list_item'; -sub _set_meta { - my ($self,$res,$lang,$dom)=@_; - - if ($dom) { # 4 parametri - $lang={language=>$lang}; - } - else { # 3 parametri: monolingua - $dom=$lang; - $lang={}; - } - - for my $meta (keys %docinfo_fields) { - my @nodes=$dom->findnodes($docinfo_fields{$meta}); - next unless @nodes; - @nodes=map {$_->textContent()} @nodes; - if (@nodes==1) { - $res->set_property($lang,$meta,$nodes[0]); - } - else { - $res->set_property($lang,$meta,[@nodes]); - } - } - - # per questo serve poter chiamare coll e res per nome TODO - #my @collections=$dom->findnodes($collections); - #return unless @collections; - #@collections=map {$_->textContent()} @collections; - - #$res->add_coll($_) for @collections; - - return; -} -} - -sub rst2xml { - my ($rst_string,$source_path,$language)=@_; - - $rst_string=Encode::encode('utf-8',$rst_string); - my $xml_string=_rst2xml($rst_string,$source_path,'en'); - $xml_parser->base_uri($source_path); - return $xml_parser->parse_string($xml_string); -} - -} -1; -__DATA__ -__Python__ - -import locale -import docutils.core - -def _rst2xml(source,source_path,language): - return docutils.core.publish_string( - source,source_path=source_path, - writer_name='xml', - settings_overrides={ - 'input_encoding':'utf-8', - 'output_encoding':'utf-8', - 'language_code':language, - }, - ) |