package WebCoso::Step::ReST::ToXml;
use strict;
use warnings;
use base 'WebCoso::Step';
use Class::Std;
use Inline 'Python';
use XML::LibXML;
use Encode;

{

=head2 What it does

Takes the C<< {language=>'whatever'}->rstdoc >> entries, passes them to
docutils, takes the resulting XML, passes it to the parser, and saves the
DOM in C<< {language=>'whatever'}->xmldom >>.

It assumes everything comes from a single file.

Everything is done on the first pass, which also gathers a bit of metadata.

=cut

    # Property keys: where the reST text is read from and where the DOM is
    # stored.
    my %srckey_of :ATTR(:init_arg :get :default);
    my %dstkey_of :ATTR(:init_arg :get :default);

    # One parser shared by every conversion: external DTDs are not loaded and
    # redundant namespace declarations are cleaned up.
    my $xml_parser=XML::LibXML->new();
    $xml_parser->load_ext_dtd(0);
    $xml_parser->clean_namespaces(1);

    # process($resource, $stage)
    #
    # Only acts during the 'meta' stage. Reads the reST text stored under the
    # source key, converts it to an XML DOM, stores the DOM under the
    # destination key and copies the document metadata onto the resource.
    #
    # If the resource has a language-independent source it is handled as
    # monolingual; otherwise every value of the 'language' axis is converted
    # separately. Languages that have no source text are skipped, just as a
    # resource with no source at all is silently left alone.
    #
    # Always returns nothing.
    sub process {
        my ($self,$resource,$stage)=@_;

        return unless $stage eq 'meta';

        # Only the first value of the 'filename' axis is used.
        my ($src_path)=$resource->get_axis_values('filename');
        my $srckey=$self->get_srckey();
        my $dstkey=$self->get_dstkey();

        my $rst_doc=$resource->get_property_string($srckey);

        if (defined $rst_doc) { # monolingual
            my $dom=rst2xml($rst_doc,$src_path);
            $resource->set_property($dstkey,$dom);
            $self->_set_meta($resource,$dom);
            return;
        }

        # multilingual
        for my $cur_lang ($resource->get_axis_values('language')) {
            $rst_doc=$resource->get_property_string({language=>$cur_lang},$srckey);

            # Nothing to convert for this language: do not hand undef to
            # the encoder and to docutils.
            next unless defined $rst_doc;

            my $dom=rst2xml($rst_doc,$src_path,$cur_lang);
            $resource->set_property({language=>$cur_lang},$dstkey,$dom);
            $self->_set_meta($resource,$cur_lang,$dom);
        }

        return;
    }

    {
        # Metadata name => XPath expression selecting its node(s) in the
        # docutils XML document.
        my %docinfo_fields=(
            title => '/document/title',
            subtitle => '/document/subtitle',
            author => '/document/docinfo/author|/document/docinfo/authors/author',
            version => '/document/docinfo/version',
            status => '/document/docinfo/status',
            date => '/document/docinfo/date',
            creation_date => '/document/docinfo/field[field_name="CreationDate"]/field_body',
        );

        # Only referenced by the disabled collection handling in _set_meta.
        my $collections='/document/docinfo/field[field_name="Collection"]/field_body|/document/docinfo/field[field_name="Collections"]/field_body//list_item';

        # _set_meta($res, $dom)          -- monolingual
        # _set_meta($res, $lang, $dom)   -- one language of a multilingual resource
        #
        # For every entry of %docinfo_fields that matches at least one node,
        # sets the property of the same name on $res: a plain string when a
        # single node matches, an array reference of strings otherwise.
        # Fields with no matching node are left untouched.
        #
        # Returns nothing.
        sub _set_meta {
            my ($self,$res,$lang,$dom)=@_;

            if (defined $dom) { # 4 parameters
                $lang={language=>$lang};
            }
            else { # 3 parameters: monolingual, the third argument is the DOM
                $dom=$lang;
                $lang={};
            }

            for my $meta (keys %docinfo_fields) {
                my @values=map { $_->textContent() }
                    $dom->findnodes($docinfo_fields{$meta});
                next unless @values;

                $res->set_property(
                    $lang,
                    $meta,
                    @values==1 ? $values[0] : [@values],
                );
            }

            # this requires being able to refer to collections and resources
            # by name -- TODO
            #my @collections=$dom->findnodes($collections);
            #return unless @collections;
            #@collections=map {$_->textContent()} @collections;
            #$res->add_coll($_) for @collections;

            return;
        }
    }

    # rst2xml($rst_string, $source_path, $language)
    #
    # Converts reST text to an XML::LibXML document: the text is encoded as
    # UTF-8, rendered to XML by docutils (through the Inline::Python helper
    # _rst2xml) and parsed with the shared parser, using $source_path as the
    # base URI.
    #
    # NOTE(review): $language is accepted but not used; docutils is always
    # run with language code 'en'. This may be deliberate -- confirm before
    # changing it.
    sub rst2xml {
        my ($rst_string,$source_path,$language)=@_;

        $rst_string=Encode::encode('utf-8',$rst_string);
        my $xml_string=_rst2xml($rst_string,$source_path,'en');

        $xml_parser->base_uri($source_path);
        return $xml_parser->parse_string($xml_string);
    }
}

1;

__DATA__

__Python__

import locale
import docutils.core

# Render a UTF-8 encoded reST source to a UTF-8 encoded docutils XML string.
def _rst2xml(source,source_path,language):
    return docutils.core.publish_string(
        source,source_path=source_path,
        writer_name='xml',
        settings_overrides={
            'input_encoding':'utf-8',
            'output_encoding':'utf-8',
            'language_code':language,
        },
    )