diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/WebCoso/Step/ReST/SplitLang.pm | 2 | ||||
-rw-r--r-- | lib/WebCoso/Step/ReST/ToXml.pm | 84 |
2 files changed, 86 insertions, 0 deletions
diff --git a/lib/WebCoso/Step/ReST/SplitLang.pm b/lib/WebCoso/Step/ReST/SplitLang.pm
index a24e55e..dc60757 100644
--- a/lib/WebCoso/Step/ReST/SplitLang.pm
+++ b/lib/WebCoso/Step/ReST/SplitLang.pm
@@ -35,6 +35,8 @@ sub process {
         $fh=$resource->get_property({filename=>$filename},'datastream');
     }
 
+    binmode $fh,':utf8';
+
     # raccolgo le lingue usate
     my %langs=(''=>undef);
     seek $fh,0,0;
diff --git a/lib/WebCoso/Step/ReST/ToXml.pm b/lib/WebCoso/Step/ReST/ToXml.pm
new file mode 100644
index 0000000..2f9384a
--- /dev/null
+++ b/lib/WebCoso/Step/ReST/ToXml.pm
@@ -0,0 +1,84 @@
+package WebCoso::Step::ReST::ToXml;
+use strict;
+use warnings;
+use base 'WebCoso::Step';
+use Class::Std;
+use Inline 'Python'; # the Python source is the __Python__ part of the __DATA__ section at the end of this file
+use XML::LibXML;
+use Encode;
+
+{ # bare block: keeps $xml_parser private to the two subs defined inside it
+
+=head2 What it does
+
+Takes the {language=>'whatever'}->rstdoc values, passes them to docutils,
+takes the resulting XML, passes it to the parser, and saves the DOM in
+{language=>'whatever'}->xmldom
+
+Assumes everything comes from a single file.
+
+Does everything on the first pass.
+
+=cut
+
+my $xml_parser=XML::LibXML->new(); # a single parser instance shared by every rst2xml call
+$xml_parser->load_ext_dtd(0); # do not load external DTDs while parsing
+$xml_parser->clean_namespaces(1); # have the parser drop redundant namespace declarations
+
+sub process { # converts the resource's 'rstdoc' property (or properties) into 'xmldom'; always returns empty
+    my ($self,$resource,$stage)=@_;
+
+    return unless $stage eq 'meta'; # any stage other than 'meta' is a no-op
+
+    my ($src_path)=$resource->get_axis_values('filename'); # list assignment: only the first 'filename' value is kept
+
+    my ($rst_doc,$xml_dom); # NOTE(review): $xml_dom is never used in this sub
+    $rst_doc=$resource->get_property('rstdoc'); # lookup without a language selector
+    if (defined $rst_doc) { # single-language resource
+        $resource->set_property(
+            'xmldom',
+            rst2xml($rst_doc,$src_path) # no language passed, so rst2xml falls back to 'it'
+        );
+    }
+    else { # multi-language resource: convert once per value of the 'language' axis
+        my @langs=$resource->get_axis_values('language');
+        for my $cur_lang (@langs) {
+            $rst_doc=$resource->get_property({language=>$cur_lang},'rstdoc'); # NOTE(review): not checked for undef before conversion
+            $resource->set_property(
+                {language=>$cur_lang},
+                'xmldom',
+                rst2xml($rst_doc,$src_path,$cur_lang)
+            );
+        }
+    }
+
+    return;
+}
+
+sub rst2xml { # plain function (no $self): reST string in, result of parse_string on the docutils XML out
+    my ($rst_string,$source_path,$language)=@_;
+
+    $rst_string=Encode::encode('utf-8',$rst_string); # pass encoded bytes, matching 'input_encoding' in the Python settings
+    my $xml_string=_rst2xml($rst_string,$source_path,$language||'it'); # _rst2xml is the Python function below; a false $language becomes 'it'
+    $xml_parser->base_uri($source_path); # set on the shared parser just before parsing
+    return $xml_parser->parse_string($xml_string);
+}
+
+}
+1;
+__DATA__
+__Python__
+
+import locale # NOTE(review): not referenced anywhere in this Python section
+import docutils.core
+
+def _rst2xml(source,source_path,language): # returns publish_string's output for `source` using the 'xml' writer
+    return docutils.core.publish_string(
+        source,source_path=source_path,
+        writer_name='xml',
+        settings_overrides={
+            'input_encoding':'utf-8', # the Perl caller encodes the source to UTF-8 before calling
+            'output_encoding':'utf-8',
+            'language_code':language,
+        },
+    )
|