aboutsummaryrefslogtreecommitdiff
path: root/lib/WebCoso
diff options
context:
space:
mode:
author dakkar <dakkar@luxion> 2006-02-05 13:45:32 +0000
committer dakkar <dakkar@luxion> 2006-02-05 13:45:32 +0000
commit 0202ee945a19bcb6e497f0200f096de16db845f4 (patch)
tree a34aa0f8939541247e8577dff4165227708017db /lib/WebCoso
parent step di split lingue per ReST (diff)
download WebCoso-0202ee945a19bcb6e497f0200f096de16db845f4.tar.gz
WebCoso-0202ee945a19bcb6e497f0200f096de16db845f4.tar.bz2
WebCoso-0202ee945a19bcb6e497f0200f096de16db845f4.zip
step per parsare ReST in un dom XML
git-svn-id: svn://luxion/repos/WebCoso/trunk@154 fcb26f47-9200-0410-b104-b98ab5b095f3
Diffstat (limited to 'lib/WebCoso')
-rw-r--r-- lib/WebCoso/Step/ReST/SplitLang.pm | 2
-rw-r--r-- lib/WebCoso/Step/ReST/ToXml.pm | 84
2 files changed, 86 insertions, 0 deletions
diff --git a/lib/WebCoso/Step/ReST/SplitLang.pm b/lib/WebCoso/Step/ReST/SplitLang.pm
index a24e55e..dc60757 100644
--- a/lib/WebCoso/Step/ReST/SplitLang.pm
+++ b/lib/WebCoso/Step/ReST/SplitLang.pm
@@ -35,6 +35,8 @@ sub process {
$fh=$resource->get_property({filename=>$filename},'datastream');
}
+ binmode $fh,':utf8';
+
# raccolgo le lingue usate
my %langs=(''=>undef);
seek $fh,0,0;
diff --git a/lib/WebCoso/Step/ReST/ToXml.pm b/lib/WebCoso/Step/ReST/ToXml.pm
new file mode 100644
index 0000000..2f9384a
--- /dev/null
+++ b/lib/WebCoso/Step/ReST/ToXml.pm
@@ -0,0 +1,84 @@
+package WebCoso::Step::ReST::ToXml;
+use strict;
+use warnings;
+use base 'WebCoso::Step';
+use Class::Std;
+use Inline 'Python';
+use XML::LibXML;
+use Encode;
+
+{
+
+=head2 What it does
+
+Takes each {language=>'whatever'}->rstdoc property, passes it to docutils,
+takes the resulting XML, passes it to the XML parser, and saves the DOM in
+{language=>'whatever'}->xmldom.
+
+Assumes everything comes from a single file.
+
+Does everything on the first pass.
+
+=cut
+
+# One XML::LibXML parser instance, created at load time and shared by every
+# rst2xml() call in this package (rst2xml sets its base_uri per call).
+my $xml_parser=XML::LibXML->new();
+# Turn the parser's load_ext_dtd option off.
+# NOTE(review): presumably so the DOCTYPE in docutils' XML output is never
+# fetched while parsing -- confirm against the XML::LibXML parser options.
+$xml_parser->load_ext_dtd(0);
+# Turn the parser's clean_namespaces option on.
+$xml_parser->clean_namespaces(1);
+
+# Step entry point: turns the resource's ReST source(s) into XML DOMs.
+#
+# Arguments:
+#   $self     - the step object (not used in this sub)
+#   $resource - object queried with get_axis_values()/get_property() and
+#               updated with set_property()
+#   $stage    - name of the current stage; anything other than 'meta' is a
+#               no-op
+#
+# When the resource has a language-less 'rstdoc' property, its DOM is stored
+# as the language-less 'xmldom' property. Otherwise every value of the
+# 'language' axis has its own 'rstdoc' converted and stored as that
+# language's 'xmldom'.
+#
+# Returns nothing (bare return) in every case.
+sub process {
+    my ($self,$resource,$stage)=@_;
+
+    return unless $stage eq 'meta';
+
+    # Only the first value of the 'filename' axis is used as the source path.
+    my ($src_path)=$resource->get_axis_values('filename');
+
+    my $mono_doc=$resource->get_property('rstdoc');
+    if (defined $mono_doc) { # single-language resource
+        $resource->set_property(
+            'xmldom',
+            rst2xml($mono_doc,$src_path)
+        );
+    }
+    else { # multi-language resource: one document per language
+        my @langs=$resource->get_axis_values('language');
+        for my $cur_lang (@langs) {
+            # NOTE(review): the per-language rstdoc is passed on without a
+            # defined-ness check -- confirm every language value has one.
+            my $lang_doc=$resource->get_property({language=>$cur_lang},'rstdoc');
+            $resource->set_property(
+                {language=>$cur_lang},
+                'xmldom',
+                rst2xml($lang_doc,$src_path,$cur_lang)
+            );
+        }
+    }
+
+    return;
+}
+
+# Converts one ReST document into a parsed XML DOM.
+#
+# Arguments:
+#   $rst_string  - the ReST text; it is encoded with Encode::encode('utf-8')
+#                  before being handed to the Python side
+#   $source_path - passed to docutils as the source path and set as the
+#                  parser's base URI
+#   $language    - language code for docutils; a false value means 'it'
+#
+# Returns the result of parse_string() on the XML produced by _rst2xml().
+sub rst2xml {
+    my ($rst_string,$source_path,$language)=@_;
+
+    my $rst_octets=Encode::encode('utf-8',$rst_string);
+    my $lang_code=$language||'it';
+
+    my $xml_string=_rst2xml($rst_octets,$source_path,$lang_code);
+
+    # The parser is shared, so its base URI is reset for each document.
+    $xml_parser->base_uri($source_path);
+    return $xml_parser->parse_string($xml_string);
+}
+
+}
+1;
+__DATA__
+__Python__
+
+import locale
+import docutils.core
+
+def _rst2xml(source,source_path,language):
+ return docutils.core.publish_string(
+ source,source_path=source_path,
+ writer_name='xml',
+ settings_overrides={
+ 'input_encoding':'utf-8',
+ 'output_encoding':'utf-8',
+ 'language_code':language,
+ },
+ )