package WebCoso::Step::ReST::ToXml;
use strict;
use warnings;
use base 'WebCoso::Step';
use Class::Std;
use Inline 'Python';
use XML::LibXML;
use Encode;
{
# Per-object attributes, declared through Class::Std's :ATTR markers.
# Constructor argument 'from' (default 'rstdoc') names the resource property
# the reST source is read from; it is exposed through get_srckey().
my %srckey_of :ATTR(:init_arg<from> :get<srckey> :default<rstdoc>);
# Constructor argument 'to' (default 'xmldom') names the resource property
# the parsed XML document is stored under; it is exposed through get_dstkey().
my %dstkey_of :ATTR(:init_arg<to> :get<dstkey> :default<xmldom>);
# A single XML::LibXML parser, created once at load time and used by rst2xml().
my $xml_parser=XML::LibXML->new();
# Turn off the parser's load_ext_dtd option (loading of external DTDs).
$xml_parser->load_ext_dtd(0);
# Turn on the parser's clean_namespaces option.
$xml_parser->clean_namespaces(1);
# process($resource, $stage)
#
# Converts the reST source stored on $resource into an XML DOM, stores the
# DOM back on the resource and publishes the document metadata found in it.
#
# Only the 'meta' stage is handled; for any other stage this is a no-op.
#
# The source is read from the property named by get_srckey():
#   * if a value is available without a language selector, it is converted
#     once and stored under get_dstkey() without a selector;
#   * otherwise every value of the 'language' axis is tried in turn, and each
#     language that actually has a source gets its own DOM stored under
#     {language => $lang}. Languages without a source are skipped instead of
#     handing an undefined document to the converter.
#
# Returns nothing.
sub process {
    my ($self, $resource, $stage) = @_;

    return unless $stage eq 'meta';

    # Only the first 'filename' axis value is used as the source path.
    my ($src_path) = $resource->get_axis_values('filename');
    my $srckey = $self->get_srckey();
    my $dstkey = $self->get_dstkey();

    # Language-independent source: convert once and stop.
    my $rst_doc = $resource->get_property_string($srckey);
    if (defined $rst_doc) {
        my $dom = rst2xml($rst_doc, $src_path);
        $resource->set_property($dstkey, $dom);
        $self->_set_meta($resource, $dom);
        return;
    }

    # Per-language sources: one DOM per language that provides a document.
    my @langs = $resource->get_axis_values('language');
    for my $cur_lang (@langs) {
        $rst_doc = $resource->get_property_string({ language => $cur_lang }, $srckey);

        # No source for this language: nothing to convert.
        next unless defined $rst_doc;

        my $dom = rst2xml($rst_doc, $src_path, $cur_lang);
        $resource->set_property({ language => $cur_lang }, $dstkey, $dom);
        $self->_set_meta($resource, $cur_lang, $dom);
    }

    return;
}
{
    # Property name => XPath expression selecting the matching node(s) in the
    # XML document handed to _set_meta().
    my %docinfo_fields = (
        title         => '/document/title',
        subtitle      => '/document/subtitle',
        author        => '/document/docinfo/author|/document/docinfo/authors/author',
        version       => '/document/docinfo/version',
        status        => '/document/docinfo/status',
        date          => '/document/docinfo/date',
        creation_date => '/document/docinfo/field[field_name="CreationDate"]/field_body',
    );

    # NOTE(review): this expression is not referenced anywhere in the visible
    # code; kept as-is in case it is meant for a future 'collections' property.
    my $collections = '/document/docinfo/field[field_name="Collection"]/field_body|/document/docinfo/field[field_name="Collections"]/field_body//list_item';

    # _set_meta($res, $dom)
    # _set_meta($res, $lang, $dom)
    #
    # Copies the text of the nodes selected by %docinfo_fields from $dom onto
    # $res as properties. With a language, properties are set under the
    # selector {language => $lang}; without one, under an empty selector.
    #
    # A field matching exactly one node is stored as a plain string, a field
    # matching several nodes as an array reference of strings, and a field
    # matching nothing is left untouched.
    #
    # Returns nothing.
    sub _set_meta {
        my ($self, $res, $lang_or_dom, $maybe_dom) = @_;

        # The presence of a fourth argument tells the two call forms apart.
        my ($selector, $doc) = $maybe_dom
            ? ({ language => $lang_or_dom }, $maybe_dom)
            : ({}, $lang_or_dom);

        for my $property (keys %docinfo_fields) {
            my @texts = map { $_->textContent() }
                $doc->findnodes($docinfo_fields{$property});
            next if !@texts;

            my $value = @texts == 1 ? $texts[0] : [@texts];
            $res->set_property($selector, $property, $value);
        }

        return;
    }
}
# rst2xml($rst_string, $source_path, $language)
#
# Converts a reST document (a Perl character string) into an XML DOM:
# the text is encoded to UTF-8, passed to the Python helper _rst2xml(), and
# the XML it returns is parsed with the shared parser, using $source_path as
# the base URI.
#
# NOTE(review): $language is accepted but not forwarded; the helper is always
# called with 'en'. Confirm whether that is intentional before changing it.
#
# Returns whatever the parser's parse_string() returns for the generated XML.
sub rst2xml {
    my ($rst_string, $source_path, $language) = @_;

    my $utf8_source = Encode::encode('utf-8', $rst_string);
    my $xml_string  = _rst2xml($utf8_source, $source_path, 'en');

    $xml_parser->base_uri($source_path);
    return $xml_parser->parse_string($xml_string);
}
}
1;
__DATA__
__Python__
import locale
import docutils.core
def _rst2xml(source, source_path, language):
    """Publish reST ``source`` through Docutils' ``xml`` writer.

    ``source_path`` is passed on as the document's source path and
    ``language`` as the ``language_code`` setting. Input and output
    encodings are both forced to UTF-8.

    Returns the result of ``docutils.core.publish_string``.
    """
    overrides = {
        'input_encoding': 'utf-8',
        'output_encoding': 'utf-8',
        'language_code': language,
    }
    return docutils.core.publish_string(
        source,
        source_path=source_path,
        writer_name='xml',
        settings_overrides=overrides,
    )