From 283072c9c895f2842ae2823bde59ffed2dcb28ed Mon Sep 17 00:00:00 2001 From: dakkar Date: Mon, 31 Aug 2009 14:29:39 +0200 Subject: handle bad unicode strings sometimes T::RT produces non-unicode strings (not marked with the "utf8 flag"); we force every string to pass through a conversion, so we're safe --- lib/Text/Restructured/Writer/LibXML.pm | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/Text/Restructured/Writer/LibXML.pm b/lib/Text/Restructured/Writer/LibXML.pm index 99ef206..b936260 100644 --- a/lib/Text/Restructured/Writer/LibXML.pm +++ b/lib/Text/Restructured/Writer/LibXML.pm @@ -2,6 +2,7 @@ package Text::Restructured::Writer::LibXML; use strict; use warnings; use XML::LibXML; +use Encode; $Text::Restructured::Writer::LibXML::VERSION='0.01'; @@ -64,18 +65,18 @@ sub _mathml2xml { my ($mnode,$xdoc)=@_; if ($mnode->isText) { - return $xdoc->createTextNode($mnode->nodeValue); + return $xdoc->createTextNode(_ensure_char_semantic($mnode->nodeValue)); } my @children=map {_mathml2xml($_,$xdoc)} $mnode->childNodes(); - my $elem=$xdoc->createElementNS($MATHML,$mnode->nodeName); + my $elem=$xdoc->createElementNS($MATHML,_ensure_char_semantic($mnode->nodeName)); for my $attname ($mnode->attributeList) { next if $attname eq 'xmlns'; - $elem->setAttribute($attname, - $mnode->attribute($attname)) + $elem->setAttribute(_ensure_char_semantic($attname), + _ensure_char_semantic($mnode->attribute($attname))) } $elem->appendChild($_) for @children; @@ -87,7 +88,7 @@ sub _docutils2xml { my ($dunode,$xdoc)=@_; if ($dunode->{tag} eq '#PCDATA') { - return $xdoc->createTextNode($dunode->{text} || ''); + return $xdoc->createTextNode(_ensure_char_semantic($dunode->{text} || '')); } if ($dunode->{tag} eq 'mathml') { @@ -97,7 +98,7 @@ sub _docutils2xml { my @children=map {_docutils2xml($_,$xdoc)} @{ $dunode->{content} || [] }; - my $elem=$xdoc->createElement($dunode->{tag}); + my
$elem=$xdoc->createElement(_ensure_char_semantic($dunode->{tag})); if (defined $dunode->{attr}) { while (my ($attname,$attval)=each %{$dunode->{attr}}) { @@ -105,9 +106,10 @@ sub _docutils2xml { $attval=''; } elsif (ref($attval) eq 'ARRAY') { - $attval=join ' ',@$attval; + $attval=join ' ',map {_ensure_char_semantic($_)} @$attval; } - $elem->setAttribute($attname,$attval); + $elem->setAttribute(_ensure_char_semantic($attname), + _ensure_char_semantic($attval)); } } $elem->appendChild($_) for @children; @@ -115,4 +117,12 @@ sub _docutils2xml { return $elem; } +sub _ensure_char_semantic { + my ($str)=@_; + + return $str if utf8::is_utf8($str); + + return decode('utf8',$str); +} + 1; -- cgit v1.2.3