From a0a15ddbe693d7867b4adf19d8310c9d4aabfed6 Mon Sep 17 00:00:00 2001 From: dakkar Date: Tue, 31 Dec 2019 11:55:17 +0000 Subject: work around some weird newline issues --- lib/MaildirIndexer/Parser.rakumod | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/MaildirIndexer/Parser.rakumod b/lib/MaildirIndexer/Parser.rakumod index 420d4c3..407078d 100644 --- a/lib/MaildirIndexer/Parser.rakumod +++ b/lib/MaildirIndexer/Parser.rakumod @@ -3,11 +3,13 @@ unit module MaildirIndexer::Parser; use MaildirIndexer::LogTimelineSchema; use MaildirIndexer::Email; +# for some reason, \x0d\x0a works differently than \r\n + my @separators = ( - "\x0a\x0d\x0a\x0d", - "\x0d\x0a\x0d\x0a", - "\x0a\x0a", - "\x0d\x0d", + "\n\r\n\r", + "\r\n\r\n", + "\n\n", + "\r\r", ); my grammar Message { @@ -17,8 +19,8 @@ my grammar Message { } - token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d } - token separator { @separators } + regex newline { \r\n | \n\r | \n | \r } + regex separator { @separators } token body { .* } regex headers { @@ -61,6 +63,7 @@ my class Message-actions { multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export { my MaildirIndexer::Email $result; MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> { + # as of 2019.11, `slurp` replaces all \r\n with \n ?? $result = parse-email($p.slurp(:enc), path => $p); } return $result; -- cgit v1.2.3