summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dakkar <dakkar@thenautilus.net> 2019-12-31 11:55:17 +0000
committer: dakkar <dakkar@thenautilus.net> 2019-12-31 11:55:17 +0000
commit: a0a15ddbe693d7867b4adf19d8310c9d4aabfed6 (patch)
tree: 7772e157e9dd302a4ae512a9ccaf1fa49a5a70aa
parent: more sensible worker count default (diff)
download: MaildirIndexer-a0a15ddbe693d7867b4adf19d8310c9d4aabfed6.tar.gz
MaildirIndexer-a0a15ddbe693d7867b4adf19d8310c9d4aabfed6.tar.bz2
MaildirIndexer-a0a15ddbe693d7867b4adf19d8310c9d4aabfed6.zip
work around some weird newline issues
-rw-r--r-- lib/MaildirIndexer/Parser.rakumod 15
1 files changed, 9 insertions, 6 deletions
diff --git a/lib/MaildirIndexer/Parser.rakumod b/lib/MaildirIndexer/Parser.rakumod
index 420d4c3..407078d 100644
--- a/lib/MaildirIndexer/Parser.rakumod
+++ b/lib/MaildirIndexer/Parser.rakumod
@@ -3,11 +3,13 @@ unit module MaildirIndexer::Parser;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;
+# for some reason, \x0d\x0a works differently than \r\n
+
my @separators = (
- "\x0a\x0d\x0a\x0d",
- "\x0d\x0a\x0d\x0a",
- "\x0a\x0a",
- "\x0d\x0d",
+ "\n\r\n\r",
+ "\r\n\r\n",
+ "\n\n",
+ "\r\r",
);
my grammar Message {
@@ -17,8 +19,8 @@ my grammar Message {
<body>
}
- token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d }
- token separator { @separators }
+ regex newline { \r\n | \n\r | \n | \r }
+ regex separator { @separators }
token body { .* }
regex headers {
@@ -61,6 +63,7 @@ my class Message-actions {
multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export {
my MaildirIndexer::Email $result;
MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
+ # as of 2019.11, `slurp` replaces all \r\n with \n ??
$result = parse-email($p.slurp(:enc<utf8-c8>), path => $p);
}
return $result;