use v6.d;
unit module MaildirIndexer::Parser;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;

# Parses e-mail messages (from a file, a socket, or a string) into
# MaildirIndexer::Email objects. Every public entry point is a
# candidate of the exported multi `parse-email`.

# NOTE(review): the encoding name passed as `:enc` was lost from the
# copy of this file under review. `utf8-c8` is assumed here because it
# can represent arbitrary bytes; confirm against the intended value.
my constant ENCODING = 'utf8-c8';

# for some reason, \x0d\x0a works differently than \r\n
#
# Character sequences that end the header section. Also used as the
# input line separators when only the headers are read from a file.
my @separators = (
    "\n\r\n\r",
    "\r\n\r\n",
    "\n\n",
    "\r\r",
);

# Grammar for a whole message: a header section, one separator, and
# everything after it as the body.
my grammar Message {
    regex TOP {
        <headers>
        <separator>
        <body>
    }
    regex newline { \r\n | \n\r | \n | \r }
    regex separator { @separators }
    token body { .* }

    # One or more header entries, separated by single newlines.
    regex headers {
        <header>+ % <newline>
    }
    # Either `name: value`, or a line of anything else (`junk`), which
    # the actions class drops.
    regex header {
        <name> \: \h* <value> || <junk>
    }
    token name { <-[:\s]>+ }
    # A value may continue over several lines: a newline followed by
    # horizontal whitespace joins the next line to the same value.
    regex value {
        <line>+ % [<newline> \h+]
    }
    token line { \N* }
    token junk { \N+ }
}

# Actions that turn a successful `Message` match into a
# MaildirIndexer::Email.
my class Message-actions {
    # Where the message came from; passed through to the Email object.
    has $.path = IO;

    method TOP($/) {
        make MaildirIndexer::Email.new(
            headers => $/<headers>.made,
            body    => $/<body>.Str,
            path    => $.path,
        );
    }

    # Builds a hash from the per-header results; the empty lists
    # produced for junk lines vanish when flattened.
    method headers($/) {
        make %( flat |$/<header>».made );
    }

    # A junk line produces nothing; a real header produces a pair of
    # lower-cased name => value.
    method header($/) {
        make $/<junk>
            ?? ()
            !! ( $/<name>.Str.lc => $/<value>.made );
    }

    # Joins the pieces of a multi-line value with single spaces.
    method value($/) { make $/<line>.join(' ') }
}

# Parses the complete message stored in file `$p`.
#
# Returns the parsed MaildirIndexer::Email, or an undefined value when
# the contents could not be parsed.
multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
        # as of 2019-11, `slurp` replaces all \r\n with \n ??
        $result = parse-email($p.slurp(:enc(ENCODING)), path => $p);
    }
    return $result;
}

# Parses only the header section of the message stored in file `$p`.
#
# The file is opened with the header/body separators as its line
# endings and without chomping, so the first "line" read is the whole
# header section plus its separator. The resulting Email therefore has
# an empty body.
#
# Returns the parsed MaildirIndexer::Email, or an undefined value when
# the file is empty or the headers could not be parsed.
multi parse-email(IO::Path:D $p, :$headers-only! --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
        my IO::Handle $h = $p.open(
            :enc(ENCODING),
            :nl-in(@separators),
            :!chomp,
        );
        # Close the handle however this block is left, including when
        # reading or parsing throws.
        LEAVE { .close with $h }

        # An empty file has no first line; leave $result undefined
        # instead of failing to dispatch on an undefined string.
        with $h.lines()[0] -> $header-section {
            $result = parse-email($header-section, path => $p);
        }
    }
    return $result;
}

# Parses a message arriving on socket `$s`.
#
# Incoming chunks are accumulated and the whole accumulated text is
# re-parsed after each one; reading stops at the first successful
# parse, when the supply quits with an error, or once `$timeout`
# seconds have passed.
#
# Returns the parsed MaildirIndexer::Email, or an undefined value when
# nothing parseable arrived in time.
multi parse-email(IO::Socket::Async:D $s, Int $timeout = 1 --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::Socket.log: -> {
        my $string;
        react {
            whenever $s.Supply(:enc(ENCODING)) {
                $string ~= $_;
                # parsing the whole email is much faster (0.02 seconds
                # instead of 1.2!) than just C<< $string ~~
                # /@separators/ >>
                $result = parse-email($string) and done;
                QUIT { done };
            }
            whenever Promise.in($timeout) { done }
        }
    }
    return $result;
}

# Parses a message held in the string `$email-str`; `$path` is stored
# on the resulting Email.
#
# Returns the parsed MaildirIndexer::Email, an undefined value when the
# text does not match the grammar, or Nil (after a warning) when
# parsing throws.
multi parse-email(Str:D $email-str, :$path = IO --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::Str.log: -> {
        CATCH { warn $_; return Nil };
        with Message.parse($email-str, :actions(Message-actions.new(:$path))) {
            $result = .made;
        }
    }
    return $result;
}