diff options
Diffstat (limited to 'lib/MaildirIndexer/Parser.rakumod')
-rw-r--r-- | lib/MaildirIndexer/Parser.rakumod | 112 |
1 files changed, 112 insertions, 0 deletions
# Source: lib/MaildirIndexer/Parser.rakumod (new file in this diff, blob 420d4c3).
use v6.d;
unit module MaildirIndexer::Parser;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;

# Character sequences accepted as the blank line between the header block and
# the body.  Shared by the grammar (token `separator`) and by the headers-only
# reader, which uses them as input line separators.
my @separators = (
    "\x0a\x0d\x0a\x0d",
    "\x0d\x0a\x0d\x0a",
    "\x0a\x0a",
    "\x0d\x0d",
);

# Grammar for a whole message: header block, separator, then the body.
my grammar Message {
    regex TOP {
        <headers>
        <separator>
        <body>
    }

    # A single line break in any of the four spellings handled here.
    token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d }
    token separator { @separators }

    # Everything after the separator, taken verbatim.
    token body { .* }
    regex headers {
        <header>+ % <newline>
    }
    # Either a `name: value` pair or, failing that, an arbitrary non-empty
    # line that is captured as <junk> (and dropped by the actions class).
    regex header {
        <name> \: \h* <value>
        || <junk>
    }
    token name {
        <-[:\s]>+
    }
    # A value may span several lines; a continuation line is a newline
    # followed by at least one horizontal whitespace character.
    regex value {
        <line>+ % [<newline> \h+]
    }
    token line { \N* }
    token junk { \N+ }
}

# Actions that turn a successful Message parse into a MaildirIndexer::Email.
my class Message-actions {
    # Path recorded on the resulting email; the IO type object when unknown.
    has $.path = IO;

    method TOP($/) {
        make MaildirIndexer::Email.new(
            headers => $/<headers>.made,
            body    => $/<body>.Str,
            path    => $.path,
        );
    }

    # Collect the per-header pairs into one hash.  Junk lines contribute an
    # empty list and therefore vanish when flattened.
    method headers($/) {
        make %( flat |$/<header>».made );
    }

    # Header names are lower-cased; junk lines produce no pair.
    method header($/) {
        make $/<junk> ?? () !! ( $/<name>.Str.lc => $/<value>.made );
    }

    # Join the physical lines of a (possibly folded) value with single spaces.
    method value($/) {
        make $/<line>.join(' ')
    }
}

# Parse the complete file at $p.
# Returns the parsed MaildirIndexer::Email, or the undefined type object when
# the Str candidate could not parse the content.
multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
        $result = parse-email($p.slurp(:enc<utf8-c8>), path => $p);
    }
    return $result;
}

# Parse only the header block of the file at $p.
# The file is read with the header/body separators as line terminators and
# chomping disabled, so the first "line" is the header block including its
# separator, which is what the grammar's TOP rule needs (with an empty body).
# Returns the undefined type object when the file is empty or does not parse.
multi parse-email(IO::Path:D $p, :$headers-only! --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
        my IO::Handle $h = $p.open(
            :enc<utf8-c8>,
            :nl-in(@separators),
            :!chomp,
        );
        # Close the handle however this block is left, including when
        # reading or parsing throws; skip it if the open itself failed.
        LEAVE { .close with $h }
        # An empty file yields no first chunk; leave $result undefined
        # instead of dispatching parse-email on Nil.
        with $h.lines()[0] -> $header-block {
            $result = parse-email($header-block, path => $p);
        }
    }
    return $result;
}

# Read from an asynchronous socket until the accumulated text parses as an
# email, then stop listening and return it.  Returns the undefined type
# object if the supply ends before anything parses.
multi parse-email(IO::Socket::Async:D $s --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::Socket.log: -> {
        my $string;
        react {
            whenever $s.Supply(:enc<utf8-c8>) {
                $string ~= $_;
                # parsing the whole email is much faster (0.02 seconds
                # instead of 1.2!) than just C<< $string ~~
                # /@separators/ >>
                $result = parse-email($string) and done;
            }
        }
    }
    return $result;
}

# Parse an email held in a string; $path is recorded on the resulting object.
# Returns the undefined type object when the grammar does not match, and warns
# and returns Nil when parsing throws.
multi parse-email(Str:D $email-str, :$path = IO --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::Str.log: -> {
        CATCH { warn $_; return Nil };
        with Message.parse($email-str, :actions(Message-actions.new(:$path))) {
            $result = .made;
        }
    }
    return $result;
}