# MaildirIndexer::Parser -- turn the text of an email message into a
# MaildirIndexer::Email object.
#
# Provenance: this file was created as lib/MaildirIndexer/Parser.rakumod
# by commit 5b10920b6e38614ceea0cd97031ab48f4f1f9a39 ("new module
# extension", dakkar, 2019-12-29).
#
# NOTE(review): the copy this was recovered from had lost every
# angle-bracket construct (subrule calls, named captures, and the value
# of each `:enc<...>` adverb).  Subrule calls and captures below were
# restored from the token names defined in the grammar itself; the
# encoding could not be recovered and is an assumption -- see ENCODING.
use v6.d;
unit module MaildirIndexer::Parser;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;

# Byte sequences accepted as the blank line between headers and body.
# Also used as the input line separators when only the header block is
# read from a file.
my @separators = (
    "\x0a\x0d\x0a\x0d",
    "\x0d\x0a\x0d\x0a",
    "\x0a\x0a",
    "\x0d\x0d",
);

# Encoding used for every file / socket read in this module.
# NOTE(review): assumed value -- the original adverb value was lost;
# confirm against the upstream repository before relying on it.
my constant ENCODING = 'utf8-c8';

# Grammar for a whole message: header block, separator, body.
my grammar Message {
    regex TOP {
        <headers>
        <separator>
        <body>
    }

    # A single line ending; two-character forms are listed alongside the
    # bare LF / CR forms.
    token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d }
    token separator { @separators }

    # Everything after the separator, taken verbatim.
    token body { .* }

    # One or more header entries, separated by line endings.
    regex headers {
        <header>+ % <newline>
    }

    # Either "name: value", or (failing that) a line of junk that is
    # matched so that parsing can continue past it.
    regex header {
        <name> \: \h* <value>
        || <junk>
    }
    token name {
        <-[:\s]>+
    }

    # A value may continue over several lines: each continuation is a
    # line ending followed by horizontal whitespace.
    regex value {
        <line>+ % [<newline> \h+]
    }
    token line { \N* }
    token junk { \N+ }
}

# Actions that build a MaildirIndexer::Email from a Message match.
my class Message-actions {
    # Passed through unchanged to the Email object; defaults to the IO
    # type object when the message did not come from a file.
    has $.path = IO;

    method TOP($/) {
        make MaildirIndexer::Email.new(
            headers => $/<headers>.made,
            body => $/<body>.Str,
            path => $.path,
        );
    }

    # Hash of header name => value.  Junk lines contribute nothing; a
    # name that occurs more than once keeps its last value.
    method headers($/) {
        make %( flat |$/<header>».made );
    }

    # A pair with the name lower-cased, or an empty list for junk.
    method header($/) {
        make $/<junk> ?? () !! ( $/<name>.Str.lc => $/<value>.made );
    }

    # Continuation lines are joined with a single space.
    method value($/) {
        make $/<line>.join(' ')
    }
}

# Parse the complete file at $p.
# Returns the parsed Email, or an undefined MaildirIndexer::Email when
# the contents do not parse.
multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
        $result = parse-email($p.slurp(:enc(ENCODING)), path => $p);
    }
    return $result;
}

# Parse only the header block of the file at $p (the body of the
# resulting Email is whatever follows the separator in that first chunk,
# i.e. nothing).
# Returns the parsed Email, or an undefined MaildirIndexer::Email when
# the file is empty or its first chunk does not parse.
multi parse-email(IO::Path:D $p, :$headers-only! --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
        # With the separators as line endings and chomping disabled, the
        # first "line" is the header block plus its trailing separator.
        my IO::Handle $h = $p.open(
            :enc(ENCODING),
            :nl-in(@separators),
            :!chomp,
        );
        # Close the handle however this block is left, including when
        # parsing throws.
        LEAVE { .close with $h }
        # An empty file yields no first chunk; leave $result undefined
        # in that case, matching what the whole-file candidate returns.
        with $h.lines().head -> $head-chunk {
            $result = parse-email(
                $head-chunk,
                path => $p,
            );
        }
    }
    return $result;
}

# Read from socket $s until what has arrived so far parses as an email,
# then stop reading.
# Returns the parsed Email, or an undefined MaildirIndexer::Email when
# the supply finishes before anything parses.
multi parse-email(IO::Socket::Async:D $s --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::Socket.log: -> {
        my $string;
        react {
            whenever $s.Supply(:enc(ENCODING)) {
                $string ~= $_;
                # parsing the whole email is much faster (0.02 seconds
                # instead of 1.2!) than just C<< $string ~~
                # /@separators/ >>
                $result = parse-email($string) and done;
            }
        }
    }
    return $result;
}

# Parse an email held in a string.  $path is recorded on the resulting
# Email and defaults to the IO type object.
# Returns the parsed Email, an undefined MaildirIndexer::Email when the
# text does not match the grammar, or Nil (after a warning) when parsing
# throws.
multi parse-email(Str:D $email-str, :$path = IO --> MaildirIndexer::Email) is export {
    my MaildirIndexer::Email $result;
    MaildirIndexer::LogTimelineSchema::Parse::Email::Str.log: -> {
        CATCH { warn $_; return Nil };
        with Message.parse($email-str,:actions(Message-actions.new(:$path))) {
            $result = .made;
        }
    }
    return $result;
}