summary | refs | log | tree | commit | diff
path: root/lib/MaildirIndexer/Parser.rakumod
diff options
context:
space:
mode:
Diffstat (limited to 'lib/MaildirIndexer/Parser.rakumod')
-rw-r--r-- lib/MaildirIndexer/Parser.rakumod 112
1 files changed, 112 insertions, 0 deletions
diff --git a/lib/MaildirIndexer/Parser.rakumod b/lib/MaildirIndexer/Parser.rakumod
new file mode 100644
index 0000000..420d4c3
--- /dev/null
+++ b/lib/MaildirIndexer/Parser.rakumod
@@ -0,0 +1,112 @@
+use v6.d;
+unit module MaildirIndexer::Parser;
+use MaildirIndexer::LogTimelineSchema;
+use MaildirIndexer::Email;
+
+my @separators = ( # blank-line sequences: matched by the grammar's <separator> token and used as :nl-in for header-only reads
+ "\x0a\x0d\x0a\x0d", # LF CR LF CR
+ "\x0d\x0a\x0d\x0a", # CR LF CR LF
+ "\x0a\x0a", # LF LF
+ "\x0d\x0d", # CR CR
+);
+
+my grammar Message { # grammar for a raw message: header block, blank-line separator, body
+ regex TOP { # whole message; a backtracking regex, so <headers> can give characters back until <separator> matches
+ <headers>
+ <separator>
+ <body>
+ }
+
+ token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d } # one line ending: CR LF, LF CR, lone LF or lone CR
+ token separator { @separators } # any one of the strings listed in @separators
+
+ token body { .* } # everything that remains after the separator
+ regex headers { # one or more <header> entries, separated by <newline>
+ <header>+ % <newline>
+ }
+ regex header { # either a "name: value" pair or, failing that, a junk line
+ <name> \: \h* <value>
+ || <junk> # tried only when the "name: value" form does not match
+ }
+ token name { # header name: one or more characters that are neither a colon nor whitespace
+ <-[:\s]>+
+ }
+ regex value { # header value: one or more <line>s; a following line belongs to the value when it starts with horizontal whitespace
+ <line>+ % [<newline> \h+]
+ }
+ token line { \N* } # zero or more non-newline characters
+ token junk { \N+ } # one or more non-newline characters that did not parse as a header
+}
+
+# Actions for the Message grammar: turns a successful parse into a
+# MaildirIndexer::Email object.
+my class Message-actions {
+    # Handed through unchanged as the `path` of the resulting Email;
+    # defaults to the IO type object when no path is supplied.
+    has $.path = IO;
+
+    # Build the Email from the headers hash, the raw body text and the path.
+    method TOP($/) {
+        make MaildirIndexer::Email.new(
+            headers => $/<headers>.made,
+            body    => $/<body>.Str,
+            path    => $.path,
+        );
+    }
+
+    # Gather the result of every <header> match into a single hash.
+    # Junk lines contribute an empty list, which flattens away.
+    method headers($/) {
+        # `».made` is the hyper method call: call .made on each <header> match
+        make %( flat |$/<header>».made );
+    }
+
+    # A junk line yields nothing; a real header yields a pair of the
+    # lower-cased name and the value built by the `value` action.
+    method header($/) {
+        make $/<junk> ?? () !! ( $/<name>.Str.lc => $/<value>.made );
+    }
+
+    # Join the <line> matches of a (possibly multi-line) value with single
+    # spaces.
+    method value($/) {
+        make $/<line>.join(' ')
+    }
+}
+
+# Parse the complete message stored in the file at $p.
+#
+# The file is slurped with the utf8-c8 encoding and handed to the Str
+# candidate of parse-email, with $p passed along as the path. The work is
+# wrapped in the Parse::Email::File log call, tagged with the file's path.
+# Returns whatever the Str candidate produced.
+multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export {
+    my MaildirIndexer::Email $email;
+    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log(
+        :file($p.path),
+        -> {
+            my $raw = $p.slurp(:enc<utf8-c8>);
+            $email = parse-email($raw, path => $p);
+        },
+    );
+    $email;
+}
+
+# Parse only the leading part of the file at $p, up to and including the
+# first blank-line separator.
+#
+# The file is opened with @separators as its input line separators and with
+# chomping disabled, so the first "line" read is everything up to and
+# including the first separator. That chunk is handed to the Str candidate
+# of parse-email. The required named argument :$headers-only is not used in
+# the body; it only selects this candidate.
+#
+# Returns the parsed Email, or an undefined MaildirIndexer::Email when the
+# file is empty or the chunk does not parse.
+multi parse-email(IO::Path:D $p, :$headers-only! --> MaildirIndexer::Email) is export {
+    my MaildirIndexer::Email $result;
+    MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> {
+        my IO::Handle $h = $p.open(
+            :enc<utf8-c8>,
+            :nl-in(@separators),
+            :!chomp,
+        );
+        # Close the handle however this block is left, including when
+        # reading or parsing throws.
+        LEAVE { .close with $h }
+
+        # An empty file yields no first chunk; leave $result undefined then
+        # instead of failing dispatch on a non-Str argument.
+        with $h.lines()[0] -> $head {
+            $result = parse-email($head, path => $p);
+        }
+    }
+    return $result;
+}
+
+# Read a message from an asynchronous socket.
+#
+# Chunks from the socket's supply (decoded as utf8-c8) are appended to a
+# buffer, and the whole buffer is re-parsed after every chunk. Reading stops
+# as soon as a parse gives a true result; that result is returned.
+multi parse-email(IO::Socket::Async:D $s --> MaildirIndexer::Email) is export {
+    my MaildirIndexer::Email $email;
+    MaildirIndexer::LogTimelineSchema::Parse::Email::Socket.log(-> {
+        my $received;
+        react {
+            whenever $s.Supply(:enc<utf8-c8>) -> $chunk {
+                $received ~= $chunk;
+                # Running the full parse on the buffer is much faster
+                # (0.02 seconds instead of 1.2!) than merely testing
+                # C<< $received ~~ /@separators/ >>.
+                $email = parse-email($received);
+                done if $email;
+            }
+        }
+    });
+    $email;
+}
+
+# Parse a message held in a string.
+#
+# $path is only handed to the actions object, which stores it on the
+# resulting Email. If the grammar does not match, the result stays an
+# undefined MaildirIndexer::Email. If parsing throws, the exception is
+# turned into a warning and Nil is returned.
+multi parse-email(Str:D $email-str, :$path = IO --> MaildirIndexer::Email) is export {
+    my MaildirIndexer::Email $email;
+    MaildirIndexer::LogTimelineSchema::Parse::Email::Str.log(-> {
+        CATCH { warn $_; return Nil };
+        my $actions = Message-actions.new(:$path);
+        my $match   = Message.parse($email-str, :$actions);
+        $email = $match.made with $match;
+    });
+    $email;
+}