diff options
author | dakkar <dakkar@thenautilus.net> | 2018-11-02 14:50:46 +0000 |
---|---|---|
committer | dakkar <dakkar@thenautilus.net> | 2018-11-02 14:50:46 +0000 |
commit | 3184d538d9edebc4f9fdbc5331847159bfbe2cdd (patch) | |
tree | 333b756c81f515c4c9b34e9e4a18286d139f0c82 /bayes | |
parent | add licence (diff) | |
download | MaildirIndexer-3184d538d9edebc4f9fdbc5331847159bfbe2cdd.tar.gz MaildirIndexer-3184d538d9edebc4f9fdbc5331847159bfbe2cdd.tar.bz2 MaildirIndexer-3184d538d9edebc4f9fdbc5331847159bfbe2cdd.zip |
let's use Perl 6!
minimal email parser, for now
Diffstat (limited to 'bayes')
-rw-r--r-- | bayes | 55 |
1 files changed, 55 insertions, 0 deletions
#!/usr/bin/env perl6
use v6.d.PREVIEW;

# Minimal e-mail parser. A message is a block of header lines, a blank
# line, and then everything else as the body. Used together with
# Message-actions, a successful parse produces a hash with the keys
# `headers` and `body`.
grammar Message {
    # Whole message: header block, blank-line separator, body.
    regex TOP {
        <headers>
        <separator>
        <body>
    }

    # A single line ending: CR LF, LF CR, bare LF or bare CR.
    token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d }

    # The blank line between headers and body: the same line ending
    # twice in a row.
    token separator {
        [\x0a\x0d\x0a\x0d] | [\x0d\x0a\x0d\x0a] | \x0a ** 2 | \x0d ** 2
    }

    # Everything after the separator, taken verbatim.
    token body { .* }

    # One or more header entries, separated by single line endings.
    regex headers {
        <header>+ % <newline>
    }

    # Either a well-formed "Name: value" entry or, failing that, any
    # non-empty line, which is captured as <junk> so that one odd line
    # does not make the whole parse fail.
    regex header {
        <name> \: \h* <value>
        || <junk>
    }

    # Header name: one or more characters that are neither a colon nor
    # whitespace.
    token name {
        <-[:\s]>+
    }

    # Header value: a first line plus any continuation lines, i.e.
    # lines that follow a line ending and start with horizontal
    # whitespace (folded headers).
    regex value {
        <line>+ % [<newline> \h+]
    }

    # The rest of the current line; may be empty.
    token line { \N* }

    # A non-empty line that did not parse as "Name: value".
    token junk { \N+ }
}

# Actions for the Message grammar; they turn the match tree into plain
# data structures.
class Message-actions {
    # Result for a whole message: a hash holding the parsed headers and
    # the body text as a string.
    method TOP($/) {
        make %( headers => $/<headers>.made, body => $/<body>.Str );
    }

    # Build one hash from the results of all header entries. Being a
    # hash, it keeps a single value per header name.
    # The hyper method call below must be spelled with U+00BB; it had
    # been corrupted into two Thai letters by a wrong charset decode.
    method headers($/) {
        make %( |$/<header>».made );
    }

    # A well-formed header becomes a `name => value` pair. A junk line
    # makes an empty list, which is meant to contribute no entry to the
    # headers hash.
    method header($/) {
        make $/<junk> ?? () !! ( $/<name>.Str => $/<value>.made );
    }

    # Unfold a (possibly multi-line) value by joining its lines with a
    # single space.
    method value($/) {
        make $/<line>.join(' ')
    }
}

# Parse every file named on the command line as an e-mail message and
# report how long the whole run took.
#
# The files are processed in parallel with .race, so results are not
# required to come back in input order. Each file is read with the
# utf8-c8 encoding.
sub MAIN(*@files) {
    say "Starting";
    my $start = now;
    my @messages = @files.race(:degree(10) :batch(100)).map({
        my $email = $_.IO.slurp(:enc<utf8-c8>);
        # The assignment is the last statement of the block, so its
        # value is what ends up in @messages.
        # NOTE(review): a file that fails to parse presumably still
        # occupies a slot, so the count below would be "files
        # processed" rather than "messages parsed" - confirm.
        my $match = Message.parse($email, :actions(Message-actions.new));
    });
    say "Took { now - $start } for { +@messages } messages";
}