summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dakkar <dakkar@thenautilus.net>, 2018-11-02 14:50:46 +0000
committer: dakkar <dakkar@thenautilus.net>, 2018-11-02 14:50:46 +0000
commit: 3184d538d9edebc4f9fdbc5331847159bfbe2cdd (patch)
tree: 333b756c81f515c4c9b34e9e4a18286d139f0c82
parent: add licence (diff)
downloadMaildirIndexer-3184d538d9edebc4f9fdbc5331847159bfbe2cdd.tar.gz
MaildirIndexer-3184d538d9edebc4f9fdbc5331847159bfbe2cdd.tar.bz2
MaildirIndexer-3184d538d9edebc4f9fdbc5331847159bfbe2cdd.zip
let's use Perl 6!
minimal email parser, for now
-rw-r--r--bayes55
1 files changed, 55 insertions, 0 deletions
diff --git a/bayes b/bayes
new file mode 100644
index 0000000..37352ce
--- /dev/null
+++ b/bayes
@@ -0,0 +1,55 @@
+#!/usr/bin/env perl6
+use v6.d.PREVIEW;
+
+grammar Message { # minimal email parser: a header block, a separator, then the body
+ regex TOP {
+ <headers>
+ <separator>
+ <body>
+ }
+
+ token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d } # one line ending: CR LF, LF CR, lone LF or lone CR
+ token separator { # two identical line endings in a row (the blank line that ends the headers)
+ [\x0a\x0d\x0a\x0d] | [\x0d\x0a\x0d\x0a] | \x0a ** 2 | \x0d ** 2
+ }
+ token body { .* } # everything that remains once TOP has matched the separator
+ regex headers { # one or more header entries, separated by single line endings
+ <header>+ % <newline>
+ }
+ regex header { # "name: value" is tried first; only if that fails is the text captured as junk
+ <name> \: \h* <value>
+ || <junk>
+ }
+ token name { # header name: a run of characters that are neither ':' nor whitespace
+ <-[:\s]>+
+ }
+ regex value { # one or more lines; a further line is joined on when a line ending is followed by horizontal whitespace
+ <line>+ % [<newline> \h+]
+ }
+ token line { \N* } # zero or more non-newline characters
+ token junk { \N+ } # one or more non-newline characters that did not parse as "name: value"
+}
+
+class Message-actions { # action methods that turn a Message parse tree into plain data
+ method TOP($/) { # result: a hash holding the made headers and the body as a string
+ make %( headers => $/<headers>.made, body => $/<body>.Str );
+ }
+ method headers($/) { # hyper-call .made on every <header> match and build one hash from the results
+ make %( |$/<header>».made );
+ }
+ method header($/) { # a junk line makes an empty list; a real header makes a name => value pair
+ make $/<junk> ?? () !! ( $/<name>.Str => $/<value>.made );
+ }
+ method value($/) { # the matched <line> pieces of a (possibly continued) value, joined by single spaces
+ make $/<line>.join(' ')
+ }
+}
+
+sub MAIN(*@files) { # parse every file named on the command line in parallel, then report the elapsed time
+ say "Starting";
+ my $start = now;
+ my @messages = @files.race(:degree(10), :batch(100)).map: -> $path {
+ my $parsed = Message.parse($path.IO.slurp(:enc<utf8-c8>), :actions(Message-actions.new));
+ };
+ say "Took { now - $start } for { +@messages } messages";
+}