From 5b10920b6e38614ceea0cd97031ab48f4f1f9a39 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sun, 29 Dec 2019 13:24:00 +0000 Subject: new module extension --- META6.json | 18 ++--- lib/MaildirIndexer/Email.pm6 | 69 ----------------- lib/MaildirIndexer/Email.rakumod | 69 +++++++++++++++++ lib/MaildirIndexer/Index.pm6 | 7 -- lib/MaildirIndexer/Index.rakumod | 7 ++ lib/MaildirIndexer/Index/ByAddresses.pm6 | 92 ---------------------- lib/MaildirIndexer/Index/ByAddresses.rakumod | 92 ++++++++++++++++++++++ lib/MaildirIndexer/Index/ByRef.pm6 | 44 ----------- lib/MaildirIndexer/Index/ByRef.rakumod | 44 +++++++++++ lib/MaildirIndexer/LogTimelineSchema.pm6 | 20 ----- lib/MaildirIndexer/LogTimelineSchema.rakumod | 20 +++++ lib/MaildirIndexer/Parser.pm6 | 112 --------------------------- lib/MaildirIndexer/Parser.rakumod | 112 +++++++++++++++++++++++++++ lib/MaildirIndexer/ScanDir.pm6 | 34 -------- lib/MaildirIndexer/ScanDir.rakumod | 34 ++++++++ lib/MaildirIndexer/Server.pm6 | 33 -------- lib/MaildirIndexer/Server.rakumod | 33 ++++++++ lib/MaildirIndexer/Store.pm6 | 69 ----------------- lib/MaildirIndexer/Store.rakumod | 69 +++++++++++++++++ t/lib/TestIndex.pm6 | 35 --------- t/lib/TestIndex.rakumod | 35 +++++++++ 21 files changed, 524 insertions(+), 524 deletions(-) delete mode 100644 lib/MaildirIndexer/Email.pm6 create mode 100644 lib/MaildirIndexer/Email.rakumod delete mode 100644 lib/MaildirIndexer/Index.pm6 create mode 100644 lib/MaildirIndexer/Index.rakumod delete mode 100644 lib/MaildirIndexer/Index/ByAddresses.pm6 create mode 100644 lib/MaildirIndexer/Index/ByAddresses.rakumod delete mode 100644 lib/MaildirIndexer/Index/ByRef.pm6 create mode 100644 lib/MaildirIndexer/Index/ByRef.rakumod delete mode 100644 lib/MaildirIndexer/LogTimelineSchema.pm6 create mode 100644 lib/MaildirIndexer/LogTimelineSchema.rakumod delete mode 100644 lib/MaildirIndexer/Parser.pm6 create mode 100644 lib/MaildirIndexer/Parser.rakumod delete mode 100644 lib/MaildirIndexer/ScanDir.pm6 create mode 100644 lib/MaildirIndexer/ScanDir.rakumod delete mode 100644 lib/MaildirIndexer/Server.pm6 create mode 100644 lib/MaildirIndexer/Server.rakumod delete mode 100644 lib/MaildirIndexer/Store.pm6 create mode 100644 lib/MaildirIndexer/Store.rakumod delete mode 100644 t/lib/TestIndex.pm6 create mode 100644 t/lib/TestIndex.rakumod diff --git a/META6.json b/META6.json index 9c4c0d2..eaf2dc1 100644 --- a/META6.json +++ b/META6.json @@ -6,15 +6,15 @@ "authors": [ "dakkar" ], "description": "index maildirs, suggest folders", "provides": { - "MaildirIndexer::Parser": "lib/MaildirIndexer/Parser.pm6", - "MaildirIndexer::Server": "lib/MaildirIndexer/Server.pm6", - "MaildirIndexer::Store": "lib/MaildirIndexer/Store.pm6", - "MaildirIndexer::Email": "lib/MaildirIndexer/Email.pm6", - "MaildirIndexer::Index::ByRef": "lib/MaildirIndexer/Index/ByRef.pm6", - "MaildirIndexer::Index::ByAddresses": "lib/MaildirIndexer/Index/ByAddresses.pm6", - "MaildirIndexer::ScanDir": "lib/MaildirIndexer/ScanDir.pm6", - "MaildirIndexer::LogTimelineSchema": "lib/MaildirIndexer/LogTimelineSchema.pm6", - "MaildirIndexer::Index": "lib/MaildirIndexer/Index.pm6" + "MaildirIndexer::Parser": "lib/MaildirIndexer/Parser.rakumod", + "MaildirIndexer::Server": "lib/MaildirIndexer/Server.rakumod", + "MaildirIndexer::Store": "lib/MaildirIndexer/Store.rakumod", + "MaildirIndexer::Email": "lib/MaildirIndexer/Email.rakumod", + "MaildirIndexer::Index::ByRef": "lib/MaildirIndexer/Index/ByRef.rakumod", + "MaildirIndexer::Index::ByAddresses": "lib/MaildirIndexer/Index/ByAddresses.rakumod", + "MaildirIndexer::ScanDir": "lib/MaildirIndexer/ScanDir.rakumod", + "MaildirIndexer::LogTimelineSchema": "lib/MaildirIndexer/LogTimelineSchema.rakumod", + "MaildirIndexer::Index": "lib/MaildirIndexer/Index.rakumod" }, "depends": { "runtime": { diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6 deleted file mode 100644 index 4f92106..0000000 --- a/lib/MaildirIndexer/Email.pm6 +++ /dev/null @@ -1,69 +0,0 @@ -use v6.d; -unit class MaildirIndexer::Email; -use MaildirIndexer::LogTimelineSchema; - -has IO $.path; -has %!headers; -has $!body; - -method BUILD(:%!headers,:$!body,:$!path = IO) { } - -method message-id(--> Str:D) { split-refs(%!headers)[0] // '' } -method refs(--> Iterable) { - return ( - |split-refs(%!headers), - |split-refs(%!headers).reverse, - ); -} - -multi split-refs(Any --> Iterable) { return () } -multi split-refs(Str:D $str --> Iterable) { - my @result; - MaildirIndexer::LogTimelineSchema::Parse::Header.log: { - @result = $/».Str if $str ~~ m{'<' $ = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' }; - } - return @result; -} - -method addresses (--> Iterable) { - return ( - |extract-addresses(%!headers), - |extract-addresses(%!headers), - |extract-addresses(%!headers), - |extract-addresses(%!headers), - |extract-addresses(%!headers), - |extract-addresses(%!headers), - |extract-addresses(%!headers), - ).unique; - # we should add a hack for info@meetup.com, where we keep the - # whole "from", since it's the only way to distinguish between - # different groups -} - -# subset of the grammar of p5's Email::Address, ignoring comments and -# quoting -my grammar Address { - token CTL { <[ \x00 .. \x1F \x7F ]> } - token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> } - - token atext { <-CTL> & <-special> & \S } - token dot-atom { [ [+]+ % '.' ] } - - token local-part { <.dot-atom> } - token domain { <.dot-atom> } - - token addr { '@' } - - rule TOP { ^ .*? [ + % .+? ] .*? $ } -} - -multi sub extract-addresses(Any --> Iterable) { return () } -multi sub extract-addresses(Str:D $str --> Iterable) { - my @result; - MaildirIndexer::LogTimelineSchema::Parse::Header.log: { - with Address.parse($str) { - @result = $_».Str; - } - } - return @result; -} diff --git a/lib/MaildirIndexer/Email.rakumod b/lib/MaildirIndexer/Email.rakumod new file mode 100644 index 0000000..4f92106 --- /dev/null +++ b/lib/MaildirIndexer/Email.rakumod @@ -0,0 +1,69 @@ +use v6.d; +unit class MaildirIndexer::Email; +use MaildirIndexer::LogTimelineSchema; + +has IO $.path; +has %!headers; +has $!body; + +method BUILD(:%!headers,:$!body,:$!path = IO) { } + +method message-id(--> Str:D) { split-refs(%!headers)[0] // '' } +method refs(--> Iterable) { + return ( + |split-refs(%!headers), + |split-refs(%!headers).reverse, + ); +} + +multi split-refs(Any --> Iterable) { return () } +multi split-refs(Str:D $str --> Iterable) { + my @result; + MaildirIndexer::LogTimelineSchema::Parse::Header.log: { + @result = $/».Str if $str ~~ m{'<' $ = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' }; + } + return @result; +} + +method addresses (--> Iterable) { + return ( + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + ).unique; + # we should add a hack for info@meetup.com, where we keep the + # whole "from", since it's the only way to distinguish between + # different groups +} + +# subset of the grammar of p5's Email::Address, ignoring comments and +# quoting +my grammar Address { + token CTL { <[ \x00 .. \x1F \x7F ]> } + token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> } + + token atext { <-CTL> & <-special> & \S } + token dot-atom { [ [+]+ % '.' ] } + + token local-part { <.dot-atom> } + token domain { <.dot-atom> } + + token addr { '@' } + + rule TOP { ^ .*? [ + % .+? ] .*? $ } +} + +multi sub extract-addresses(Any --> Iterable) { return () } +multi sub extract-addresses(Str:D $str --> Iterable) { + my @result; + MaildirIndexer::LogTimelineSchema::Parse::Header.log: { + with Address.parse($str) { + @result = $_».Str; + } + } + return @result; +} diff --git a/lib/MaildirIndexer/Index.pm6 b/lib/MaildirIndexer/Index.pm6 deleted file mode 100644 index 2cb308a..0000000 --- a/lib/MaildirIndexer/Index.pm6 +++ /dev/null @@ -1,7 +0,0 @@ -use v6.d; -unit role MaildirIndexer::Index; -use MaildirIndexer::Email; - -method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { ... } -method del-path(IO:D $path, Str:D $mailbox --> Nil) { ... } -method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { ... } diff --git a/lib/MaildirIndexer/Index.rakumod b/lib/MaildirIndexer/Index.rakumod new file mode 100644 index 0000000..2cb308a --- /dev/null +++ b/lib/MaildirIndexer/Index.rakumod @@ -0,0 +1,7 @@ +use v6.d; +unit role MaildirIndexer::Index; +use MaildirIndexer::Email; + +method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { ... } +method del-path(IO:D $path, Str:D $mailbox --> Nil) { ... } +method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { ... } diff --git a/lib/MaildirIndexer/Index/ByAddresses.pm6 b/lib/MaildirIndexer/Index/ByAddresses.pm6 deleted file mode 100644 index b83a239..0000000 --- a/lib/MaildirIndexer/Index/ByAddresses.pm6 +++ /dev/null @@ -1,92 +0,0 @@ -use v6.d; -use MaildirIndexer::Index; -unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index; -use MaildirIndexer::LogTimelineSchema; -use MaildirIndexer::Email; - -# most of this is copied from -# p6-Algorithm-NaiveBayes:auth:ver<0.04>, in particular the -# Algorithm::NaiveBayes::Classifier::Bernoulli class - -has Array[Str] %!addresses-for-file; -# I'd like to type-constrain these BagHash-es, but the compiler -# currently dies if I try -has BagHash $!count-by-address-and-mailbox .= new; -has BagHash $!known-addresses .= new; -has BagHash $!count-by-mailbox .= new; -has $!total-count; - -method dump() { -} - -submethod account-for(Str @addresses,Str $mailbox,Int $step) { - $!total-count += $step; - $!count-by-mailbox{$mailbox} += $step; - - for @addresses -> Str $addr { - $!known-addresses{$addr} += $step; - $!count-by-address-and-mailbox{$addr => $mailbox} += $step; - } -} - -method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { - MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> { - my Str @addresses = $email.addresses or return; - %!addresses-for-file{ $email.path } = @addresses; - - self.account-for(@addresses,$mailbox,1); - - return; - } -} - -method del-path(IO:D $file, Str:D $mailbox --> Nil) { - MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> { - my Str @addresses = %!addresses-for-file{$file.path} or return; - - self.account-for(@addresses,$mailbox,-1); - - return; - } -} - -submethod p-address-given-mailbox(Str $addr, Str $mailbox) { - my $a = 1e-15 + $!count-by-address-and-mailbox{$addr => $mailbox}; - my $b = 1 + $!count-by-mailbox{$mailbox}; - return $a / $b; -} - -submethod predict-mailbox-given-addresses(@addresses) { - my %prediction; - my Bag $addr-bag .= new(|@addresses); - - for $!count-by-mailbox.keys -> Str $mailbox { - my $p = $!count-by-mailbox{$mailbox} / $!total-count; - - for $!known-addresses.keys -> Str $addr { - my $addr-p = self.p-address-given-mailbox($addr,$mailbox); - if ($addr-bag{$addr}) { - $p *= $addr-p; - } - else { - $p *= 1 - $addr-p; - } - } - - %prediction{$mailbox} = $p; - } - - return %prediction; -} - -method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { - my Str $result; - MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> { - my %prediction = self.predict-mailbox-given-addresses($email.addresses); - - my @most-probable-mailboxes = %prediction.pairs.sort(*.value); - - if @most-probable-mailboxes -> $_ { $result = .[*-1].key } - } - return $result; -} diff --git a/lib/MaildirIndexer/Index/ByAddresses.rakumod b/lib/MaildirIndexer/Index/ByAddresses.rakumod new file mode 100644 index 0000000..b83a239 --- /dev/null +++ b/lib/MaildirIndexer/Index/ByAddresses.rakumod @@ -0,0 +1,92 @@ +use v6.d; +use MaildirIndexer::Index; +unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index; +use MaildirIndexer::LogTimelineSchema; +use MaildirIndexer::Email; + +# most of this is copied from +# p6-Algorithm-NaiveBayes:auth:ver<0.04>, in particular the +# Algorithm::NaiveBayes::Classifier::Bernoulli class + +has Array[Str] %!addresses-for-file; +# I'd like to type-constrain these BagHash-es, but the compiler +# currently dies if I try +has BagHash $!count-by-address-and-mailbox .= new; +has BagHash $!known-addresses .= new; +has BagHash $!count-by-mailbox .= new; +has $!total-count; + +method dump() { +} + +submethod account-for(Str @addresses,Str $mailbox,Int $step) { + $!total-count += $step; + $!count-by-mailbox{$mailbox} += $step; + + for @addresses -> Str $addr { + $!known-addresses{$addr} += $step; + $!count-by-address-and-mailbox{$addr => $mailbox} += $step; + } +} + +method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { + MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> { + my Str @addresses = $email.addresses or return; + %!addresses-for-file{ $email.path } = @addresses; + + self.account-for(@addresses,$mailbox,1); + + return; + } +} + +method del-path(IO:D $file, Str:D $mailbox --> Nil) { + MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> { + my Str @addresses = %!addresses-for-file{$file.path} or return; + + self.account-for(@addresses,$mailbox,-1); + + return; + } +} + +submethod p-address-given-mailbox(Str $addr, Str $mailbox) { + my $a = 1e-15 + $!count-by-address-and-mailbox{$addr => $mailbox}; + my $b = 1 + $!count-by-mailbox{$mailbox}; + return $a / $b; +} + +submethod predict-mailbox-given-addresses(@addresses) { + my %prediction; + my Bag $addr-bag .= new(|@addresses); + + for $!count-by-mailbox.keys -> Str $mailbox { + my $p = $!count-by-mailbox{$mailbox} / $!total-count; + + for $!known-addresses.keys -> Str $addr { + my $addr-p = self.p-address-given-mailbox($addr,$mailbox); + if ($addr-bag{$addr}) { + $p *= $addr-p; + } + else { + $p *= 1 - $addr-p; + } + } + + %prediction{$mailbox} = $p; + } + + return %prediction; +} + +method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { + my Str $result; + MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> { + my %prediction = self.predict-mailbox-given-addresses($email.addresses); + + my @most-probable-mailboxes = %prediction.pairs.sort(*.value); + + if @most-probable-mailboxes -> $_ { $result = .[*-1].key } + } + return $result; +} diff --git a/lib/MaildirIndexer/Index/ByRef.pm6 b/lib/MaildirIndexer/Index/ByRef.pm6 deleted file mode 100644 index d044272..0000000 --- a/lib/MaildirIndexer/Index/ByRef.pm6 +++ /dev/null @@ -1,44 +0,0 @@ -use v6.d; -use MaildirIndexer::Index; -unit class MaildirIndexer::Index::ByRef does MaildirIndexer::Index; -use MaildirIndexer::LogTimelineSchema; -use MaildirIndexer::Email; - -has Str %!id-for-file; -has Array[Str] %!mailboxes-for-id; - -method dump() { - say "{.key} → {.value}" for %!id-for-file; - say "{.key} ⇒ {.value.perl}" for %!mailboxes-for-id; -} - -method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { - MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByRef'),:$mailbox, -> { - my $id = $email.message-id or return; - %!id-for-file{ $email.path } = $id; - %!mailboxes-for-id{ $id }.push($mailbox); - return; - } -} - -method del-path(IO:D $file, Str:D $mailbox --> Nil) { - MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByRef'),:$mailbox, -> { - my $id = %!id-for-file{ $file.path }:delete; - with %!mailboxes-for-id{ $id } { - with .grep($mailbox):k -> $pos { - .splice($pos,1); - } - } - return; - } -} - -method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { - my Str $result; - MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByRef'), -> { - for |$email.refs() -> $ref { - with %!mailboxes-for-id{$ref} { $result = .[*-1] } - } - } - return $result; -} diff --git a/lib/MaildirIndexer/Index/ByRef.rakumod b/lib/MaildirIndexer/Index/ByRef.rakumod new file mode 100644 index 0000000..d044272 --- /dev/null +++ b/lib/MaildirIndexer/Index/ByRef.rakumod @@ -0,0 +1,44 @@ +use v6.d; +use MaildirIndexer::Index; +unit class MaildirIndexer::Index::ByRef does MaildirIndexer::Index; +use MaildirIndexer::LogTimelineSchema; +use MaildirIndexer::Email; + +has Str %!id-for-file; +has Array[Str] %!mailboxes-for-id; + +method dump() { + say "{.key} → {.value}" for %!id-for-file; + say "{.key} ⇒ {.value.perl}" for %!mailboxes-for-id; +} + +method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { + MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByRef'),:$mailbox, -> { + my $id = $email.message-id or return; + %!id-for-file{ $email.path } = $id; + %!mailboxes-for-id{ $id }.push($mailbox); + return; + } +} + +method del-path(IO:D $file, Str:D $mailbox --> Nil) { + MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByRef'),:$mailbox, -> { + my $id = %!id-for-file{ $file.path }:delete; + with %!mailboxes-for-id{ $id } { + with .grep($mailbox):k -> $pos { + .splice($pos,1); + } + } + return; + } +} + +method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { + my Str $result; + MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByRef'), -> { + for |$email.refs() -> $ref { + with %!mailboxes-for-id{$ref} { $result = .[*-1] } + } + } + return $result; +} diff --git a/lib/MaildirIndexer/LogTimelineSchema.pm6 b/lib/MaildirIndexer/LogTimelineSchema.pm6 deleted file mode 100644 index 1e1848f..0000000 --- a/lib/MaildirIndexer/LogTimelineSchema.pm6 +++ /dev/null @@ -1,20 +0,0 @@ -use v6.d; -unit module MaildirIndexer::LogTimelineSchema; -use Log::Timeline; - -class Store::Add does Log::Timeline::Task['MaildirIndexer','Store','add file'] { }; -class Store::Rm does Log::Timeline::Task['MaildirIndexer','Store','rm file'] { }; -class Store::Find does Log::Timeline::Task['MaildirIndexer','Store','finding mailbox'] { }; - -class Parse::Email::Str does Log::Timeline::Task['MaildirIndexer','Parser','parsing a string'] { }; -class Parse::Email::File does Log::Timeline::Task['MaildirIndexer','Parser','parsing a file'] { }; -class Parse::Email::Socket does Log::Timeline::Task['MaildirIndexer','Parser','parsing a socket'] { }; - -class Parse::Header does Log::Timeline::Task['MaildirIndexer','Email','parsing a header'] { }; - -class Index::Add does Log::Timeline::Task['MaildirIndexer','Index','add email'] { }; -class Index::Rm does Log::Timeline::Task['MaildirIndexer','Index','rm path'] { }; - -class Index::Find does Log::Timeline::Task['MaildirIndexer','Index','finding mailbox'] { }; - -class Server::Serve does Log::Timeline::Task['MaildirIndexer','Server','serving a request'] { }; diff --git a/lib/MaildirIndexer/LogTimelineSchema.rakumod b/lib/MaildirIndexer/LogTimelineSchema.rakumod new file mode 100644 index 0000000..1e1848f --- /dev/null +++ b/lib/MaildirIndexer/LogTimelineSchema.rakumod @@ -0,0 +1,20 @@ +use v6.d; +unit module MaildirIndexer::LogTimelineSchema; +use Log::Timeline; + +class Store::Add does Log::Timeline::Task['MaildirIndexer','Store','add file'] { }; +class Store::Rm does Log::Timeline::Task['MaildirIndexer','Store','rm file'] { }; +class Store::Find does Log::Timeline::Task['MaildirIndexer','Store','finding mailbox'] { }; + +class Parse::Email::Str does Log::Timeline::Task['MaildirIndexer','Parser','parsing a string'] { }; +class Parse::Email::File does Log::Timeline::Task['MaildirIndexer','Parser','parsing a file'] { }; +class Parse::Email::Socket does Log::Timeline::Task['MaildirIndexer','Parser','parsing a socket'] { }; + +class Parse::Header does Log::Timeline::Task['MaildirIndexer','Email','parsing a header'] { }; + +class Index::Add does Log::Timeline::Task['MaildirIndexer','Index','add email'] { }; +class Index::Rm does Log::Timeline::Task['MaildirIndexer','Index','rm path'] { }; + +class Index::Find does Log::Timeline::Task['MaildirIndexer','Index','finding mailbox'] { }; + +class Server::Serve does Log::Timeline::Task['MaildirIndexer','Server','serving a request'] { }; diff --git a/lib/MaildirIndexer/Parser.pm6 b/lib/MaildirIndexer/Parser.pm6 deleted file mode 100644 index 420d4c3..0000000 --- a/lib/MaildirIndexer/Parser.pm6 +++ /dev/null @@ -1,112 +0,0 @@ -use v6.d; -unit module MaildirIndexer::Parser; -use MaildirIndexer::LogTimelineSchema; -use MaildirIndexer::Email; - -my @separators = ( - "\x0a\x0d\x0a\x0d", - "\x0d\x0a\x0d\x0a", - "\x0a\x0a", - "\x0d\x0d", -); - -my grammar Message { - regex TOP { - - - - } - - token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d } - token separator { @separators } - - token body { .* } - regex headers { -
+ % - } - regex header { - \: \h* - || - } - token name { - <-[:\s]>+ - } - regex value { - + % [ \h+] - } - token line { \N* } - token junk { \N+ } -} - -my class Message-actions { - has $.path = IO; - method TOP($/) { - make MaildirIndexer::Email.new( - headers => $/.made, - body => $/.Str, - path => $.path, - ); - } - method headers($/) { - make %( flat |$/
».made ); - } - method header($/) { - make $/ ?? () !! ( $/.Str.lc => $/.made ); - } - method value($/) { - make $/.join(' ') - } -} - -multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export { - my MaildirIndexer::Email $result; - MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> { - $result = parse-email($p.slurp(:enc), path => $p); - } - return $result; -} - -multi parse-email(IO::Path:D $p, :$headers-only! --> MaildirIndexer::Email) is export { - my MaildirIndexer::Email $result; - MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> { - my IO::Handle $h = $p.open( - :enc, - :nl-in(@separators), - :!chomp, - ); - $result = parse-email( - $h.lines()[0], - path => $p, - ); - $h.close(); - } - return $result; -} - -multi parse-email(IO::Socket::Async:D $s --> MaildirIndexer::Email) is export { - my MaildirIndexer::Email $result; - MaildirIndexer::LogTimelineSchema::Parse::Email::Socket.log: -> { - my $string; - react { - whenever $s.Supply(:enc) { - $string ~= $_; - # parsing the whole email is much faster (0.02 seconds - # instead of 1.2!) than just C<< $string ~~ - # /@separators/ >> - $result = parse-email($string) and done; - } - } - } - return $result; -} - -multi parse-email(Str:D $email-str, :$path = IO --> MaildirIndexer::Email) is export { - my MaildirIndexer::Email $result; - MaildirIndexer::LogTimelineSchema::Parse::Email::Str.log: -> { - CATCH { warn $_; return Nil }; - with Message.parse($email-str,:actions(Message-actions.new(:$path))) { - $result = .made; - } - } - return $result; -} diff --git a/lib/MaildirIndexer/Parser.rakumod b/lib/MaildirIndexer/Parser.rakumod new file mode 100644 index 0000000..420d4c3 --- /dev/null +++ b/lib/MaildirIndexer/Parser.rakumod @@ -0,0 +1,112 @@ +use v6.d; +unit module MaildirIndexer::Parser; +use MaildirIndexer::LogTimelineSchema; +use MaildirIndexer::Email; + +my @separators = ( + "\x0a\x0d\x0a\x0d", + "\x0d\x0a\x0d\x0a", + "\x0a\x0a", + "\x0d\x0d", +); + +my grammar Message { + regex TOP { + + + + } + + token newline { [\x0d\x0a] | [\x0a\x0d] | \x0a | \x0d } + token separator { @separators } + + token body { .* } + regex headers { +
+ % + } + regex header { + \: \h* + || + } + token name { + <-[:\s]>+ + } + regex value { + + % [ \h+] + } + token line { \N* } + token junk { \N+ } +} + +my class Message-actions { + has $.path = IO; + method TOP($/) { + make MaildirIndexer::Email.new( + headers => $/.made, + body => $/.Str, + path => $.path, + ); + } + method headers($/) { + make %( flat |$/
».made ); + } + method header($/) { + make $/ ?? () !! ( $/.Str.lc => $/.made ); + } + method value($/) { + make $/.join(' ') + } +} + +multi parse-email(IO::Path:D $p --> MaildirIndexer::Email) is export { + my MaildirIndexer::Email $result; + MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> { + $result = parse-email($p.slurp(:enc), path => $p); + } + return $result; +} + +multi parse-email(IO::Path:D $p, :$headers-only! --> MaildirIndexer::Email) is export { + my MaildirIndexer::Email $result; + MaildirIndexer::LogTimelineSchema::Parse::Email::File.log: :file($p.path), -> { + my IO::Handle $h = $p.open( + :enc, + :nl-in(@separators), + :!chomp, + ); + $result = parse-email( + $h.lines()[0], + path => $p, + ); + $h.close(); + } + return $result; +} + +multi parse-email(IO::Socket::Async:D $s --> MaildirIndexer::Email) is export { + my MaildirIndexer::Email $result; + MaildirIndexer::LogTimelineSchema::Parse::Email::Socket.log: -> { + my $string; + react { + whenever $s.Supply(:enc) { + $string ~= $_; + # parsing the whole email is much faster (0.02 seconds + # instead of 1.2!) than just C<< $string ~~ + # /@separators/ >> + $result = parse-email($string) and done; + } + } + } + return $result; +} + +multi parse-email(Str:D $email-str, :$path = IO --> MaildirIndexer::Email) is export { + my MaildirIndexer::Email $result; + MaildirIndexer::LogTimelineSchema::Parse::Email::Str.log: -> { + CATCH { warn $_; return Nil }; + with Message.parse($email-str,:actions(Message-actions.new(:$path))) { + $result = .made; + } + } + return $result; +} diff --git a/lib/MaildirIndexer/ScanDir.pm6 b/lib/MaildirIndexer/ScanDir.pm6 deleted file mode 100644 index b21fd81..0000000 --- a/lib/MaildirIndexer/ScanDir.pm6 +++ /dev/null @@ -1,34 +0,0 @@ -use v6.d; -unit module MaildirIndexer::ScanDir; - -sub scan-dir(IO() $path --> Supply) is export { - supply { - my %watched-dirs; - - sub add-dir(IO::Path $dir, :$initial) { - %watched-dirs{$dir} = True; - - CATCH { when X::IO::Dir { }; default { warn $_ } } - - whenever $dir.watch { - my $path-io = .path.IO; - emit $path-io; - when $path-io.e && $path-io.d { - add-dir($path-io) unless %watched-dirs{$path-io}; - } - when !$path-io.e { - %watched-dirs{$path-io}:delete - } - } - - for $dir.dir { - emit $_; - when .e && .d { - add-dir($_); - } - } - } - - add-dir($path); - } -} diff --git a/lib/MaildirIndexer/ScanDir.rakumod b/lib/MaildirIndexer/ScanDir.rakumod new file mode 100644 index 0000000..b21fd81 --- /dev/null +++ b/lib/MaildirIndexer/ScanDir.rakumod @@ -0,0 +1,34 @@ +use v6.d; +unit module MaildirIndexer::ScanDir; + +sub scan-dir(IO() $path --> Supply) is export { + supply { + my %watched-dirs; + + sub add-dir(IO::Path $dir, :$initial) { + %watched-dirs{$dir} = True; + + CATCH { when X::IO::Dir { }; default { warn $_ } } + + whenever $dir.watch { + my $path-io = .path.IO; + emit $path-io; + when $path-io.e && $path-io.d { + add-dir($path-io) unless %watched-dirs{$path-io}; + } + when !$path-io.e { + %watched-dirs{$path-io}:delete + } + } + + for $dir.dir { + emit $_; + when .e && .d { + add-dir($_); + } + } + } + + add-dir($path); + } +} diff --git a/lib/MaildirIndexer/Server.pm6 b/lib/MaildirIndexer/Server.pm6 deleted file mode 100644 index 07dfb4a..0000000 --- a/lib/MaildirIndexer/Server.pm6 +++ /dev/null @@ -1,33 +0,0 @@ -use v6.d; -unit class MaildirIndexer::Server; -use MaildirIndexer::LogTimelineSchema; -use MaildirIndexer::Parser; -use MaildirIndexer::Store; - -has $.port = 9000; -has MaildirIndexer::Store $.store is required; - -method serve() { - my $listener = IO::Socket::Async.listen( - '127.0.0.1', - $.port, - :enc, - ); - - react { - whenever signal(SIGINT) { exit } - whenever signal(SIGHUP) { - $.store.dump(); - } - whenever $listener -> $conn { - MaildirIndexer::LogTimelineSchema::Server::Serve.log: { - LEAVE { $conn.close } - with parse-email($conn) -> $email { - with $.store.mailbox-for-email($email) -> $mailbox { - await $conn.print("$mailbox\x0d\x0a"); - } - } - } - } - } -} diff --git a/lib/MaildirIndexer/Server.rakumod b/lib/MaildirIndexer/Server.rakumod new file mode 100644 index 0000000..07dfb4a --- /dev/null +++ b/lib/MaildirIndexer/Server.rakumod @@ -0,0 +1,33 @@ +use v6.d; +unit class MaildirIndexer::Server; +use MaildirIndexer::LogTimelineSchema; +use MaildirIndexer::Parser; +use MaildirIndexer::Store; + +has $.port = 9000; +has MaildirIndexer::Store $.store is required; + +method serve() { + my $listener = IO::Socket::Async.listen( + '127.0.0.1', + $.port, + :enc, + ); + + react { + whenever signal(SIGINT) { exit } + whenever signal(SIGHUP) { + $.store.dump(); + } + whenever $listener -> $conn { + MaildirIndexer::LogTimelineSchema::Server::Serve.log: { + LEAVE { $conn.close } + with parse-email($conn) -> $email { + with $.store.mailbox-for-email($email) -> $mailbox { + await $conn.print("$mailbox\x0d\x0a"); + } + } + } + } + } +} diff --git a/lib/MaildirIndexer/Store.pm6 b/lib/MaildirIndexer/Store.pm6 deleted file mode 100644 index 502d60e..0000000 --- a/lib/MaildirIndexer/Store.pm6 +++ /dev/null @@ -1,69 +0,0 @@ -use v6.d; -unit class MaildirIndexer::Store; -use MaildirIndexer::LogTimelineSchema; -use MaildirIndexer::Index; -use MaildirIndexer::Parser; - -has Lock $!lock .= new; -has MaildirIndexer::Index @.indices is required; -has Channel $.file-channel is required; -has Int $.workers = 10; - -method dump(--> Nil) { - $!lock.protect: { - .dump() for @!indices; - } -} - -method start(--> Nil) { - for ^$.workers { - start react { - CATCH { warn $_ }; - whenever $.file-channel -> $file { - if $file.e && $file.f { - MaildirIndexer::LogTimelineSchema::Store::Add.log: :file($file.path), -> { - self.add-file($file); - } - } - elsif !$file.e { - MaildirIndexer::LogTimelineSchema::Store::Rm.log: :file($file.path), -> { - self.del-file($file); - } - } - } - } - } -} - -method add-file(IO:D $file --> Nil) { - my $mailbox = mailbox-from-path($file.path) or return; - my $email = parse-email($file,:headers-only) or return; - CATCH { warn $_ }; - $!lock.protect: { - .add-mail($email,$mailbox) for @!indices; - } - return; -} - -method del-file(IO:D $file --> Nil) { - my $mailbox = mailbox-from-path($file.path) or return; - $!lock.protect: { - .del-path($file,$mailbox) for @!indices; - } - return; -} - -method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { - my Str $result; - MaildirIndexer::LogTimelineSchema::Store::Find.log: { - for @!indices -> $index { - with $index.mailbox-for-email($email) { $result = $_; last }; - } - } - return $result; -} - -sub mailbox-from-path(Str() $path --> Str) { - $path ~~ m{'/' (<-[/]>+?) '/' [cur|new|tmp] '/'} and return ~$/[0]; - return Nil; -} diff --git a/lib/MaildirIndexer/Store.rakumod b/lib/MaildirIndexer/Store.rakumod new file mode 100644 index 0000000..502d60e --- /dev/null +++ b/lib/MaildirIndexer/Store.rakumod @@ -0,0 +1,69 @@ +use v6.d; +unit class MaildirIndexer::Store; +use MaildirIndexer::LogTimelineSchema; +use MaildirIndexer::Index; +use MaildirIndexer::Parser; + +has Lock $!lock .= new; +has MaildirIndexer::Index @.indices is required; +has Channel $.file-channel is required; +has Int $.workers = 10; + +method dump(--> Nil) { + $!lock.protect: { + .dump() for @!indices; + } +} + +method start(--> Nil) { + for ^$.workers { + start react { + CATCH { warn $_ }; + whenever $.file-channel -> $file { + if $file.e && $file.f { + MaildirIndexer::LogTimelineSchema::Store::Add.log: :file($file.path), -> { + self.add-file($file); + } + } + elsif !$file.e { + MaildirIndexer::LogTimelineSchema::Store::Rm.log: :file($file.path), -> { + self.del-file($file); + } + } + } + } + } +} + +method add-file(IO:D $file --> Nil) { + my $mailbox = mailbox-from-path($file.path) or return; + my $email = parse-email($file,:headers-only) or return; + CATCH { warn $_ }; + $!lock.protect: { + .add-mail($email,$mailbox) for @!indices; + } + return; +} + +method del-file(IO:D $file --> Nil) { + my $mailbox = mailbox-from-path($file.path) or return; + $!lock.protect: { + .del-path($file,$mailbox) for @!indices; + } + return; +} + +method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { + my Str $result; + MaildirIndexer::LogTimelineSchema::Store::Find.log: { + for @!indices -> $index { + with $index.mailbox-for-email($email) { $result = $_; last }; + } + } + return $result; +} + +sub mailbox-from-path(Str() $path --> Str) { + $path ~~ m{'/' (<-[/]>+?) '/' [cur|new|tmp] '/'} and return ~$/[0]; + return Nil; +} diff --git a/t/lib/TestIndex.pm6 b/t/lib/TestIndex.pm6 deleted file mode 100644 index c0b0b03..0000000 --- a/t/lib/TestIndex.pm6 +++ /dev/null @@ -1,35 +0,0 @@ -use v6.d; -use MaildirIndexer::Index; - -unit class TestIndex does MaildirIndexer::Index; - -has %.mails; -has $.name = 'test index'; -has @.responses = ( 'foo' xx 10 ); - -has atomicint $!seen = 0; -has Int $.expect; -has $.seen-all; -has $!seen-all-vow; - -method set-expect(Int:D $!expect) { - $!seen ⚛= 0; - $!seen-all = Promise.new; - $!seen-all-vow = $!seen-all.vow; -} - -method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { - %.mails{$mailbox}{$email.path}=1; - ++⚛$!seen; - if ($!seen == $!expect) { - $!seen-all-vow.keep(True) - } -} - -method del-path(IO:D $path, Str:D $mailbox --> Nil) { - %.mails{$mailbox}{$path}:delete; -} - -method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { - return @!responses.shift; -} diff --git a/t/lib/TestIndex.rakumod b/t/lib/TestIndex.rakumod new file mode 100644 index 0000000..c0b0b03 --- /dev/null +++ b/t/lib/TestIndex.rakumod @@ -0,0 +1,35 @@ +use v6.d; +use MaildirIndexer::Index; + +unit class TestIndex does MaildirIndexer::Index; + +has %.mails; +has $.name = 'test index'; +has @.responses = ( 'foo' xx 10 ); + +has atomicint $!seen = 0; +has Int $.expect; +has $.seen-all; +has $!seen-all-vow; + +method set-expect(Int:D $!expect) { + $!seen ⚛= 0; + $!seen-all = Promise.new; + $!seen-all-vow = $!seen-all.vow; +} + +method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { + %.mails{$mailbox}{$email.path}=1; + ++⚛$!seen; + if ($!seen == $!expect) { + $!seen-all-vow.keep(True) + } +} + +method del-path(IO:D $path, Str:D $mailbox --> Nil) { + %.mails{$mailbox}{$path}:delete; +} + +method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) { + return @!responses.shift; +} -- cgit v1.2.3