use v6.d;
use MaildirIndexer::Index;
unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;

# most of this is copied from
# p6-Algorithm-NaiveBayes:auth:ver<0.04>, in particular the
# Algorithm::NaiveBayes::Classifier::Bernoulli class

# the addresses counted for each indexed file, keyed by path, so that
# `del-path` can subtract exactly what `add-mail` added
has Array[Str] %!addresses-for-file;

# I'd like to type-constrain these BagHash-es, but the compiler
# currently dies if I try

# counters keyed by `address => mailbox` pairs
has BagHash $!count-by-address-and-mailbox .= new;
# counters keyed by address, across all mailboxes
has BagHash $!known-addresses .= new;
# counters keyed by mailbox, stepped once per accounted email
has BagHash $!count-by-mailbox .= new;
# sum of all the steps applied by `account-for`
has $!total-count;

# intentionally empty: this index has nothing to dump
method dump() { }

# Applies `$step` to every counter touched by one email: the total,
# the mailbox counter, and (once per element of `@addresses`) the
# address and address/mailbox counters. `add-mail` passes 1,
# `del-path` passes -1.
submethod account-for(Str @addresses, Str $mailbox, Int $step) {
    $!total-count += $step;
    $!count-by-mailbox{$mailbox} += $step;
    for @addresses -> Str $addr {
        $!known-addresses{$addr} += $step;
        $!count-by-address-and-mailbox{$addr => $mailbox} += $step;
    }
}

# Records the addresses of `$email` as belonging to `$mailbox`.
# Emails without addresses are ignored (nothing is stored or counted).
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> {
        my Str @addresses = $email.addresses or return;
        # remember what we counted, `del-path` needs it to undo this
        %!addresses-for-file{ $email.path } = @addresses;
        self.account-for(@addresses, $mailbox, 1);
        return;
    }
}

# Undoes the accounting done by `add-mail` for the file at `$file`.
# Files we have no record for are ignored.
method del-path(IO:D $file, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> {
        # take the record out of the hash: it is no longer needed, and
        # a second `del-path` for the same file must not decrement the
        # counters again
        my $recorded = (%!addresses-for-file{$file.path}:delete);
        with $recorded {
            # the stored value is a single itemized Array: `.list`
            # makes the assignment copy its elements, instead of
            # trying to store the whole Array as one `Str` element
            my Str @addresses = $recorded.list;
            self.account-for(@addresses, $mailbox, -1) if @addresses;
        }
        return;
    }
}

# Estimate of how likely `$addr` is to appear in an email of
# `$mailbox`. The small constants keep the result non-zero for unseen
# pairs, and avoid dividing by zero for unseen mailboxes.
submethod p-address-given-mailbox(Str $addr, Str $mailbox) {
    my $a = 1e-15 + $!count-by-address-and-mailbox{$addr => $mailbox};
    my $b = 1 + $!count-by-mailbox{$mailbox};
    return $a / $b;
}

# Returns a hash mapping each known mailbox to a score for
# `@addresses`: the mailbox's share of the total count, multiplied,
# for every known address, by the probability of that address being
# present (if it is in `@addresses`) or absent (if it is not).
submethod predict-mailbox-given-addresses(@addresses) {
    my %prediction;
    my Bag $addr-bag .= new(|@addresses);
    for $!count-by-mailbox.keys -> Str $mailbox {
        my $p = $!count-by-mailbox{$mailbox} / $!total-count;
        for $!known-addresses.keys -> Str $addr {
            my $addr-p = self.p-address-given-mailbox($addr, $mailbox);
            if ($addr-bag{$addr}) {
                $p *= $addr-p;
            }
            else {
                $p *= 1 - $addr-p;
            }
        }
        %prediction{$mailbox} = $p;
    }
    return %prediction;
}

# Returns the name of the mailbox with the highest score for the
# addresses of `$email`, or the `Str` type object when no mailbox is
# known yet.
method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
    my Str $result;
    MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> {
        my %prediction = self.predict-mailbox-given-addresses($email.addresses);
        # `max` of nothing is not a Pair, so only look for the best
        # candidate when there is at least one
        if %prediction {
            $result = %prediction.pairs.max(*.value).key;
        }
    }
    return $result;
}