use v6.d;
use MaildirIndexer::Index;
# Index that keeps per-mailbox statistics about the addresses seen in
# indexed emails and uses them to suggest a mailbox for a new email.
# Emails are registered with add-mail, unregistered with del-path, and
# mailbox-for-email returns the mailbox with the highest score computed
# from those counts.
unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;
# Addresses recorded for each indexed file, keyed by the file's path, so that
# a later removal can undo exactly what was counted when the file was added.
has Array[Str] %!addresses-for-file;
# Counter keyed by the Pair (address => mailbox).
has BagHash $!count-by-address-and-mailbox .= new;
# Counter keyed by address; its keys are the set of addresses seen so far.
has BagHash $!known-addresses .= new;
# Counter keyed by mailbox name.
has BagHash $!count-by-mailbox .= new;
# Sum of every step passed to account-for. Typed and started at zero so it
# is always a defined number, like the other counters above.
has Int $!total-count = 0;
# No-op: this index does nothing when asked to dump its state.
# NOTE(review): presumably present to satisfy the MaildirIndexer::Index
# role -- confirm against the role definition.
method dump() { }
# Adjust every counter by $step for one email filed under $mailbox.
#
# @addresses  addresses found in the email
# $mailbox    mailbox the email belongs to
# $step       amount added to each counter; add-mail passes 1 and del-path
#             passes -1
submethod account-for(Str @addresses,Str $mailbox,Int $step) {
    $!total-count += $step;
    $!count-by-mailbox{$mailbox} += $step;
    # Count each address once per email. p-address-given-mailbox divides the
    # (address, mailbox) count by the mailbox count; an address repeated
    # within a single email would push that ratio above 1 and make the
    # "address absent" factor (1 - p) negative.
    for @addresses.unique -> Str $addr {
        $!known-addresses{$addr} += $step;
        $!count-by-address-and-mailbox{$addr => $mailbox} += $step;
    }
}
# Register $email as belonging to $mailbox.
#
# The email's addresses are remembered under the email's path and every
# counter is incremented by one. An email without addresses is ignored.
# The work runs inside the Index::Add log-timeline block.
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> {
        my Str @found = $email.addresses;
        # Nothing to count when the email carries no address at all.
        return unless @found;
        %!addresses-for-file{ $email.path } = @found;
        self.account-for(@found,$mailbox,1);
        return;
    }
}
# Unregister the mail file $file from $mailbox.
#
# Looks up the addresses that were recorded for the file's path, forgets
# them, and decrements every counter by one. A file that is not currently
# recorded is ignored. The work runs inside the Index::Rm log-timeline block.
method del-path(IO:D $file, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> {
        my $path = $file.path;
        # Unknown path: there is nothing to undo.
        return unless %!addresses-for-file{$path}:exists;
        # Remove the entry while reading it, so the hash does not keep growing
        # and a second removal of the same file cannot decrement the
        # counters again.
        my $stored = %!addresses-for-file{$path}:delete;
        # The hash hands the array back inside an item container; .list
        # flattens it so the individual strings are copied, not the array
        # as a single element.
        my Str @addresses = $stored.list;
        return unless @addresses;
        self.account-for(@addresses,$mailbox,-1);
        return;
    }
}
# Score for seeing $addr in an email filed under $mailbox:
#   (1e-15 + count of (address, mailbox)) / (1 + count of mailbox)
# The small constant keeps the numerator non-zero for pairs never seen, and
# the added 1 keeps the denominator non-zero for unknown mailboxes.
submethod p-address-given-mailbox(Str $addr, Str $mailbox) {
    my $seen-together = $!count-by-address-and-mailbox{$addr => $mailbox};
    my $mails-in-box  = $!count-by-mailbox{$mailbox};
    return (1e-15 + $seen-together) / (1 + $mails-in-box);
}
# Compute a score for every known mailbox given the addresses of an email.
#
# For each mailbox the score starts at count(mailbox) / total count and is
# then multiplied, for every known address, by p-address-given-mailbox when
# the address occurs in @addresses and by its complement when it does not.
#
# Returns a hash mapping mailbox name to score.
submethod predict-mailbox-given-addresses(@addresses) {
    my Bag $present .= new(|@addresses);
    my %score-for;
    for $!count-by-mailbox.keys -> Str $mailbox {
        my $score = $!count-by-mailbox{$mailbox} / $!total-count;
        for $!known-addresses.keys -> Str $addr {
            my $likelihood = self.p-address-given-mailbox($addr,$mailbox);
            # Present addresses contribute p, absent ones contribute 1 - p.
            $score *= $present{$addr} ?? $likelihood !! 1 - $likelihood;
        }
        %score-for{$mailbox} = $score;
    }
    return %score-for;
}
# Suggest the mailbox for $email.
#
# Scores every known mailbox from the email's addresses and returns the name
# with the highest score. When no mailbox has been indexed yet the undefined
# Str type object is returned. The work runs inside the Index::Find
# log-timeline block.
method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
    my Str $best;
    MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> {
        my %score-for = self.predict-mailbox-given-addresses($email.addresses);
        # Ascending by score, so the best candidate ends up last.
        my @ranked = %score-for.pairs.sort(*.value);
        $best = @ranked.tail.key if @ranked;
    }
    return $best;
}