use v6.d;
use MaildirIndexer::Index;
unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;
# Addresses recorded for each indexed file, keyed by the file's path.
# add-mail stores an entry here, del-path removes it again.
has Array[Str] %!addresses-for-file;
# How often each `address => mailbox` pair has been accounted for.
has BagHash $!count-by-address-and-mailbox .= new;
# Every address accounted for so far, together with its running count.
has BagHash $!known-addresses .= new;
# Number of mails accounted for in each mailbox.
has BagHash $!count-by-mailbox .= new;
# Estimate of how likely an address is to show up on a mail of a mailbox,
# stored under the `address => mailbox` pair; written by account-for.
has Hash $!p-address-given-mailbox .= new;
# Net number of mails accounted for over all mailboxes (account-for adds
# its step to this on every call).
has $!total-count;
# Intentionally empty: this index currently dumps nothing.
# NOTE(review): presumably required by the MaildirIndexer::Index role --
# confirm against the role definition.
method dump() {
}
# Fold one mail into (or out of) all the counters.
#
#   @addresses  the addresses found on the mail
#   $mailbox    the mailbox the mail belongs to
#   $step       +1 when the mail is being added, -1 when it is being removed
#
# Besides the raw counts this refreshes the cached estimate of
# P(address | mailbox) for every address on the mail.
submethod account-for(Str @addresses,Str $mailbox,Int $step) {
    $!total-count += $step;
    $!count-by-mailbox{$mailbox} += $step;

    # The same for every address below, so look it up only once.
    my $mailbox-count = $!count-by-mailbox{$mailbox};

    # A mail counts once per distinct address.  Without the `unique`, an
    # address listed twice on one mail would bump the pair count twice while
    # the mailbox count only goes up by one, pushing the estimate above 1
    # (and `1 - estimate` below zero in the predictor).
    for @addresses.unique -> Str $addr {
        my $pair = ( $addr => $mailbox );
        $!known-addresses{$addr} += $step;
        $!count-by-address-and-mailbox{$pair} += $step;

        # Read the count back from the bag: a BagHash never holds weights
        # below 1, so an unbalanced removal yields 0 here instead of a
        # negative number that would end up cached as a "probability".
        my $count = $!count-by-address-and-mailbox{$pair};
        if $count {
            # Smoothed relative frequency; the 1e-15 matches the floor the
            # predictor uses for pairs that were never seen.
            $!p-address-given-mailbox{$pair} = (1e-15 + $count) / (1 + $mailbox-count);
        }
        else {
            $!p-address-given-mailbox{$pair}:delete;
        }
    }
}
# Index a mail under the given mailbox.
#
# Does nothing when the mail's file has already been indexed, or when the
# mail carries no addresses at all.  The whole operation is wrapped in an
# Index::Add timeline log entry.
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, {
        # already seen this file: nothing new to learn
        if %!addresses-for-file{ $email.path }:exists {
            return;
        }

        my Str @found = $email.addresses;
        # a mail without addresses cannot contribute to this index
        return unless @found;

        # remember what we counted, so del-path can undo exactly that
        %!addresses-for-file{ $email.path } := @found;
        self.account-for(@found, $mailbox, 1);
        return;
    }
}
# Forget the mail stored in the given file.
#
# Drops the addresses recorded for the file and subtracts them from the
# counters of the given mailbox.  Files that were never indexed are silently
# ignored.  The whole operation is wrapped in an Index::Rm timeline log
# entry.
method del-path(IO:D $file, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, {
        # removing the entry hands back whatever was recorded for the file
        my Str @forgotten := %!addresses-for-file{$file.path}:delete;
        # nothing recorded: there is nothing to undo
        return unless @forgotten;

        self.account-for(@forgotten, $mailbox, -1);
        return;
    }
}
# Score every known mailbox as a home for a mail carrying the given
# addresses.
#
#   @addresses  the addresses found on the mail to classify
#
# Returns a hash from mailbox name to an unnormalised score: the share of
# mails in that mailbox, multiplied, for every known address, by
# P(address | mailbox) when the address is on the mail and by
# 1 - P(address | mailbox) when it is not.  The hash is empty when nothing
# has been accounted for.
submethod predict-mailbox-given-addresses(@addresses) {
    my %prediction;

    # Without a positive total there is no prior to compute.
    return %prediction unless ($!total-count // 0) > 0;

    my Bag $addr-bag .= new(|@addresses);
    # The list of known addresses does not depend on the mailbox.
    my @known-addresses = $!known-addresses.keys;

    for $!count-by-mailbox.keys -> Str $mailbox {
        my $mailbox-count = $!count-by-mailbox{$mailbox};
        my $p = $mailbox-count / $!total-count;
        for @known-addresses -> Str $addr {
            # Derive the likelihood from the live counts.  The value cached
            # in $!p-address-given-mailbox is only refreshed for the
            # addresses of the mail being added or removed, so it goes stale
            # for every other address as soon as the mailbox count changes.
            my $pair-count = $!count-by-address-and-mailbox{$addr => $mailbox};
            my $addr-p = $pair-count
                ?? (1e-15 + $pair-count) / (1 + $mailbox-count)
                !! 1e-15;
            $p *= $addr-bag{$addr} ?? $addr-p !! 1 - $addr-p;
        }
        %prediction{$mailbox} = $p;
    }
    return %prediction;
}
# Pick the mailbox that best matches the addresses of the given mail.
#
# Returns the name of the highest-scoring mailbox, or Nil when there is no
# prediction at all.  The lookup is wrapped in an Index::Find timeline log
# entry.
method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
    MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), {
        my %score-of = self.predict-mailbox-given-addresses($email.addresses);
        # ascending by score, so the best candidate ends up last
        my @ranked = %score-of.pairs.sort(*.value);
        return Nil unless @ranked;
        return @ranked.tail.key;
    }
}