use v6.d;
use MaildirIndexer::Index;
unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;
# Address list recorded for each indexed mail, keyed by the mail's path.
# Written by add-mail, removed again (and consumed) by del-path.
has Array[Str] %!addresses-for-file;
# Running count per (address => mailbox) Pair, adjusted by account-for.
has BagHash $!count-by-address-and-mailbox .= new;
# Running count per address, regardless of mailbox.
has BagHash $!known-addresses .= new;
# Running count of accounted mails per mailbox.
has BagHash $!count-by-mailbox .= new;
# Per (address => mailbox) Pair: ($NOT-ZERO + pair count) / (1 + mailbox count),
# as computed by account-for at the time the pair was last touched.
has Numeric %!p-address-given-mailbox;
# Sum of all steps passed to account-for (net number of accounted mails).
has Int $!total-count;
# Per-mailbox value precomputed by do-update-cache and used as the starting
# score in predict-mailbox-given-addresses.
has Numeric %!cached-p-given-mailbox;
# account-for emits on this supplier after every change; TWEAK taps it to
# trigger do-update-cache.
has Supplier $!update-cache .= new;
# Small positive stand-in used wherever a probability is missing, so that
# products and ratios never use an exact zero.
constant $NOT-ZERO = 1e-15;
# Wire up the cache refresh: every emit on $!update-cache requests a refresh,
# but .stable(10) only lets a value through once no newer one has arrived
# within 10 seconds, so bursts of changes result in a single recomputation.
submethod TWEAK() {
    my $debounced = $!update-cache.Supply.stable(10);
    $debounced.tap: -> $ { self.do-update-cache };
}
# No-op: this index currently dumps nothing.
# NOTE(review): presumably present to satisfy the MaildirIndexer::Index role;
# confirm against the role definition.
method dump() {
}
# Apply one mail's contribution to all counters.
#
# @addresses - addresses found in the mail
# $mailbox   - mailbox the mail belongs to
# $step      - amount added to each counter (callers in this class pass +1 when
#              adding a mail and -1 when removing one)
#
# Afterwards a refresh of the per-mailbox cache is requested via $!update-cache.
#
# NOTE(review): only the (address => mailbox) entries for @addresses are
# recomputed here; entries of other addresses in the same mailbox keep the
# mailbox count they were computed with. Confirm whether that is intended.
submethod account-for(Str @addresses,Str $mailbox,Int $step) {
    $!total-count += $step;
    $!count-by-mailbox{$mailbox} += $step;

    for @addresses -> Str $address {
        # Decontainerised key/value so the Pair can serve as a stable bag key.
        my Pair $key = ( $address<> => $mailbox<> );
        $!known-addresses{$address} += $step;
        my Numeric $seen = ($!count-by-address-and-mailbox{$key} += $step);

        # Pair no longer occurs: drop its probability instead of storing one.
        unless $seen {
            %!p-address-given-mailbox{$key}:delete;
            next;
        }

        %!p-address-given-mailbox{$key} =
            ($NOT-ZERO + $seen) / (1 + $!count-by-mailbox{$mailbox});
    }

    $!update-cache.emit(Any);
}
# Recompute %!cached-p-given-mailbox for every mailbox that currently has a
# count. Each value is the mailbox's share of all accounted mails multiplied by
# (1 - p) for every known address, where p is the stored probability of that
# address for the mailbox, or $NOT-ZERO when none is stored.
# The whole recomputation is wrapped in a Cache log entry.
submethod do-update-cache() {
    MaildirIndexer::LogTimelineSchema::Index::Cache.log: {
        for $!count-by-mailbox.keys -> Str $box {
            my Numeric $score = $!count-by-mailbox{$box} / $!total-count;
            $score *= 1 - (%!p-address-given-mailbox{$_<> => $box<>} // $NOT-ZERO)
                for $!known-addresses.keys;
            %!cached-p-given-mailbox{$box} = $score;
        }
    }
}
# Index a mail under the given mailbox.
#
# $email   - the mail to index; its path identifies it, its addresses are counted
# $mailbox - mailbox the mail is stored in
#
# Does nothing when the mail's path is already indexed or when the mail has no
# addresses. Otherwise the address list is remembered for the path (so that
# del-path can undo it later) and every counter is incremented by one.
# The work is wrapped in an Add log entry. Always returns Nil.
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> {
        # No `return` in here: inside this pointy block it would be a non-local
        # return that unwinds out of add-mail straight through the .log call.
        # Letting the block finish normally hands control back to .log first.
        my $path = $email.path;
        unless %!addresses-for-file{$path}:exists {
            my Str @addresses = $email.addresses;
            if @addresses {
                # Bind (not copy) so the stored value is the typed array itself.
                %!addresses-for-file{$path} := @addresses;
                self.account-for(@addresses,$mailbox,1);
            }
        }
    }
}
# Remove a previously indexed mail from the index.
#
# $file    - path of the mail that went away
# $mailbox - mailbox the mail was stored in
#
# Looks up (and forgets) the address list recorded for the path by add-mail and
# decrements every counter by one. Does nothing when the path was never indexed.
# The work is wrapped in an Rm log entry. Always returns Nil.
method del-path(IO:D $file, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> {
        # No `return` in here: inside this pointy block it would be a non-local
        # return that unwinds out of del-path straight through the .log call.
        # For an unknown path :delete yields the (false) type object, so the
        # truthiness test below covers both "unknown" and "empty".
        my Str @addresses := %!addresses-for-file{$file.path}:delete;
        self.account-for(@addresses,$mailbox,-1) if @addresses;
    }
}
# Score every mailbox that currently has a count for the given addresses.
#
# @addresses - addresses of the mail to classify
#
# For each mailbox the score starts at its cached value (or $NOT-ZERO when
# nothing is cached yet) and is multiplied by p / (1 - p) for every given
# address, where p is the stored probability of that address for the mailbox,
# or $NOT-ZERO when none is stored.
#
# Returns a hash mapping mailbox name to its score.
submethod predict-mailbox-given-addresses(@addresses) {
    my Numeric %score-for;
    for $!count-by-mailbox.keys -> Str $box {
        my Numeric $score = %!cached-p-given-mailbox{$box} // $NOT-ZERO;
        for @addresses -> Str $address {
            my $hit = %!p-address-given-mailbox{$address<> => $box<>} // $NOT-ZERO;
            $score = $score * ($hit / (1 - $hit));
        }
        %score-for{$box} = $score;
    }
    %score-for
}
# Pick the mailbox with the highest score for the given mail.
#
# $email - the mail to classify; only its addresses are used
#
# Returns the name of the best-scoring mailbox, or an undefined Str when no
# mailbox has been scored. The lookup is wrapped in a Find log entry.
method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
    my Str $best;
    MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> {
        my %score-for = self.predict-mailbox-given-addresses($email.addresses);
        # Ascending sort by score; the last pair (if any) is the winner.
        with %score-for.pairs.sort(*.value).tail -> $winner {
            $best = $winner.key;
        }
    }
    return $best;
}