From f440d7576cc5f9f33debf00f6f90b88a783fed35 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 11 Jan 2020 15:12:23 +0000 Subject: pre-compute more Bayes stuff when all my mails were indexed, it took ages to guess the mailbox --- lib/MaildirIndexer/Index/ByAddresses.rakumod | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/MaildirIndexer/Index/ByAddresses.rakumod b/lib/MaildirIndexer/Index/ByAddresses.rakumod index 638740f..40e98f9 100644 --- a/lib/MaildirIndexer/Index/ByAddresses.rakumod +++ b/lib/MaildirIndexer/Index/ByAddresses.rakumod @@ -14,6 +14,7 @@ has Array[Str] %!addresses-for-file; has BagHash $!count-by-address-and-mailbox .= new; has BagHash $!known-addresses .= new; has BagHash $!count-by-mailbox .= new; +has Hash $!p-address-given-mailbox .= new; has $!total-count; method dump() { @@ -24,8 +25,18 @@ submethod account-for(Str @addresses,Str $mailbox,Int $step) { $!count-by-mailbox{$mailbox} += $step; for @addresses -> Str $addr { + my $pair = ( $addr => $mailbox ); $!known-addresses{$addr} += $step; - $!count-by-address-and-mailbox{$addr => $mailbox} += $step; + my $count = $!count-by-address-and-mailbox{$pair} += $step; + + if ($count) { + my $a = 1e-15 + $count; + my $b = 1 + $!count-by-mailbox{$mailbox}; + $!p-address-given-mailbox{$pair} = $a / $b; + } + else { + $!p-address-given-mailbox{$pair} :delete; + } } } @@ -58,12 +69,6 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) { } } -submethod p-address-given-mailbox(Str $addr, Str $mailbox) { - my $a = 1e-15 + $!count-by-address-and-mailbox{$addr => $mailbox}; - my $b = 1 + $!count-by-mailbox{$mailbox}; - return $a / $b; -} - submethod predict-mailbox-given-addresses(@addresses) { my %prediction; my Bag $addr-bag .= new(|@addresses); @@ -72,7 +77,7 @@ submethod predict-mailbox-given-addresses(@addresses) { my $p = $!count-by-mailbox{$mailbox} / $!total-count; for $!known-addresses.keys -> Str $addr { - my $addr-p = self.p-address-given-mailbox($addr,$mailbox); + my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15; if ($addr-bag{$addr}) { $p *= $addr-p; } -- cgit v1.2.3