From b16e56baf28bdb987c1a48924a2a510d4bb095ab Mon Sep 17 00:00:00 2001
From: dakkar
Date: Sat, 25 Jan 2020 16:00:05 +0000
Subject: speed-up Bayes predictions

this is *probably* correct
---
 lib/MaildirIndexer/Index/ByAddresses.rakumod | 32 +++++++++++++++++++---------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/lib/MaildirIndexer/Index/ByAddresses.rakumod b/lib/MaildirIndexer/Index/ByAddresses.rakumod
index c6cc412..3e7dc9d 100644
--- a/lib/MaildirIndexer/Index/ByAddresses.rakumod
+++ b/lib/MaildirIndexer/Index/ByAddresses.rakumod
@@ -15,7 +15,10 @@ has BagHash $!count-by-address-and-mailbox .= new;
 has BagHash $!known-addresses .= new;
 has BagHash $!count-by-mailbox .= new;
 has Hash $!p-address-given-mailbox .= new;
-has $!total-count;
+has Int $!total-count;
+
+has Hash $!cached-p-given-mailbox .= new;
+has Instant $!last-cached-at .= from-posix(0);
 
 method dump() {
 }
@@ -38,6 +41,21 @@ submethod account-for(Str @addresses,Str $mailbox,Int $step) {
             $!p-address-given-mailbox{$pair} :delete;
         }
     }
+
+    # update the cache every 10 seconds
+    if ((now - $!last-cached-at) > 10) {
+        for $!count-by-mailbox.keys -> Str $mailbox {
+            my $p = $!count-by-mailbox{$mailbox} / $!total-count;
+
+            for $!known-addresses.keys -> Str $addr {
+                my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15;
+                $p *= 1 - $addr-p;
+            }
+
+            $!cached-p-given-mailbox{$mailbox} = $p;
+        }
+        $!last-cached-at = now;
+    }
 }
 
 method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
@@ -71,19 +89,13 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) {
 
 submethod predict-mailbox-given-addresses(@addresses) {
     my %prediction;
-    my Bag $addr-bag .= new(|@addresses);
 
     for $!count-by-mailbox.keys -> Str $mailbox {
-        my $p = $!count-by-mailbox{$mailbox} / $!total-count;
+        my $p = $!cached-p-given-mailbox{$mailbox};
 
-        for $!known-addresses.keys -> Str $addr {
+        for @addresses -> Str $addr {
             my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15;
-            if ($addr-bag{$addr}) {
-                $p *= $addr-p;
-            }
-            else {
-                $p *= 1 - $addr-p;
-            }
+            $p *= $addr-p / ( 1- $addr-p );
         }
 
         %prediction{$mailbox} = $p;
-- 
cgit v1.2.3