diff options
author | dakkar <dakkar@thenautilus.net> | 2020-01-25 16:00:05 +0000 |
---|---|---|
committer | dakkar <dakkar@thenautilus.net> | 2020-01-25 16:00:05 +0000 |
commit | b16e56baf28bdb987c1a48924a2a510d4bb095ab (patch) | |
tree | a085057e9fa0c2a1f491a08f969e881465431f45 | |
parent | fewer temporary variables (diff) | |
download | MaildirIndexer-b16e56baf28bdb987c1a48924a2a510d4bb095ab.tar.gz MaildirIndexer-b16e56baf28bdb987c1a48924a2a510d4bb095ab.tar.bz2 MaildirIndexer-b16e56baf28bdb987c1a48924a2a510d4bb095ab.zip |
speed-up Bayes predictions
this is *probably* correct
-rw-r--r-- | lib/MaildirIndexer/Index/ByAddresses.rakumod | 32 |
1 file changed, 22 insertions, 10 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.rakumod b/lib/MaildirIndexer/Index/ByAddresses.rakumod index c6cc412..3e7dc9d 100644 --- a/lib/MaildirIndexer/Index/ByAddresses.rakumod +++ b/lib/MaildirIndexer/Index/ByAddresses.rakumod @@ -15,7 +15,10 @@ has BagHash $!count-by-address-and-mailbox .= new; has BagHash $!known-addresses .= new; has BagHash $!count-by-mailbox .= new; has Hash $!p-address-given-mailbox .= new; -has $!total-count; +has Int $!total-count; + +has Hash $!cached-p-given-mailbox .= new; +has Instant $!last-cached-at .= from-posix(0); method dump() { } @@ -38,6 +41,21 @@ submethod account-for(Str @addresses,Str $mailbox,Int $step) { $!p-address-given-mailbox{$pair} :delete; } } + + # update the cache every 10 seconds + if ((now - $!last-cached-at) > 10) { + for $!count-by-mailbox.keys -> Str $mailbox { + my $p = $!count-by-mailbox{$mailbox} / $!total-count; + + for $!known-addresses.keys -> Str $addr { + my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15; + $p *= 1 - $addr-p; + } + + $!cached-p-given-mailbox{$mailbox} = $p; + } + $!last-cached-at = now; + } } method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) { @@ -71,19 +89,13 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) { submethod predict-mailbox-given-addresses(@addresses) { my %prediction; - my Bag $addr-bag .= new(|@addresses); for $!count-by-mailbox.keys -> Str $mailbox { - my $p = $!count-by-mailbox{$mailbox} / $!total-count; + my $p = $!cached-p-given-mailbox{$mailbox}; - for $!known-addresses.keys -> Str $addr { + for @addresses -> Str $addr { my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15; - if ($addr-bag{$addr}) { - $p *= $addr-p; - } - else { - $p *= 1 - $addr-p; - } + $p *= $addr-p / ( 1- $addr-p ); } %prediction{$mailbox} = $p; |