summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordakkar <dakkar@thenautilus.net>2020-01-25 16:00:05 +0000
committerdakkar <dakkar@thenautilus.net>2020-01-25 16:00:05 +0000
commitb16e56baf28bdb987c1a48924a2a510d4bb095ab (patch)
treea085057e9fa0c2a1f491a08f969e881465431f45
parentfewer temporary variables (diff)
downloadMaildirIndexer-b16e56baf28bdb987c1a48924a2a510d4bb095ab.tar.gz
MaildirIndexer-b16e56baf28bdb987c1a48924a2a510d4bb095ab.tar.bz2
MaildirIndexer-b16e56baf28bdb987c1a48924a2a510d4bb095ab.zip
speed-up Bayes predictions
this is *probably* correct
-rw-r--r--lib/MaildirIndexer/Index/ByAddresses.rakumod32
1 files changed, 22 insertions, 10 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.rakumod b/lib/MaildirIndexer/Index/ByAddresses.rakumod
index c6cc412..3e7dc9d 100644
--- a/lib/MaildirIndexer/Index/ByAddresses.rakumod
+++ b/lib/MaildirIndexer/Index/ByAddresses.rakumod
@@ -15,7 +15,10 @@ has BagHash $!count-by-address-and-mailbox .= new;
has BagHash $!known-addresses .= new;
has BagHash $!count-by-mailbox .= new;
has Hash $!p-address-given-mailbox .= new;
-has $!total-count;
+has Int $!total-count;
+
+has Hash $!cached-p-given-mailbox .= new;
+has Instant $!last-cached-at .= from-posix(0);
method dump() {
}
@@ -38,6 +41,21 @@ submethod account-for(Str @addresses,Str $mailbox,Int $step) {
$!p-address-given-mailbox{$pair} :delete;
}
}
+
+ # refresh the cached per-mailbox probabilities, at most once every 10 seconds (checked on each call)
+ if ((now - $!last-cached-at) > 10) {
+ for $!count-by-mailbox.keys -> Str $mailbox {
+ my $p = $!count-by-mailbox{$mailbox} / $!total-count;
+
+ for $!known-addresses.keys -> Str $addr {
+ my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15;
+ $p *= 1 - $addr-p;
+ }
+
+ $!cached-p-given-mailbox{$mailbox} = $p;
+ }
+ $!last-cached-at = now;
+ }
}
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
@@ -71,19 +89,13 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) {
submethod predict-mailbox-given-addresses(@addresses) {
my %prediction;
- my Bag $addr-bag .= new(|@addresses);
for $!count-by-mailbox.keys -> Str $mailbox {
- my $p = $!count-by-mailbox{$mailbox} / $!total-count;
+ my $p = $!cached-p-given-mailbox{$mailbox};
- for $!known-addresses.keys -> Str $addr {
+ for @addresses -> Str $addr {
my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15;
- if ($addr-bag{$addr}) {
- $p *= $addr-p;
- }
- else {
- $p *= 1 - $addr-p;
- }
+ $p *= $addr-p / ( 1- $addr-p );
}
%prediction{$mailbox} = $p;