summary refs log tree commit diff
diff options
context:
space:
mode:
author dakkar <dakkar@thenautilus.net> 2020-01-11 15:12:23 +0000
committer dakkar <dakkar@thenautilus.net> 2020-01-11 15:12:23 +0000
commit f440d7576cc5f9f33debf00f6f90b88a783fed35 (patch)
tree 906f36ccc8aea4c0415e010fa51b5bb0e32c71b4
parent allow multiple maildirs on cmdline (diff)
download MaildirIndexer-f440d7576cc5f9f33debf00f6f90b88a783fed35.tar.gz
MaildirIndexer-f440d7576cc5f9f33debf00f6f90b88a783fed35.tar.bz2
MaildirIndexer-f440d7576cc5f9f33debf00f6f90b88a783fed35.zip
pre-compute more Bayes stuff
when all my mails were indexed, it took ages to guess the mailbox
-rw-r--r-- lib/MaildirIndexer/Index/ByAddresses.rakumod 21
1 files changed, 13 insertions, 8 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.rakumod b/lib/MaildirIndexer/Index/ByAddresses.rakumod
index 638740f..40e98f9 100644
--- a/lib/MaildirIndexer/Index/ByAddresses.rakumod
+++ b/lib/MaildirIndexer/Index/ByAddresses.rakumod
@@ -14,6 +14,7 @@ has Array[Str] %!addresses-for-file;
has BagHash $!count-by-address-and-mailbox .= new;
has BagHash $!known-addresses .= new;
has BagHash $!count-by-mailbox .= new;
+has Hash $!p-address-given-mailbox .= new;
has $!total-count;
method dump() {
@@ -24,8 +25,18 @@ submethod account-for(Str @addresses,Str $mailbox,Int $step) {
$!count-by-mailbox{$mailbox} += $step;
for @addresses -> Str $addr {
+ my $pair = ( $addr => $mailbox );
$!known-addresses{$addr} += $step;
- $!count-by-address-and-mailbox{$addr => $mailbox} += $step;
+ my $count = $!count-by-address-and-mailbox{$pair} += $step;
+
+ if ($count) {
+ my $a = 1e-15 + $count;
+ my $b = 1 + $!count-by-mailbox{$mailbox};
+ $!p-address-given-mailbox{$pair} = $a / $b;
+ }
+ else {
+ $!p-address-given-mailbox{$pair} :delete;
+ }
}
}
@@ -58,12 +69,6 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) {
}
}
-submethod p-address-given-mailbox(Str $addr, Str $mailbox) {
- my $a = 1e-15 + $!count-by-address-and-mailbox{$addr => $mailbox};
- my $b = 1 + $!count-by-mailbox{$mailbox};
- return $a / $b;
-}
-
submethod predict-mailbox-given-addresses(@addresses) {
my %prediction;
my Bag $addr-bag .= new(|@addresses);
@@ -72,7 +77,7 @@ submethod predict-mailbox-given-addresses(@addresses) {
my $p = $!count-by-mailbox{$mailbox} / $!total-count;
for $!known-addresses.keys -> Str $addr {
- my $addr-p = self.p-address-given-mailbox($addr,$mailbox);
+ my $addr-p = $!p-address-given-mailbox{$addr => $mailbox} // 1e-15;
if ($addr-bag{$addr}) {
$p *= $addr-p;
}