From 4a72c119574047375dce27b731bb630a279be125 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 28 Dec 2019 16:32:15 +0000 Subject: forget about things when they go to 0 --- lib/MaildirIndexer/Index/ByAddresses.pm6 | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/MaildirIndexer/Index/ByAddresses.pm6 b/lib/MaildirIndexer/Index/ByAddresses.pm6 index 5636f1f..4de80e8 100644 --- a/lib/MaildirIndexer/Index/ByAddresses.pm6 +++ b/lib/MaildirIndexer/Index/ByAddresses.pm6 @@ -9,9 +9,11 @@ use MaildirIndexer::Email; # Algorithm::NaiveBayes::Classifier::Bernoulli class has Array[Str] %!addresses-for-file; -has %!count-by-address-and-mailbox; -has %!known-addresses; -has %!count-by-mailbox; +# I'd like to type-constrain these BagHash-es, but the compiler +# currently dies if I try +has BagHash $!count-by-address-and-mailbox .= new; +has BagHash $!known-addresses .= new; +has BagHash $!count-by-mailbox .= new; has $!total-count; method dump() { @@ -19,11 +21,11 @@ method dump() { submethod account-for(Str @addresses,$mailbox,Int $step) { $!total-count += $step; - %!count-by-mailbox{$mailbox} += $step; + $!count-by-mailbox{$mailbox} += $step; for @addresses -> $addr { - %!known-addresses{$addr} += $step; - %!count-by-address-and-mailbox{$addr}{$mailbox} += $step; + $!known-addresses{$addr} += $step; + $!count-by-address-and-mailbox{$addr => $mailbox} += $step; } } @@ -49,8 +51,8 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) { } submethod p-address-given-mailbox($addr,$mailbox) { - my $a = 1 + (%!count-by-address-and-mailbox{$addr}{$mailbox} // 0); - my $b = 2 + (%!count-by-mailbox{$mailbox} // 0); + my $a = 1 + ($!count-by-address-and-mailbox{$addr => $mailbox} // 0); + my $b = 2 + ($!count-by-mailbox{$mailbox} // 0); return $a / $b; } @@ -58,10 +60,10 @@ submethod predict-mailbox-given-addresses(@addresses) { my %prediction; my Bag $addr-bag .= new(|@addresses); - for %!count-by-mailbox.keys -> $mailbox { + for $!count-by-mailbox.keys -> $mailbox { my $p = 1; - for %!known-addresses.keys -> $addr { + for $!known-addresses.keys -> $addr { if ($addr-bag{$addr}) { $p *= self.p-address-given-mailbox($addr,$mailbox); } @@ -69,7 +71,7 @@ submethod predict-mailbox-given-addresses(@addresses) { $p *= (1 - self.p-address-given-mailbox($addr,$mailbox)); } } - $p *= %!count-by-mailbox{$mailbox} / $!total-count; + $p *= $!count-by-mailbox{$mailbox} / $!total-count; %prediction{$mailbox} = $p; } -- cgit v1.2.3