diff options
author | dakkar <dakkar@thenautilus.net> | 2019-12-28 16:32:15 +0000 |
---|---|---|
committer | dakkar <dakkar@thenautilus.net> | 2019-12-28 16:45:46 +0000 |
commit | 4a72c119574047375dce27b731bb630a279be125 (patch) | |
tree | 8e38c9e5678a23090c17e52d1e17867fcd9620b3 | |
parent | naive-bayes classifying index (diff) | |
download | MaildirIndexer-4a72c119574047375dce27b731bb630a279be125.tar.gz MaildirIndexer-4a72c119574047375dce27b731bb630a279be125.tar.bz2 MaildirIndexer-4a72c119574047375dce27b731bb630a279be125.zip |
forget about things when they go to 0
-rw-r--r-- | lib/MaildirIndexer/Index/ByAddresses.pm6 | 24 |
1 file changed, 13 insertions, 11 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.pm6 b/lib/MaildirIndexer/Index/ByAddresses.pm6
index 5636f1f..4de80e8 100644
--- a/lib/MaildirIndexer/Index/ByAddresses.pm6
+++ b/lib/MaildirIndexer/Index/ByAddresses.pm6
@@ -9,9 +9,11 @@ use MaildirIndexer::Email;
 # Algorithm::NaiveBayes::Classifier::Bernoulli class
 
 has Array[Str] %!addresses-for-file;
-has %!count-by-address-and-mailbox;
-has %!known-addresses;
-has %!count-by-mailbox;
+# I'd like to type-constrain these BagHash-es, but the compiler
+# currently dies if I try
+has BagHash $!count-by-address-and-mailbox .= new;
+has BagHash $!known-addresses .= new;
+has BagHash $!count-by-mailbox .= new;
 has $!total-count;
 
 method dump() {
@@ -19,11 +21,11 @@ method dump() {
 
 submethod account-for(Str @addresses,$mailbox,Int $step) {
     $!total-count += $step;
-    %!count-by-mailbox{$mailbox} += $step;
+    $!count-by-mailbox{$mailbox} += $step;
 
     for @addresses -> $addr {
-        %!known-addresses{$addr} += $step;
-        %!count-by-address-and-mailbox{$addr}{$mailbox} += $step;
+        $!known-addresses{$addr} += $step;
+        $!count-by-address-and-mailbox{$addr => $mailbox} += $step;
     }
 }
 
@@ -49,8 +51,8 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) {
 }
 
 submethod p-address-given-mailbox($addr,$mailbox) {
-    my $a = 1 + (%!count-by-address-and-mailbox{$addr}{$mailbox} // 0);
-    my $b = 2 + (%!count-by-mailbox{$mailbox} // 0);
+    my $a = 1 + ($!count-by-address-and-mailbox{$addr => $mailbox} // 0);
+    my $b = 2 + ($!count-by-mailbox{$mailbox} // 0);
     return $a / $b;
 }
 
@@ -58,10 +60,10 @@ submethod predict-mailbox-given-addresses(@addresses) {
     my %prediction;
     my Bag $addr-bag .= new(|@addresses);
 
-    for %!count-by-mailbox.keys -> $mailbox {
+    for $!count-by-mailbox.keys -> $mailbox {
         my $p = 1;
 
-        for %!known-addresses.keys -> $addr {
+        for $!known-addresses.keys -> $addr {
             if ($addr-bag{$addr}) {
                 $p *= self.p-address-given-mailbox($addr,$mailbox);
             }
@@ -69,7 +71,7 @@ submethod predict-mailbox-given-addresses(@addresses) {
                 $p *= (1 - self.p-address-given-mailbox($addr,$mailbox));
             }
         }
-        $p *= %!count-by-mailbox{$mailbox} / $!total-count;
+        $p *= $!count-by-mailbox{$mailbox} / $!total-count;
         %prediction{$mailbox} = $p;
     }
 