summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: dakkar <dakkar@thenautilus.net> 2019-12-28 16:32:15 +0000
committer: dakkar <dakkar@thenautilus.net> 2019-12-28 16:45:46 +0000
commit: 4a72c119574047375dce27b731bb630a279be125 (patch)
tree: 8e38c9e5678a23090c17e52d1e17867fcd9620b3
parent: naive-bayes classifying index (diff)
download: MaildirIndexer-4a72c119574047375dce27b731bb630a279be125.tar.gz
MaildirIndexer-4a72c119574047375dce27b731bb630a279be125.tar.bz2
MaildirIndexer-4a72c119574047375dce27b731bb630a279be125.zip
forget about things when they go to 0
-rw-r--r-- lib/MaildirIndexer/Index/ByAddresses.pm6 | 24
1 files changed, 13 insertions, 11 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.pm6 b/lib/MaildirIndexer/Index/ByAddresses.pm6
index 5636f1f..4de80e8 100644
--- a/lib/MaildirIndexer/Index/ByAddresses.pm6
+++ b/lib/MaildirIndexer/Index/ByAddresses.pm6
@@ -9,9 +9,11 @@ use MaildirIndexer::Email;
# Algorithm::NaiveBayes::Classifier::Bernoulli class
has Array[Str] %!addresses-for-file;
-has %!count-by-address-and-mailbox;
-has %!known-addresses;
-has %!count-by-mailbox;
+# I'd like to type-constrain these BagHash-es, but the compiler
+# currently dies if I try
+has BagHash $!count-by-address-and-mailbox .= new;
+has BagHash $!known-addresses .= new;
+has BagHash $!count-by-mailbox .= new;
has $!total-count;
method dump() {
@@ -19,11 +21,11 @@ method dump() {
submethod account-for(Str @addresses,$mailbox,Int $step) {
$!total-count += $step;
- %!count-by-mailbox{$mailbox} += $step;
+ $!count-by-mailbox{$mailbox} += $step;
for @addresses -> $addr {
- %!known-addresses{$addr} += $step;
- %!count-by-address-and-mailbox{$addr}{$mailbox} += $step;
+ $!known-addresses{$addr} += $step;
+ $!count-by-address-and-mailbox{$addr => $mailbox} += $step;
}
}
@@ -49,8 +51,8 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) {
}
submethod p-address-given-mailbox($addr,$mailbox) {
- my $a = 1 + (%!count-by-address-and-mailbox{$addr}{$mailbox} // 0);
- my $b = 2 + (%!count-by-mailbox{$mailbox} // 0);
+ my $a = 1 + ($!count-by-address-and-mailbox{$addr => $mailbox} // 0);
+ my $b = 2 + ($!count-by-mailbox{$mailbox} // 0);
return $a / $b;
}
@@ -58,10 +60,10 @@ submethod predict-mailbox-given-addresses(@addresses) {
my %prediction;
my Bag $addr-bag .= new(|@addresses);
- for %!count-by-mailbox.keys -> $mailbox {
+ for $!count-by-mailbox.keys -> $mailbox {
my $p = 1;
- for %!known-addresses.keys -> $addr {
+ for $!known-addresses.keys -> $addr {
if ($addr-bag{$addr}) {
$p *= self.p-address-given-mailbox($addr,$mailbox);
}
@@ -69,7 +71,7 @@ submethod predict-mailbox-given-addresses(@addresses) {
$p *= (1 - self.p-address-given-mailbox($addr,$mailbox));
}
}
- $p *= %!count-by-mailbox{$mailbox} / $!total-count;
+ $p *= $!count-by-mailbox{$mailbox} / $!total-count;
%prediction{$mailbox} = $p;
}