summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dakkar <dakkar@thenautilus.net> 2019-12-28 17:27:31 +0000
committer dakkar <dakkar@thenautilus.net> 2019-12-28 17:27:31 +0000
commit 6f269df9027a6289ed634c6160ebcacdb372f238 (patch)
tree 6860a50f7ad1521ee49f89f08e583d1c58988ca2
parent one more address (diff)
download MaildirIndexer-6f269df9027a6289ed634c6160ebcacdb372f238.tar.gz
MaildirIndexer-6f269df9027a6289ed634c6160ebcacdb372f238.tar.bz2
MaildirIndexer-6f269df9027a6289ed634c6160ebcacdb372f238.zip
more types, counts seems to work better this way
-rw-r--r-- lib/MaildirIndexer/Index/ByAddresses.pm6 23
1 files changed, 12 insertions, 11 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.pm6 b/lib/MaildirIndexer/Index/ByAddresses.pm6
index 4de80e8..e2a96c3 100644
--- a/lib/MaildirIndexer/Index/ByAddresses.pm6
+++ b/lib/MaildirIndexer/Index/ByAddresses.pm6
@@ -19,11 +19,11 @@ has $!total-count;
method dump() {
}
-submethod account-for(Str @addresses,$mailbox,Int $step) {
+submethod account-for(Str @addresses,Str $mailbox,Int $step) {
$!total-count += $step;
$!count-by-mailbox{$mailbox} += $step;
- for @addresses -> $addr {
+ for @addresses -> Str $addr {
$!known-addresses{$addr} += $step;
$!count-by-address-and-mailbox{$addr => $mailbox} += $step;
}
@@ -50,9 +50,9 @@ method del-path(IO:D $file, Str:D $mailbox --> Nil) {
}
}
-submethod p-address-given-mailbox($addr,$mailbox) {
- my $a = 1 + ($!count-by-address-and-mailbox{$addr => $mailbox} // 0);
- my $b = 2 + ($!count-by-mailbox{$mailbox} // 0);
+submethod p-address-given-mailbox(Str $addr, Str $mailbox) {
+ my $a = 1 + $!count-by-address-and-mailbox{$addr => $mailbox};
+ my $b = 2 + $!count-by-mailbox{$mailbox};
return $a / $b;
}
@@ -60,18 +60,19 @@ submethod predict-mailbox-given-addresses(@addresses) {
my %prediction;
my Bag $addr-bag .= new(|@addresses);
- for $!count-by-mailbox.keys -> $mailbox {
- my $p = 1;
+ for $!count-by-mailbox.keys -> Str $mailbox {
+ my $p = $!count-by-mailbox{$mailbox} / $!total-count;
- for $!known-addresses.keys -> $addr {
+ for $!known-addresses.keys -> Str $addr {
+ my $addr-p = self.p-address-given-mailbox($addr,$mailbox);
if ($addr-bag{$addr}) {
- $p *= self.p-address-given-mailbox($addr,$mailbox);
+ $p *= $addr-p;
}
else {
- $p *= (1 - self.p-address-given-mailbox($addr,$mailbox));
+ $p *= 1 - $addr-p;
}
}
- $p *= $!count-by-mailbox{$mailbox} / $!total-count;
+
%prediction{$mailbox} = $p;
}