summaryrefslogtreecommitdiff
path: root/lib/MaildirIndexer/Index/ByAddresses.pm6
diff options
context:
space:
mode:
Diffstat (limited to 'lib/MaildirIndexer/Index/ByAddresses.pm6')
-rw-r--r--lib/MaildirIndexer/Index/ByAddresses.pm689
1 files changed, 89 insertions, 0 deletions
diff --git a/lib/MaildirIndexer/Index/ByAddresses.pm6 b/lib/MaildirIndexer/Index/ByAddresses.pm6
new file mode 100644
index 0000000..5636f1f
--- /dev/null
+++ b/lib/MaildirIndexer/Index/ByAddresses.pm6
@@ -0,0 +1,89 @@
+use v6.d;
+use MaildirIndexer::Index;
+unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index;
+use MaildirIndexer::LogTimelineSchema;
+use MaildirIndexer::Email;
+
+# most of this is copied from
+# p6-Algorithm-NaiveBayes:auth<cpan:TITSUKI>:ver<0.04>, in particular the
+# Algorithm::NaiveBayes::Classifier::Bernoulli class
+
+has Array[Str] %!addresses-for-file;
+has %!count-by-address-and-mailbox;
+has %!known-addresses;
+has %!count-by-mailbox;
+has $!total-count;
+
+method dump() {
+}
+
+submethod account-for(Str @addresses,$mailbox,Int $step) {
+ $!total-count += $step;
+ %!count-by-mailbox{$mailbox} += $step;
+
+ for @addresses -> $addr {
+ %!known-addresses{$addr} += $step;
+ %!count-by-address-and-mailbox{$addr}{$mailbox} += $step;
+ }
+}
+
+method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
+ MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> {
+ my Str @addresses = $email.addresses or return;
+ %!addresses-for-file{ $email.path } = @addresses;
+
+ self.account-for(@addresses,$mailbox,1);
+
+ return;
+ }
+}
+
+method del-path(IO:D $file, Str:D $mailbox --> Nil) {
+ MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> {
+ my Str @addresses = %!addresses-for-file{$file.path} or return;
+
+ self.account-for(@addresses,$mailbox,-1);
+
+ return;
+ }
+}
+
+submethod p-address-given-mailbox($addr,$mailbox) {
+ my $a = 1 + (%!count-by-address-and-mailbox{$addr}{$mailbox} // 0);
+ my $b = 2 + (%!count-by-mailbox{$mailbox} // 0);
+ return $a / $b;
+}
+
+submethod predict-mailbox-given-addresses(@addresses) {
+ my %prediction;
+ my Bag $addr-bag .= new(|@addresses);
+
+ for %!count-by-mailbox.keys -> $mailbox {
+ my $p = 1;
+
+ for %!known-addresses.keys -> $addr {
+ if ($addr-bag{$addr}) {
+ $p *= self.p-address-given-mailbox($addr,$mailbox);
+ }
+ else {
+ $p *= (1 - self.p-address-given-mailbox($addr,$mailbox));
+ }
+ }
+ $p *= %!count-by-mailbox{$mailbox} / $!total-count;
+ %prediction{$mailbox} = $p;
+ }
+
+ return %prediction;
+}
+
+method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
+ my Str $result;
+ MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> {
+ my %prediction = self.predict-mailbox-given-addresses($email.addresses);
+
+ my @most-probable-mailboxes = %prediction.pairs.sort(*.value).map(*.key);
+
+ if @most-probable-mailboxes -> $_ { $result = .[*-1] }
+ }
+ return $result;
+}