# lib/MaildirIndexer/Index/ByAddresses.rakumod
# (renamed from ByAddresses.pm6 by commit 5b10920b, "new module extension")
use v6.d;
use MaildirIndexer::Index;
unit class MaildirIndexer::Index::ByAddresses does MaildirIndexer::Index;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;

# most of this is copied from
# p6-Algorithm-NaiveBayes:auth:ver<0.04>, in particular the
# Algorithm::NaiveBayes::Classifier::Bernoulli class

# Addresses recorded by add-mail for each file, keyed by the file's
# path, so that del-path can undo exactly what add-mail counted.
has Array[Str] %!addresses-for-file;
# I'd like to type-constrain these BagHash-es, but the compiler
# currently dies if I try
# Keyed by the Pair (address => mailbox).
has BagHash $!count-by-address-and-mailbox .= new;
# Keyed by address.
has BagHash $!known-addresses .= new;
# Keyed by mailbox.
has BagHash $!count-by-mailbox .= new;
# Running sum of every $step passed to account-for; starts at zero so
# that it is always numeric when used as a divisor.
has $!total-count = 0;

# Intentionally prints nothing for this index.
method dump() {
}

# Apply $step to all counters for one mail: the global total, the
# mailbox total, and -- for every address -- the per-address and
# per-(address, mailbox) counts. Callers in this class pass 1 when a
# mail is indexed and -1 when it is removed.
submethod account-for(Str @addresses,Str $mailbox,Int $step) {
    $!total-count += $step;
    $!count-by-mailbox{$mailbox} += $step;

    for @addresses -> Str $addr {
        $!known-addresses{$addr} += $step;
        $!count-by-address-and-mailbox{$addr => $mailbox} += $step;
    }
}

# Index $email as belonging to $mailbox. Mails without any address
# are ignored (nothing is stored or counted for them).
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByAddresses'),:$mailbox, -> {
        my Str @addresses = $email.addresses or return;
        # NOTE(review): del-path looks this up with `$file.path`;
        # presumably `$email.path` stringifies to the same key --
        # confirm against MaildirIndexer::Email.
        %!addresses-for-file{ $email.path } = @addresses;

        self.account-for(@addresses,$mailbox,1);

        return;
    }
}

# Undo the accounting done by add-mail for $file in $mailbox. Files
# this index never recorded are ignored.
method del-path(IO:D $file, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByAddresses'),:$mailbox, -> {
        # Remove the entry while reading it: the map must not grow
        # forever, and a second delete of the same path must not
        # decrement the counters again.
        my $stored = %!addresses-for-file{ $file.path }:delete;
        return without $stored;

        # The hash value is an itemized Array[Str]; `.list` makes the
        # assignment copy its elements instead of trying to store the
        # whole array as one (non-Str) element.
        my Str @addresses = $stored.list;
        return unless @addresses;

        self.account-for(@addresses,$mailbox,-1);

        return;
    }
}

# Smoothed estimate of "address $addr appears in a mail of $mailbox":
# (1e-15 + count of $addr in $mailbox) / (1 + count of $mailbox).
submethod p-address-given-mailbox(Str $addr, Str $mailbox) {
    my $a = 1e-15 + $!count-by-address-and-mailbox{$addr => $mailbox};
    my $b = 1 + $!count-by-mailbox{$mailbox};
    return $a / $b;
}

# Score every known mailbox for a mail carrying @addresses. The score
# starts from the mailbox's share of all counted mails and is then
# multiplied, for each known address, by p (address present in
# @addresses) or 1 - p (address absent). Returns a hash mapping
# mailbox to score; it is empty when no mailbox has been counted.
submethod predict-mailbox-given-addresses(@addresses) {
    my %prediction;
    my Bag $addr-bag .= new(|@addresses);

    for $!count-by-mailbox.keys -> Str $mailbox {
        my $p = $!count-by-mailbox{$mailbox} / $!total-count;

        for $!known-addresses.keys -> Str $addr {
            my $addr-p = self.p-address-given-mailbox($addr,$mailbox);
            if ($addr-bag{$addr}) {
                $p *= $addr-p;
            }
            else {
                $p *= 1 - $addr-p;
            }
        }

        %prediction{$mailbox} = $p;
    }

    return %prediction;
}

# Return the mailbox with the highest score for $email, or the
# undefined Str type object when there is no prediction at all.
method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
    my Str $result;
    MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByAddresses'), -> {
        my %prediction = self.predict-mailbox-given-addresses($email.addresses);

        # ascending by score, so the best candidate is the last one
        my @most-probable-mailboxes = %prediction.pairs.sort(*.value);

        if @most-probable-mailboxes -> $_ { $result = .[*-1].key }
    }
    return $result;
}
# lib/MaildirIndexer/Index/ByRef.rakumod
use v6.d;
use MaildirIndexer::Index;
unit class MaildirIndexer::Index::ByRef does MaildirIndexer::Index;
use MaildirIndexer::LogTimelineSchema;
use MaildirIndexer::Email;

# Message id recorded for each indexed file, keyed by file path.
has Str %!id-for-file;
# Mailboxes in which a given message id has been seen, in the order
# add-mail recorded them.
has Array[Str] %!mailboxes-for-id;

# Print both maps to standard output, one entry per line.
method dump() {
    say "{.key} → {.value}" for %!id-for-file;
    say "{.key} ⇒ {.value.perl}" for %!mailboxes-for-id;
}

# Record that $email lives in $mailbox. Mails without a message id
# are ignored.
method add-mail(MaildirIndexer::Email:D $email, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Add.log: :class('ByRef'),:$mailbox, -> {
        my $id = $email.message-id or return;
        %!id-for-file{ $email.path } = $id;
        %!mailboxes-for-id{ $id }.push($mailbox);
        return;
    }
}

# Forget $file: drop its path -> id entry and remove one occurrence of
# $mailbox from the list kept for that id. Files that were never
# indexed are ignored.
method del-path(IO:D $file, Str:D $mailbox --> Nil) {
    MaildirIndexer::LogTimelineSchema::Index::Rm.log: :class('ByRef'),:$mailbox, -> {
        # `with` skips unknown files, so an undefined id is never used
        # as a hash key.
        with %!id-for-file{ $file.path }:delete -> $id {
            with %!mailboxes-for-id{ $id } -> $mailboxes {
                # `first(..., :k)` yields the index of the first match,
                # or Nil when $mailbox is not in the list; position 0
                # is still defined, so `with` handles it correctly.
                with $mailboxes.first($mailbox, :k) -> $pos {
                    $mailboxes.splice($pos, 1);
                }
                # Do not leave empty lists behind: lookups take the
                # last element of whatever list they find.
                unless $mailboxes.elems {
                    %!mailboxes-for-id{ $id }:delete;
                }
            }
        }
        return;
    }
}

# Return the mailbox most recently recorded for any message that
# $email references; when several references are known, the last one
# iterated wins. Returns the undefined Str type object if none match.
method mailbox-for-email(MaildirIndexer::Email:D $email --> Str) {
    my Str $result;
    MaildirIndexer::LogTimelineSchema::Index::Find.log: :class('ByRef'), -> {
        for |$email.refs() -> $ref {
            with %!mailboxes-for-id{$ref} {
                # guard against an empty list, whose last element
                # would be an out-of-range Failure
                $result = .tail if .elems;
            }
        }
    }
    return $result;
}