summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authordakkar <dakkar@thenautilus.net>2019-12-27 14:14:24 +0000
committerdakkar <dakkar@thenautilus.net>2019-12-27 15:15:36 +0000
commitd31b7f8710f89db6035a80adae4d97ada8503336 (patch)
treeab8d718f71075e49d2cad5ee9edaac63bd53973a /lib
parentrestructure store, extract server (diff)
downloadMaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.gz
MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.bz2
MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.zip
extract addresses
tried to add some typing, wanted to return `Positional[Str:D]`, but half the `Array` / `List` methods return `Seq`, empty arrays are untyped, plus https://github.com/rakudo/rakudo/issues/3383 made the whole thing a mess so we're just returning `Iterable`, which is not even parametric, so we can't promise we're returning strings
Diffstat (limited to 'lib')
-rw-r--r--lib/MaildirIndexer/Email.pm647
1 files changed, 43 insertions, 4 deletions
diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6
index 0cd5bd7..d5ecc18 100644
--- a/lib/MaildirIndexer/Email.pm6
+++ b/lib/MaildirIndexer/Email.pm6
@@ -7,16 +7,55 @@ has $!body;
method BUILD(:%!headers,:$!body,:$!path = IO) { }
-method message-id { split-refs(%!headers<message-id>)[0] // '' }
-method refs {
+method message-id(--> Str:D) { split-refs(%!headers<message-id>)[0] // '' }
+method refs(--> Iterable) {
return (
|split-refs(%!headers<in-reply-to>),
|split-refs(%!headers<references>).reverse,
);
}
-multi split-refs(Any) { return (); }
-multi split-refs(Str:D $str) {
+multi split-refs(Any --> Iterable) { return () }
+multi split-refs(Str:D $str --> Iterable) {
return $/<refs>».Str if $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
return ();
}
+
+method addresses (--> Iterable) {
+ return (
+ |extract-addresses(%!headers<sender>),
+ |extract-addresses(%!headers<x-original-sender>),
+ |extract-addresses(%!headers<from>),
+ |extract-addresses(%!headers<to>),
+ |extract-addresses(%!headers<reply-to>),
+ |extract-addresses(%!headers<mailing-list>),
+ ).unique;
+ # we should add a hack for info@meetup.com, where we keep the
+ # whole "from", since it's the only way to distinguish between
+ # different groups
+}
+
+# subset of the grammar of p5's Email::Address, ignoring comments and
+# quoting
+my grammar Address {
+ token CTL { <[ \x00 .. \x1F \x7F ]> }
+ token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
+
+ token atext { <-CTL> & <-special> & \S }
+ token dot-atom { <ws> [ [<atext>+]+ % '.' ] <ws> }
+
+ token local-part { <.dot-atom> }
+ token domain { <.dot-atom> }
+
+ token addr { <local-part> '@' <domain> }
+
+ rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ }
+}
+
+multi sub extract-addresses(Any --> Iterable) { return () }
+multi sub extract-addresses(Str:D $str --> Iterable) {
+ with Address.parse($str) {
+ return $_<addr>».Str;
+ }
+ return ();
+}