extract addresses

tried to add some typing, wanted to return `Positional[Str:D]`, but half the `Array` / `List` methods return `Seq`, empty arrays are untyped, plus https://github.com/rakudo/rakudo/issues/3383 made the whole thing a mess so we're just returning `Iterable`, which is not even parametric, so we can't promise we're returning strings
author: dakkar <dakkar@thenautilus.net> 2019-12-27 14:14:24 +0000
committer: dakkar <dakkar@thenautilus.net> 2019-12-27 15:15:36 +0000
commit: d31b7f8710f89db6035a80adae4d97ada8503336 (patch)
tree: ab8d718f71075e49d2cad5ee9edaac63bd53973a
parent: restructure store, extract server (diff)
download: MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.gz
MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.bz2
MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.zip
2 files changed, 53 insertions, 6 deletions
diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6
index 0cd5bd7..d5ecc18 100644
--- a/lib/MaildirIndexer/Email.pm6
+++ b/lib/MaildirIndexer/Email.pm6
@@ -7,16 +7,55 @@ has $!body;
 
 method BUILD(:%!headers,:$!body,:$!path = IO) { }
 
-method message-id { split-refs(%!headers<message-id>)[0] // '' }
-method refs {
+method message-id(--> Str:D) { split-refs(%!headers<message-id>)[0] // '' }
+method refs(--> Iterable) {
     return (
         |split-refs(%!headers<in-reply-to>),
         |split-refs(%!headers<references>).reverse,
     );
 }
 
-multi split-refs(Any) { return (); }
-multi split-refs(Str:D $str) {
+multi split-refs(Any --> Iterable) { return () }
+multi split-refs(Str:D $str --> Iterable) {
     return $/<refs>».Str if $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
     return ();
 }
+
+method addresses (--> Iterable) {
+    return (
+        |extract-addresses(%!headers<sender>),
+        |extract-addresses(%!headers<x-original-sender>),
+        |extract-addresses(%!headers<from>),
+        |extract-addresses(%!headers<to>),
+        |extract-addresses(%!headers<reply-to>),
+        |extract-addresses(%!headers<mailing-list>),
+    ).unique;
+    # we should add a hack for info@meetup.com, where we keep the
+    # whole "from", since it's the only way to distinguish between
+    # different groups
+}
+
+# subset of the grammar of p5's Email::Address, ignoring comments and
+# quoting
+my grammar Address {
+    token CTL { <[ \x00 .. \x1F \x7F ]> }
+    token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
+
+    token atext { <-CTL> & <-special> & \S }
+    token dot-atom { <ws> [ [<atext>+]+ % '.' ] <ws> }
+
+    token local-part { <.dot-atom> }
+    token domain { <.dot-atom> }
+
+    token addr { <local-part> '@' <domain> }
+
+    rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ }
+}
+
+multi sub extract-addresses(Any --> Iterable) { return () }
+multi sub extract-addresses(Str:D $str --> Iterable) {
+    with Address.parse($str) {
+        return $_<addr>».Str;
+    }
+    return ();
+}
diff --git a/t/email.t b/t/email.t
index f2208a6..c6093db 100644
--- a/t/email.t
+++ b/t/email.t
@@ -10,7 +10,8 @@ subtest 'no values' => {
     );
 
     is-deeply $email.message-id, '', 'message id should parse';
-    is-deeply $email.refs, qw[], 'refs should parse';
+    is-deeply $email.refs, @(), 'refs should parse';
+    is-deeply $email.addresses, @(), 'addresses should parse';
 }
 
 subtest 'bad values' => {
@@ -24,7 +25,8 @@ subtest 'bad values' => {
     );
 
     is-deeply $email.message-id, '', 'message id should parse';
-    is-deeply $email.refs, qw[], 'refs should parse';
+    is-deeply $email.refs, @(), 'refs should parse';
+    is-deeply $email.addresses, @(), 'addresses should parse';
 }
 
 subtest 'all values' => {
@@ -33,12 +35,18 @@ subtest 'all values' => {
             message-id => 'some <stuff> here',
             in-reply-to => '<one>',
             references => 'bad <two> garbage <three>',
+            reply-to => 'foo <one@me>',
+            from => 'me@my.domain',
+            to => 'one <one@your.domain>, two <two@their.domain>, etc',
         ),
         body => '',
     );
 
     is-deeply $email.message-id, 'stuff', 'message id should parse';
     is-deeply $email.refs, qw[one three two], 'refs should parse';
+    is-deeply( $email.addresses.sort,
+               @('me@my.domain','one@me','one@your.domain','two@their.domain'),
+               'addresses should parse' );
 }
 
 done-testing;
author	dakkar <dakkar@thenautilus.net>	2019-12-27 14:14:24 +0000
committer	dakkar <dakkar@thenautilus.net>	2019-12-27 15:15:36 +0000
commit	d31b7f8710f89db6035a80adae4d97ada8503336 (patch)
tree	ab8d718f71075e49d2cad5ee9edaac63bd53973a
parent	restructure store, extract server (diff)
download	MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.gz MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.bz2 MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.zip