From d31b7f8710f89db6035a80adae4d97ada8503336 Mon Sep 17 00:00:00 2001
From: dakkar <dakkar@thenautilus.net>
Date: Fri, 27 Dec 2019 14:14:24 +0000
Subject: extract addresses

tried to add some typing, wanted to return `Positional[Str:D]`, but
half the `Array` / `List` methods return `Seq`, empty arrays are
untyped, plus https://github.com/rakudo/rakudo/issues/3383 made the
whole thing a mess

so we're just returning `Iterable`, which is not even parametric, so
we can't promise we're returning strings
---
 lib/MaildirIndexer/Email.pm6 | 47 ++++++++++++++++++++++++++++++++++++++++----
 t/email.t                    | 12 +++++++++--
 2 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6
index 0cd5bd7..d5ecc18 100644
--- a/lib/MaildirIndexer/Email.pm6
+++ b/lib/MaildirIndexer/Email.pm6
@@ -7,16 +7,55 @@ has $!body;
 
 method BUILD(:%!headers,:$!body,:$!path = IO) { }
 
-method message-id { split-refs(%!headers<message-id>)[0] // '' }
-method refs {
+method message-id(--> Str:D) { split-refs(%!headers<message-id>)[0] // '' }
+method refs(--> Iterable) {
     return (
         |split-refs(%!headers<in-reply-to>),
         |split-refs(%!headers<references>).reverse,
     );
 }
 
-multi split-refs(Any) { return (); }
-multi split-refs(Str:D $str) {
+multi split-refs(Any --> Iterable) { return () }
+multi split-refs(Str:D $str --> Iterable) {
     return $/<refs>».Str if $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
     return ();
 }
+
+method addresses (--> Iterable) {
+    return (
+        |extract-addresses(%!headers<sender>),
+        |extract-addresses(%!headers<x-original-sender>),
+        |extract-addresses(%!headers<from>),
+        |extract-addresses(%!headers<to>),
+        |extract-addresses(%!headers<reply-to>),
+        |extract-addresses(%!headers<mailing-list>),
+    ).unique;
+    # we should add a hack for info@meetup.com, where we keep the
+    # whole "from", since it's the only way to distinguish between
+    # different groups
+}
+
+# subset of the grammar of p5's Email::Address, ignoring comments and
+# quoting
+my grammar Address {
+    token CTL { <[ \x00 .. \x1F \x7F ]> }
+    token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
+
+    token atext { <-CTL> & <-special> & \S }
+    token dot-atom { <ws> [ [<atext>+]+ % '.' ] <ws> }
+
+    token local-part { <.dot-atom> }
+    token domain { <.dot-atom> }
+
+    token addr { <local-part> '@' <domain> }
+
+    rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ }
+}
+
+multi sub extract-addresses(Any --> Iterable) { return () }
+multi sub extract-addresses(Str:D $str --> Iterable) {
+    with Address.parse($str) {
+        return $_<addr>».Str;
+    }
+    return ();
+}
diff --git a/t/email.t b/t/email.t
index f2208a6..c6093db 100644
--- a/t/email.t
+++ b/t/email.t
@@ -10,7 +10,8 @@ subtest 'no values' => {
     );
 
     is-deeply $email.message-id, '', 'message id should parse';
-    is-deeply $email.refs, qw[], 'refs should parse';
+    is-deeply $email.refs, @(), 'refs should parse';
+    is-deeply $email.addresses, @(), 'addresses should parse';
 }
 
 subtest 'bad values' => {
@@ -24,7 +25,8 @@ subtest 'bad values' => {
     );
 
     is-deeply $email.message-id, '', 'message id should parse';
-    is-deeply $email.refs, qw[], 'refs should parse';
+    is-deeply $email.refs, @(), 'refs should parse';
+    is-deeply $email.addresses, @(), 'addresses should parse';
 }
 
 subtest 'all values' => {
@@ -33,12 +35,18 @@ subtest 'all values' => {
             message-id => 'some <stuff> here',
             in-reply-to => '<one>',
             references => 'bad <two> garbage <three>',
+            reply-to => 'foo <one@me>',
+            from => 'me@my.domain',
+            to => 'one <one@your.domain>, two <two@their.domain>, etc',
         ),
         body => '',
     );
 
     is-deeply $email.message-id, 'stuff', 'message id should parse';
     is-deeply $email.refs, qw[one three two], 'refs should parse';
+    is-deeply( $email.addresses.sort,
+               @('me@my.domain','one@me','one@your.domain','two@their.domain'),
+               'addresses should parse' );
 }
 
 done-testing;
-- 
cgit v1.2.3