summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordakkar <dakkar@thenautilus.net>2019-12-27 14:14:24 +0000
committerdakkar <dakkar@thenautilus.net>2019-12-27 15:15:36 +0000
commitd31b7f8710f89db6035a80adae4d97ada8503336 (patch)
treeab8d718f71075e49d2cad5ee9edaac63bd53973a
parentrestructure store, extract server (diff)
downloadMaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.gz
MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.bz2
MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.zip
extract addresses
tried to add some typing, wanted to return `Positional[Str:D]`, but half the `Array` / `List` methods return `Seq`, empty arrays are untyped, plus https://github.com/rakudo/rakudo/issues/3383 made the whole thing a mess so we're just returning `Iterable`, which is not even parametric, so we can't promise we're returning strings
-rw-r--r--lib/MaildirIndexer/Email.pm647
-rw-r--r--t/email.t12
2 files changed, 53 insertions, 6 deletions
diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6
index 0cd5bd7..d5ecc18 100644
--- a/lib/MaildirIndexer/Email.pm6
+++ b/lib/MaildirIndexer/Email.pm6
@@ -7,16 +7,55 @@ has $!body;
method BUILD(:%!headers,:$!body,:$!path = IO) { }
-method message-id { split-refs(%!headers<message-id>)[0] // '' }
-method refs {
+method message-id(--> Str:D) { split-refs(%!headers<message-id>)[0] // '' }
+method refs(--> Iterable) {
return (
|split-refs(%!headers<in-reply-to>),
|split-refs(%!headers<references>).reverse,
);
}
-multi split-refs(Any) { return (); }
-multi split-refs(Str:D $str) {
+multi split-refs(Any --> Iterable) { return () }
+multi split-refs(Str:D $str --> Iterable) {
return $/<refs>».Str if $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
return ();
}
+
+method addresses (--> Iterable) {
+ return (
+ |extract-addresses(%!headers<sender>),
+ |extract-addresses(%!headers<x-original-sender>),
+ |extract-addresses(%!headers<from>),
+ |extract-addresses(%!headers<to>),
+ |extract-addresses(%!headers<reply-to>),
+ |extract-addresses(%!headers<mailing-list>),
+ ).unique;
+ # we should add a hack for info@meetup.com, where we keep the
+ # whole "from", since it's the only way to distinguish between
+ # different groups
+}
+
+# subset of the grammar of p5's Email::Address, ignoring comments and
+# quoting
+my grammar Address {
+ token CTL { <[ \x00 .. \x1F \x7F ]> }
+ token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
+
+ token atext { <-CTL> & <-special> & \S }
+ token dot-atom { <ws> [ [<atext>+]+ % '.' ] <ws> }
+
+ token local-part { <.dot-atom> }
+ token domain { <.dot-atom> }
+
+ token addr { <local-part> '@' <domain> }
+
+ rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ }
+}
+
+multi sub extract-addresses(Any --> Iterable) { return () }
+multi sub extract-addresses(Str:D $str --> Iterable) {
+ with Address.parse($str) {
+ return $_<addr>».Str;
+ }
+ return ();
+}
diff --git a/t/email.t b/t/email.t
index f2208a6..c6093db 100644
--- a/t/email.t
+++ b/t/email.t
@@ -10,7 +10,8 @@ subtest 'no values' => {
);
is-deeply $email.message-id, '', 'message id should parse';
- is-deeply $email.refs, qw[], 'refs should parse';
+ is-deeply $email.refs, @(), 'refs should parse';
+ is-deeply $email.addresses, @(), 'addresses should parse';
}
subtest 'bad values' => {
@@ -24,7 +25,8 @@ subtest 'bad values' => {
);
is-deeply $email.message-id, '', 'message id should parse';
- is-deeply $email.refs, qw[], 'refs should parse';
+ is-deeply $email.refs, @(), 'refs should parse';
+ is-deeply $email.addresses, @(), 'addresses should parse';
}
subtest 'all values' => {
@@ -33,12 +35,18 @@ subtest 'all values' => {
message-id => 'some <stuff> here',
in-reply-to => '<one>',
references => 'bad <two> garbage <three>',
+ reply-to => 'foo <one@me>',
+ from => 'me@my.domain',
+ to => 'one <one@your.domain>, two <two@their.domain>, etc',
),
body => '',
);
is-deeply $email.message-id, 'stuff', 'message id should parse';
is-deeply $email.refs, qw[one three two], 'refs should parse';
+ is-deeply( $email.addresses.sort,
+ @('me@my.domain','one@me','one@your.domain','two@their.domain'),
+ 'addresses should parse' );
}
done-testing;