From d31b7f8710f89db6035a80adae4d97ada8503336 Mon Sep 17 00:00:00 2001 From: dakkar Date: Fri, 27 Dec 2019 14:14:24 +0000 Subject: extract addresses tried to add some typing, wanted to return `Positional[Str:D]`, but half the `Array` / `List` methods return `Seq`, empty arrays are untyped, plus https://github.com/rakudo/rakudo/issues/3383 made the whole thing a mess so we're just returning `Iterable`, which is not even parametric, so we can't promise we're returning strings --- lib/MaildirIndexer/Email.pm6 | 47 ++++++++++++++++++++++++++++++++++++++++---- t/email.t | 12 +++++++++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6 index 0cd5bd7..d5ecc18 100644 --- a/lib/MaildirIndexer/Email.pm6 +++ b/lib/MaildirIndexer/Email.pm6 @@ -7,16 +7,55 @@ has $!body; method BUILD(:%!headers,:$!body,:$!path = IO) { } -method message-id { split-refs(%!headers)[0] // '' } -method refs { +method message-id(--> Str:D) { split-refs(%!headers)[0] // '' } +method refs(--> Iterable) { return ( |split-refs(%!headers), |split-refs(%!headers).reverse, ); } -multi split-refs(Any) { return (); } -multi split-refs(Str:D $str) { +multi split-refs(Any --> Iterable) { return () } +multi split-refs(Str:D $str --> Iterable) { return $/».Str if $str ~~ m{'<' $ = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' }; return (); } + +method addresses (--> Iterable) { + return ( + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + |extract-addresses(%!headers), + ).unique; + # we should add a hack for info@meetup.com, where we keep the + # whole "from", since it's the only way to distinguish between + # different groups +} + +# subset of the grammar of p5's Email::Address, ignoring comments and +# quoting +my grammar Address { + token CTL { <[ \x00 .. \x1F \x7F ]> } + token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> } + + token atext { <-CTL> & <-special> & \S } + token dot-atom { [ [+]+ % '.' ] } + + token local-part { <.dot-atom> } + token domain { <.dot-atom> } + + token addr { '@' } + + rule TOP { ^ .*? [ + % .+? ] .*? $ } +} + +multi sub extract-addresses(Any --> Iterable) { return () } +multi sub extract-addresses(Str:D $str --> Iterable) { + with Address.parse($str) { + return $_».Str; + } + return (); +} diff --git a/t/email.t b/t/email.t index f2208a6..c6093db 100644 --- a/t/email.t +++ b/t/email.t @@ -10,7 +10,8 @@ subtest 'no values' => { ); is-deeply $email.message-id, '', 'message id should parse'; - is-deeply $email.refs, qw[], 'refs should parse'; + is-deeply $email.refs, @(), 'refs should parse'; + is-deeply $email.addresses, @(), 'addresses should parse'; } subtest 'bad values' => { @@ -24,7 +25,8 @@ subtest 'bad values' => { ); is-deeply $email.message-id, '', 'message id should parse'; - is-deeply $email.refs, qw[], 'refs should parse'; + is-deeply $email.refs, @(), 'refs should parse'; + is-deeply $email.addresses, @(), 'addresses should parse'; } subtest 'all values' => { @@ -33,12 +35,18 @@ subtest 'all values' => { message-id => 'some here', in-reply-to => '', references => 'bad garbage ', + reply-to => 'foo ', + from => 'me@my.domain', + to => 'one , two , etc', ), body => '', ); is-deeply $email.message-id, 'stuff', 'message id should parse'; is-deeply $email.refs, qw[one three two], 'refs should parse'; + is-deeply( $email.addresses.sort, + @('me@my.domain','one@me','one@your.domain','two@their.domain'), + 'addresses should parse' ); } done-testing; -- cgit v1.2.3