From d31b7f8710f89db6035a80adae4d97ada8503336 Mon Sep 17 00:00:00 2001
From: dakkar
Date: Fri, 27 Dec 2019 14:14:24 +0000
Subject: extract addresses

tried to add some typing, wanted to return `Positional[Str:D]`, but
half the `Array` / `List` methods return `Seq`, empty arrays are
untyped, plus https://github.com/rakudo/rakudo/issues/3383 made the
whole thing a mess so we're just returning `Iterable`, which is not
even parametric, so we can't promise we're returning strings
---
 lib/MaildirIndexer/Email.pm6 | 47 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6
index 0cd5bd7..d5ecc18 100644
--- a/lib/MaildirIndexer/Email.pm6
+++ b/lib/MaildirIndexer/Email.pm6
@@ -7,16 +7,55 @@ has $!body;
 
 method BUILD(:%!headers,:$!body,:$!path = IO) { }
 
-method message-id { split-refs(%!headers)[0] // '' }
-method refs {
+method message-id(--> Str:D) { split-refs(%!headers)[0] // '' }
+method refs(--> Iterable) {
     return (
         |split-refs(%!headers),
         |split-refs(%!headers).reverse,
     );
 }
 
-multi split-refs(Any) { return (); }
-multi split-refs(Str:D $str) {
+multi split-refs(Any --> Iterable) { return () }
+multi split-refs(Str:D $str --> Iterable) {
     return $/».Str if $str ~~ m{'<' $ = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
     return ();
 }
+
+method addresses (--> Iterable) {
+    return (
+        |extract-addresses(%!headers),
+        |extract-addresses(%!headers),
+        |extract-addresses(%!headers),
+        |extract-addresses(%!headers),
+        |extract-addresses(%!headers),
+        |extract-addresses(%!headers),
+    ).unique;
+    # we should add a hack for info@meetup.com, where we keep the
+    # whole "from", since it's the only way to distinguish between
+    # different groups
+}
+
+# subset of the grammar of p5's Email::Address, ignoring comments and
+# quoting
+my grammar Address {
+    token CTL { <[ \x00 .. \x1F \x7F ]> }
+    token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
+
+    token atext { <-CTL> & <-special> & \S }
+    token dot-atom { [ [+]+ % '.' ] }
+
+    token local-part { <.dot-atom> }
+    token domain { <.dot-atom> }
+
+    token addr { '@' }
+
+    rule TOP { ^ .*? [ + % .+? ] .*? $ }
+}
+
+multi sub extract-addresses(Any --> Iterable) { return () }
+multi sub extract-addresses(Str:D $str --> Iterable) {
+    with Address.parse($str) {
+        return $_».Str;
+    }
+    return ();
+}
-- 
cgit v1.2.3