diff options
author | dakkar <dakkar@thenautilus.net> | 2019-12-27 14:14:24 +0000 |
---|---|---|
committer | dakkar <dakkar@thenautilus.net> | 2019-12-27 15:15:36 +0000 |
commit | d31b7f8710f89db6035a80adae4d97ada8503336 (patch) | |
tree | ab8d718f71075e49d2cad5ee9edaac63bd53973a /lib | |
parent | restructure store, extract server (diff) | |
download | MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.gz MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.tar.bz2 MaildirIndexer-d31b7f8710f89db6035a80adae4d97ada8503336.zip |
extract addresses
Tried to add some typing. I wanted to return `Positional[Str:D]`, but
half of the `Array` / `List` methods return `Seq`, empty arrays are
untyped, and https://github.com/rakudo/rakudo/issues/3383 made the
whole thing a mess.
So we're just returning `Iterable`, which is not even parametric, so
we can't promise that we're returning strings.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/MaildirIndexer/Email.pm6 | 47 |
1 files changed, 43 insertions, 4 deletions
diff --git a/lib/MaildirIndexer/Email.pm6 b/lib/MaildirIndexer/Email.pm6 index 0cd5bd7..d5ecc18 100644 --- a/lib/MaildirIndexer/Email.pm6 +++ b/lib/MaildirIndexer/Email.pm6 @@ -7,16 +7,55 @@ has $!body; method BUILD(:%!headers,:$!body,:$!path = IO) { } -method message-id { split-refs(%!headers<message-id>)[0] // '' } -method refs { +method message-id(--> Str:D) { split-refs(%!headers<message-id>)[0] // '' } +method refs(--> Iterable) { return ( |split-refs(%!headers<in-reply-to>), |split-refs(%!headers<references>).reverse, ); } -multi split-refs(Any) { return (); } -multi split-refs(Str:D $str) { +multi split-refs(Any --> Iterable) { return () } +multi split-refs(Str:D $str --> Iterable) { return $/<refs>».Str if $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' }; return (); } + +method addresses (--> Iterable) { + return ( + |extract-addresses(%!headers<sender>), + |extract-addresses(%!headers<x-original-sender>), + |extract-addresses(%!headers<from>), + |extract-addresses(%!headers<to>), + |extract-addresses(%!headers<reply-to>), + |extract-addresses(%!headers<mailing-list>), + ).unique; + # we should add a hack for info@meetup.com, where we keep the + # whole "from", since it's the only way to distinguish between + # different groups +} + +# subset of the grammar of p5's Email::Address, ignoring comments and +# quoting +my grammar Address { + token CTL { <[ \x00 .. \x1F \x7F ]> } + token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> } + + token atext { <-CTL> & <-special> & \S } + token dot-atom { <ws> [ [<atext>+]+ % '.' ] <ws> } + + token local-part { <.dot-atom> } + token domain { <.dot-atom> } + + token addr { <local-part> '@' <domain> } + + rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ } +} + +multi sub extract-addresses(Any --> Iterable) { return () } +multi sub extract-addresses(Str:D $str --> Iterable) { + with Address.parse($str) { + return $_<addr>».Str; + } + return (); +} |