use v6.d;
unit class MaildirIndexer::Email;
use MaildirIndexer::LogTimelineSchema;

# One e-mail message: its header fields, its body and (optionally) the
# path it was read from.
#
# NOTE(review): every header lookup below uses a lower-case field name
# (e.g. %!headers<message-id>). This assumes that whatever builds
# %!headers normalises field names that way -- confirm against the
# parser. The subscripts themselves were missing from the copy of this
# file under review and have been restored by inference; each restored
# name is flagged where it is used.

# Where the message lives; falls back to the IO type object when the
# constructor is not given a path.
has IO $.path;
# Header fields, keyed by field name.
has %!headers;
# Message body.
has $!body;

# Binds the named constructor arguments directly onto the attributes;
# there is nothing else to initialise.
method BUILD(:%!headers,:$!body,:$!path = IO) { }

# Returns the first <...> token found in the Message-ID header, or the
# empty string when the header is absent or contains no such token.
method message-id(--> Str:D) {
    split-refs(%!headers<message-id>)[0] // ''
}

# Returns the ids this message refers to: the ids from In-Reply-To,
# followed by the ids from References in reverse order.
#
# NOTE(review): the two header names are inferred from the standard
# threading fields (RFC 5322 section 3.6.4). The reversal presumably puts
# the nearest ancestor first, since References lists the oldest id
# first -- confirm.
method refs(--> Iterable) {
    return (
        |split-refs(%!headers<in-reply-to>),
        |split-refs(%!headers<references>).reverse,
    );
}

# Fallback candidate: anything that is not a defined string (a missing
# header, for instance) contributes no ids.
multi split-refs(Any --> Iterable) { return () }

# Extracts the contents of each <...> token from a header value.
#
# The regex matches a '<', then one or more runs of characters other than
# '<' and '>' separated by "'>' anything '<'", then a closing '>'. Each
# run is captured under the name `refs`. Returns the captured strings in
# order of appearance, or an empty array when nothing matches.
#
# The work is wrapped in the Parse::Header `.log:` call from
# MaildirIndexer::LogTimelineSchema (defined outside this file).
multi split-refs(Str:D $str --> Iterable) {
    my @result;
    MaildirIndexer::LogTimelineSchema::Parse::Header.log: {
        @result = $/<refs>».Str
            if $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
    }
    return @result;
}

# Returns the distinct e-mail addresses found in the address-bearing
# headers, in first-seen order.
#
# NOTE(review): the original listed seven header lookups whose names were
# lost. The six below are the originator and destination fields of
# RFC 5322 (sections 3.6.2 and 3.6.3); their order only affects the order
# of the result.
# TODO: restore the seventh header from the upstream source.
method addresses (--> Iterable) {
    return (
        |extract-addresses(%!headers<from>),
        |extract-addresses(%!headers<sender>),
        |extract-addresses(%!headers<reply-to>),
        |extract-addresses(%!headers<to>),
        |extract-addresses(%!headers<cc>),
        |extract-addresses(%!headers<bcc>),
    ).unique;
    # TODO: add a special case for info@meetup.com, where we keep the
    # whole "From" value, since that is the only way to distinguish
    # between different groups.
}

# Subset of the grammar of Perl 5's Email::Address, ignoring comments and
# quoting.
my grammar Address {
    # ASCII control characters.
    token CTL { <[ \x00 .. \x1F \x7F ]> }
    # Characters that may not appear in an unquoted atom.
    token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
    # One atom character: not a control, not a special, not whitespace.
    token atext { <-CTL> & <-special> & \S }
    # One or more atoms joined by single dots.
    token dot-atom { [ [<atext>+]+ % '.' ] }
    token local-part { <.dot-atom> }
    token domain { <.dot-atom> }
    # A bare address: local-part '@' domain.
    token addr { <local-part> '@' <domain> }
    # One or more addresses, with arbitrary text before, between and
    # after them.
    rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ }
}

# Fallback candidate: anything that is not a defined string (a missing
# header, for instance) contributes no addresses.
multi sub extract-addresses(Any --> Iterable) { return () }

# Parses a header value with the Address grammar and returns every `addr`
# match as a string, or an empty array when the value does not parse.
#
# The work is wrapped in the Parse::Header `.log:` call from
# MaildirIndexer::LogTimelineSchema (defined outside this file).
multi sub extract-addresses(Str:D $str --> Iterable) {
    my @result;
    MaildirIndexer::LogTimelineSchema::Parse::Header.log: {
        with Address.parse($str) {
            @result = $_<addr>».Str;
        }
    }
    return @result;
}