summaryrefslogtreecommitdiff
path: root/lib/MaildirIndexer/Email.rakumod
diff options
context:
space:
mode:
Diffstat (limited to 'lib/MaildirIndexer/Email.rakumod')
-rw-r--r--lib/MaildirIndexer/Email.rakumod69
1 files changed, 69 insertions, 0 deletions
diff --git a/lib/MaildirIndexer/Email.rakumod b/lib/MaildirIndexer/Email.rakumod
new file mode 100644
index 0000000..4f92106
--- /dev/null
+++ b/lib/MaildirIndexer/Email.rakumod
@@ -0,0 +1,69 @@
+use v6.d;
+unit class MaildirIndexer::Email;
+use MaildirIndexer::LogTimelineSchema;
+
+# Public, read-only path attribute; it stays the undefined `IO` type
+# object when the caller passes no :path (presumably the location of
+# the message file -- confirm against the callers).
+has IO $.path;
+# Header values; the methods of this class look entries up by
+# lower-case header name (e.g. <message-id>, <in-reply-to>, <from>).
+has %!headers;
+# Stored as given by the constructor; no method visible in this file
+# reads it.
+has $!body;
+
+# Binds the named constructor arguments (:headers, :body, :path)
+# directly to the attributes.
+# Declared as a submethod rather than a method: BUILD initialises this
+# class's own private attributes and must not be inherited, otherwise a
+# subclass would pick it up as its own BUILD as well.
+submethod BUILD(:%!headers,:$!body,:$!path = IO) { }
+
+# Returns the first angle-bracketed id found in the `message-id`
+# header, or the empty string when the header is missing or holds no
+# `<...>` token.
+method message-id(--> Str:D) {
+    my $first-id = split-refs(%!headers<message-id>)[0];
+    return $first-id // '';
+}
+# Returns the ids this message refers to: every id from the
+# `in-reply-to` header first, followed by the ids from the
+# `references` header in reverse order of appearance.
+method refs(--> Iterable) {
+    my @replied-to = split-refs(%!headers<in-reply-to>);
+    my @referenced = split-refs(%!headers<references>).reverse;
+    return (|@replied-to, |@referenced);
+}
+
+# Fallback candidate: anything that is not a defined string (missing
+# header, type object, ...) yields no ids.
+multi split-refs(Any --> Iterable) { () }
+
+# Extracts the contents of every `<...>` token in $str, in order of
+# appearance and without the angle brackets. Text between a closing
+# `>` and the next opening `<` is skipped. The work is wrapped in a
+# Parse::Header timeline entry.
+multi split-refs(Str:D $str --> Iterable) {
+    my @ids;
+    MaildirIndexer::LogTimelineSchema::Parse::Header.log: {
+        my $found = $str ~~ m{'<' $<refs> = (<-[<>]>+)+ % [ '>' .*? '<' ] '>' };
+        if $found {
+            @ids = $found<refs>».Str;
+        }
+    }
+    return @ids;
+}
+
+# Collects the addresses found in the sender/recipient style headers,
+# in the header order listed below, with exact duplicates removed
+# (first occurrence wins).
+method addresses (--> Iterable) {
+    my @header-names = <
+        sender
+        x-original-sender
+        from
+        to
+        reply-to
+        mailing-list
+        x-original-from
+    >;
+
+    # assigning to an array forces every header to be parsed right
+    # here, before the result is handed back
+    my @found = @header-names.map: -> $name {
+        |extract-addresses(%!headers{$name})
+    };
+
+    # TODO: add a hack for info@meetup.com, where we keep the whole
+    # "from", since it's the only way to distinguish between different
+    # groups
+    return @found.unique;
+}
+
+# subset of the grammar of p5's Email::Address, ignoring comments and
+# quoting
+#
+# A parse succeeds only when the input contains at least one
+# `local-part@domain` pair; every pair found is captured under <addr>.
+my grammar Address {
+ # ASCII control characters: 0x00-0x1F plus 0x7F
+ token CTL { <[ \x00 .. \x1F \x7F ]> }
+ # punctuation that is excluded from atoms
+ token special { <[ \( \) \< \> \[ \] \: \; \@ \\ \, \. \" ]> }
+
+ # a single character that is at once not a CTL, not a special and not
+ # whitespace (`&` requires all three alternatives to match)
+ token atext { <-CTL> & <-special> & \S }
+ # runs of atext joined by single dots; the <ws> calls on either side
+ # are part of the match, so adjacent whitespace ends up inside the
+ # matched text of local-part, domain and addr
+ token dot-atom { <ws> [ [<atext>+]+ % '.' ] <ws> }
+
+ # both sides of the '@' share the same shape; the dot-atom call is
+ # non-capturing
+ token local-part { <.dot-atom> }
+ token domain { <.dot-atom> }
+
+ token addr { <local-part> '@' <domain> }
+
+ # anchored at both ends: skip leading text, capture one or more
+ # addresses separated by arbitrary text, then skip the remainder
+ rule TOP { ^ .*? [ <addr>+ % .+? ] .*? $ }
+}
+
+# Fallback candidate: anything that is not a defined string (missing
+# header, type object, ...) yields no addresses.
+multi sub extract-addresses(Any --> Iterable) { return () }
+
+# Returns every `local-part@domain` address that the Address grammar
+# finds in $str, in order of appearance; an empty list when the string
+# does not parse. The work is wrapped in a Parse::Header timeline
+# entry.
+multi sub extract-addresses(Str:D $str --> Iterable) {
+    my @result;
+    MaildirIndexer::LogTimelineSchema::Parse::Header.log: {
+        with Address.parse($str) {
+            # The grammar's dot-atom swallows the whitespace around each
+            # side of the '@', so the raw text of an <addr> match can
+            # look like " user@example.com ". Rebuild the address from
+            # its trimmed parts so the same address always yields the
+            # same string (callers de-duplicate on it).
+            @result = $_<addr>.map: -> $addr {
+                $addr<local-part>.Str.trim ~ '@' ~ $addr<domain>.Str.trim
+            };
+        }
+    }
+    return @result;
+}