summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordakkar <dakkar@thenautilus.net>2019-12-29 11:43:52 +0000
committerdakkar <dakkar@thenautilus.net>2019-12-29 11:43:52 +0000
commit38e90e74f640269b15da8c907a5b6b1c063a88c1 (patch)
treea6a3217be29479a3bb8946e007e73abad7459b65
parentnicer way of finding `lib` (diff)
downloadMaildirIndexer-38e90e74f640269b15da8c907a5b6b1c063a88c1.tar.gz
MaildirIndexer-38e90e74f640269b15da8c907a5b6b1c063a88c1.tar.bz2
MaildirIndexer-38e90e74f640269b15da8c907a5b6b1c063a88c1.zip
kill p5 program
-rw-r--r--maildir-indexer.pl387
1 files changed, 0 insertions, 387 deletions
diff --git a/maildir-indexer.pl b/maildir-indexer.pl
deleted file mode 100644
index e6c7693..0000000
--- a/maildir-indexer.pl
+++ /dev/null
@@ -1,387 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use threads;
-use threads::shared;
-use IO::Socket::INET;
-use File::Next;
-use Email::Simple;
-
-=head1 NAME
-
-maildir-indexer.pl
-
-=head1 AUTHOR
-
-Gianni Ceccarelli <dakkar@thenautilus.net>
-
-=head1 COPYRIGHT & LICENSE
-
-Copyright 2009 Gianni Ceccarelli
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-For a copy of the GNU Affero General Public License, see
-L<http://www.gnu.org/licenses/>.
-
-=cut
-
-{
-
-=head1 Index handling
-
-C<%files2id> maps a mailbox-file pair to a message id
-(C<$files2id{$mailbox}->{$file}=$id>).
-
-C<%id2mailbox> maps a message id to a list of mailboxes.
-
-These hashes are private to the index handling section: the following
-function must be used to manipulate the index.
-
-=cut
-
-my %files2id : shared;
-my %id2mailbox : shared;
-
-=head2 C<add_file>
-
- add_file($mailbox,$file,$id);
-
-Adds an entry to the index. Takes care of locking, and of sharing
-newly-created hashes and arrays.
-
-=cut
-
-sub add_file {
- my ($mailbox,$file,$id)=@_;
- lock(%files2id);
-
- # Auto-vivified references are not shared, we must share them explicitly.
- # See threads::shared for an explanation of the '&share' call style.
-
- $files2id{$mailbox}||=&share({});
- $files2id{$mailbox}->{$file}=$id;
-
- $id2mailbox{$id}||=&share([]);
- push @{$id2mailbox{$id}},$mailbox;
-
- return;
-}
-
-=head2 C<del_file>
-
- del_file($mailbox,$file);
-
-Removes an entry from the index. Takes care of locking, and of
-removing empty hashes and arrays.
-
-=cut
-
-sub del_file {
- my ($mailbox,$file)=@_;
- lock(%files2id);
-
- return unless exists $files2id{$mailbox};
- return unless exists $files2id{$mailbox}->{$file};
- my $id=delete $files2id{$mailbox}->{$file};
-
- return unless exists $id2mailbox{$id};
- # remove the mailbox, keeping the order
- @{$id2mailbox{$id}}=grep {$_ ne $mailbox} @{$id2mailbox{$id}};
- delete $id2mailbox{$id} unless @{$id2mailbox{$id}};
-
- return;
-}
-
-=head2 C<get_mailboxes>
-
- @mailboxes = get_mailboxes($id);
-
-Returns all the mailboxes that contain a message with the given
-message id, in roughly chronological order. Can return an empty list.
-
-=cut
-
-sub get_mailboxes {
- my ($id)=@_;
- lock(%files2id);
-
- return unless exists $id2mailbox{$id};
- return @{$id2mailbox{$id}};
-}
-
-}
-
-=head1 Message & mailbox parsing
-
-=head2 C<id_from_file>
-
- $id = id_from_file($dir,$file);
-
-Returns the message id of the C<$file> in the C<$dir>. Returns
-C<undef> if the file can't be read, or it does not contain a message
-id.
-
-=cut #'
-
-sub id_from_file {
- my ($dir,$file)=@_;
-
- # we might use Email::Simple here, but it's useless to load
- # and parse the entire message
-
- open my $fh,'<',"$dir/$file" or return;
- while (my $line=<$fh>) {
- $line=~s{[\x0d\x0a]+\z}{};
- return unless length($line); # end of headers, no messageid found
- $line =~ m{^Message-ID: \s* <(\S+?)>}xi and return $1;
- }
- return;
-}
-
-=head2 C<split_refs>
-
- @ids = split_refs($header_field_value);
-
-Extracts the message ids from the value of a header file such as
-C<In-Reply-To:> or C<References:>.
-
-=cut
-
-sub split_refs {
- my ($str)=@_;
- return unless $str;
-
- $str=~s{^.*<}{};$str=~s{>.*$}{};
- return split />.*?</,$str;
-}
-
-=head2 C<mailbox_from_path>
-
- $mailbox = mailbox_from_path($dir);
-
-Extracts the mailbox name from a directory name. Assumes that all
-mailboxes are under a C<.maildir/> directory. Returns C<undef> if the
-directory name can't be parsed.
-
-=cut #'
-
-sub mailbox_from_path {
- my ($dir)=@_;
-
- # /home/dakkar/.maildir/.mail.Personal.Dakkar/cur
- # ->
- # .mail.Personal.Dakkar
- $dir=~m{/.maildir/(.*?)/(?:cur|new|tmp)} and return $1;
- return;
-}
-
-=head2 C<ignorable>
-
- if (ignorable($mailbox)) { ... }
-
-Returns whether a mailbox can be safely ignored by the indexer. There
-are two ignorable mailboxes: C<.Trash> and C<.mail.SPAM>.
-
-=cut
-
-sub ignorable {
- return $_[0] =~ m{(?:^|/)(?:\.mail\.SPAM|\.Trash)(?:/|$)};
-}
-
-=head1 Scanning the mail archive
-
-=head2 C<scan_directory>
-
-Using L<File::Next>, scans C<$ENV{HOME}/.maildir> for message files,
-and adds them to the index.
-
-=cut
-
-sub scan_directory {
- my $files=File::Next::files({
- file_filter => sub {
- # /cur/ and /new/ are the only interesting directories in a maildir
- ($File::Next::dir =~ m{/(?:cur|new)(?:/|$)})
- and
- # we ignore dovecot's files
- ($_ !~ m{^dovecot});
- },
- descend_filter => sub {
- # ignorable mailboxes are ignored
- !ignorable($_);
- },
- follow_symlinks=>0,
- }, "$ENV{HOME}/.maildir");
-
- my ($dir,$file,$fullpath,$mailbox,$id);
- while (($dir,$file,$fullpath)=$files->()) {
- $mailbox=mailbox_from_path($dir);
- next unless defined $mailbox; # might not be a real mailbox
- next if ignorable($mailbox); # should not happen
- $id=id_from_file($dir,$file);
- next unless defined $id; # might not be a message, or have disappeared
- add_file($mailbox,$file,$id);
- }
-
- return;
-}
-
-{
-
-=head1 Watching the mail archive for changes
-
-=cut
-
-my $child_pid : shared;
-# cleanup action
-END {
- kill 2,$child_pid if $child_pid;
-}
-
-=head2 C<watch_directory>
-
-Parses the output from C<inotifywait>. For the events C<CREATED> and
-C<MOVED_TO>, adds entries to the index. For the events C<DELETED> and
-C<MOVED_FROM>, removes entries from the index.
-
-=cut
-
-sub watch_directory {
- # see inotifywait(1)
- $child_pid=
- open my $inotify,'-|',
- qw(inotifywait -q -m -r
- --exclude (^|/)(dovecot|tmp/).*
- -c
- -e move -e delete -e create),
- "$ENV{HOME}/.maildir"
- or die "Can't start inotifywait: $!";
-
- while (my $line=<$inotify>) {
- chomp($line);
-
- # these lines are a naif parser for the CSV output of inotifywait
- my ($dir,$event,$file)=
- ($line =~ m{^ ( (?:".*?") | [^,]* ) ,
- ( (?:".*?") | [^,]* ) ,
- ( (?:".*?") | [^,]* ) $ }smx );
- s{(?:\A")|(?:"\z)}{}g for $dir,$event,$file;
-
- my $mailbox=mailbox_from_path($dir);
- next unless defined $mailbox; # might not be a mailbox event
- next if ignorable($mailbox); # ignore ignorable mailboxes
-
- if ($event =~ /CREATE|MOVED_TO/) {
- # a file has appeared: get the message id
- my $id=id_from_file($dir,$file);
- if (defined $id) {
- # it is a proper message, add it to the index
- add_file($mailbox,$file,$id);
- }
- }
- elsif ($event =~ /DELETE|MOVED_FROM/) {
- # a file has disappeared: remove it from the index
- del_file($mailbox,$file);
- }
- }
-}
-
-}
-
-=head1 Serving requests
-
-=head2 C<server>
-
-Opens a listening socket on C<localhost:9000>. Whenever a connection
-is received, executes C<handle_client> in a separate thread.
-
-=cut
-
-sub server {
- my $serv_sock=IO::Socket::INET->new(
- Listen=>10,
- LocalAddr=>'127.0.0.1',
- LocalPort=>9000,
- Proto=>'tcp',
- ReuseAddr=>1,
- );
- while (my $other_sock=$serv_sock->accept) {
- threads->create(\&handle_client,$other_sock)->detach;
- }
-}
-
-=head2 C<handle_client>
-
-This funtction expects a mail message from the socket. The message is
-parsesd, and possible references extracted. If the are references, and
-one of them is known to the index, the mailbox name of such a
-reference is printed to the socket.
-
-=cut
-
-sub handle_client {
- my ($client)=@_;
-
- # we get a message header, followed by a blank line
- my $header;
- while (my $line=<$client>) {
- $line=~s{[\x0d\x0a]+\z}{\x0d\x0a};
- $header.=$line;
- last if $line eq "\x0d\x0a";
- }
-
- my $header_obj=Email::Simple->new(\$header)->header_obj;
- # In-Reply-To is the most trustworthy
- # References are listed in chronological order
- # @refs ends up with the ids in decreasing order of importance
- my @refs=(split_refs($header_obj->header('In-Reply-To')),
- reverse(split_refs($header_obj->header('References'))));
-
- my @mailboxes;
- for my $id (@refs) {
- @mailboxes=get_mailboxes($id);
- if (@mailboxes) {
- # we know where a message is! tell the client
- # (we only print the most recent mailbox)
- # the client may have disconnected: printing to a
- # closed socket makes the server die
- print {$client} "$mailboxes[-1]\x0d\x0a"
- if $client->connected;
-
- return;
- }
- }
- return;
-}
-
-=head1 Main program
-
-=over 4
-
-=item *
-
-start C<watch_directory> in a thread
-
-=item *
-
-start C<scan_directory> in another thread
-
-=item *
-
-start C<server> in the main thread
-
-=back
-
-=cut
-
-threads->create(\&watch_directory)->detach;
-threads->create(\&scan_directory)->detach;
-server();