summaryrefslogtreecommitdiff
path: root/lib/Feed/Role
diff options
context:
space:
mode:
authordakkar <dakkar@thenautilus.net>2012-12-09 15:30:06 +0000
committerdakkar <dakkar@thenautilus.net>2012-12-09 15:30:06 +0000
commit7a005adbf486cbb6c3f64ba26b1aa97c2f696025 (patch)
tree29f1e82702d43f5e1710a990416d2b032065db50 /lib/Feed/Role
parentmaildir publisher (diff)
downloadfeeder-7a005adbf486cbb6c3f64ba26b1aa97c2f696025.tar.gz
feeder-7a005adbf486cbb6c3f64ba26b1aa97c2f696025.tar.bz2
feeder-7a005adbf486cbb6c3f64ba26b1aa97c2f696025.zip
renaming roles
Diffstat (limited to 'lib/Feed/Role')
-rw-r--r--lib/Feed/Role/AuthorName.pm30
-rw-r--r--lib/Feed/Role/DeDupe.pm141
-rw-r--r--lib/Feed/Role/FixDateTime.pm29
-rw-r--r--lib/Feed/Role/MailDir.pm76
-rw-r--r--lib/Feed/Role/Printer.pm28
5 files changed, 304 insertions, 0 deletions
diff --git a/lib/Feed/Role/AuthorName.pm b/lib/Feed/Role/AuthorName.pm
new file mode 100644
index 0000000..ec8a44d
--- /dev/null
+++ b/lib/Feed/Role/AuthorName.pm
@@ -0,0 +1,30 @@
+package Feed::Role::AuthorName;
+use Moose::Role;
+use 5.016;
+use namespace::autoclean;
+use Email::Address;
+use Try::Tiny;
+
+requires 'process_entry';
+
+before process_entry => sub {
+ my ($self,$entry) = @_;
+
+ $self->log->trace('before process_entry - begin');
+
+ my $author = $entry->author;
+
+ return unless $author && $author =~ /\@/;
+
+ try {
+ my $address = (Email::Address->parse($author))[0];
+ if (my $name = $address->name) {
+ $entry->author($name);
+ }
+ };
+
+ $self->log->trace('before process_entry - end');
+};
+
+1;
+
diff --git a/lib/Feed/Role/DeDupe.pm b/lib/Feed/Role/DeDupe.pm
new file mode 100644
index 0000000..f1bfc71
--- /dev/null
+++ b/lib/Feed/Role/DeDupe.pm
@@ -0,0 +1,141 @@
+package Feed::Role::DeDupe;
+use Moose::Role;
+use 5.016;
+use namespace::autoclean -also => ['_maybe_build_schema'];
+use DBI;
+use Try::Tiny;
+use Encode;
+use Digest::SHA1 'sha1_base64';
+
+requires 'get_feed','process','process_entry';
+
+has 'dupe_dsn' => (
+ is => 'ro',
+ isa => 'Str',
+ required => 1,
+);
+
+has dbh => (
+ is => 'ro',
+ lazy_build => 1,
+);
+
+has ['_find_sth','_insert_sth'] => (
+ is => 'ro',
+ lazy_build => 1,
+);
+
+sub _build_dbh {
+ my ($self) = @_;
+
+ my $dbh = DBI->connect($self->dupe_dsn,undef,undef,{
+ RaiseError => 1,
+ PrintError => 0,
+ AutoCommit => 0,
+ });
+
+ _maybe_build_schema($dbh);
+
+ return $dbh;
+}
+
+sub _maybe_build_schema {
+ my ($dbh) = @_;
+
+ try {
+ $dbh->selectrow_array(q{SELECT * FROM seen_items LIMIT 1});
+ $dbh->rollback;
+ } catch {
+ $dbh->do(<<'SQL');
+CREATE TABLE seen_items (
+ id VARCHAR(255) PRIMARY KEY
+)
+SQL
+$dbh->commit;
+ };
+
+ return;
+}
+
+sub _build__find_sth {
+ my ($self) = @_;
+
+ return $self->dbh->prepare(<<'SQL');
+SELECT COUNT(*)
+FROM seen_items
+WHERE id=?
+SQL
+}
+
+sub _build__insert_sth {
+ my ($self) = @_;
+
+ return $self->dbh->prepare(<<'SQL');
+INSERT INTO seen_items(id)
+VALUES (?)
+SQL
+}
+
+after process => sub {
+ my ($self) = @_;
+
+ $self->log->trace('after process');
+
+ $self->dbh->commit;
+};
+
+around process_entry => sub {
+ my ($orig,$self,$entry) = @_;
+
+ $self->log->trace('around process_entry - begin');
+
+ return if $self->seen_already($entry);
+
+ $self->log->trace('around process_entry - call original');
+
+ $self->$orig($entry);
+
+ $self->mark_seen($entry);
+
+ $self->log->trace('around process_entry - end');
+
+ return;
+};
+
+sub seen_already {
+ my ($self,$e) = @_;
+
+ $self->log->trace('seen_already - begin');
+
+ my $id = $self->_entry_id($e);
+
+ $self->_find_sth->execute($id);
+ my ($count) = $self->_find_sth->fetchrow_array;
+
+ $self->log->trace("seen_already - end ($count)");
+
+ return $count;
+}
+
+sub mark_seen {
+ my ($self,$e) = @_;
+
+ $self->log->trace('mark_seen - begin');
+
+ my $id = $self->_entry_id($e);
+
+ $self->_insert_sth->execute($id);
+
+ $self->log->trace('mark_seen - end');
+}
+
+sub _entry_id {
+ my ($self,$e) = @_;
+
+ my $body = $e->content->body;
+ my $content_digest = sha1_base64(encode('utf-8',$body));
+ my $id = join '-',$e->id,$e->modified->iso8601,$content_digest;
+ return encode('utf-8',$id);
+}
+
+1;
diff --git a/lib/Feed/Role/FixDateTime.pm b/lib/Feed/Role/FixDateTime.pm
new file mode 100644
index 0000000..86bc19a
--- /dev/null
+++ b/lib/Feed/Role/FixDateTime.pm
@@ -0,0 +1,29 @@
+package Feed::Role::FixDateTime;
+use Moose::Role;
+use 5.016;
+use namespace::autoclean;
+use DateTime;
+
+requires 'process_entry';
+
+before process_entry => sub {
+ my ($self,$entry) = @_;
+
+ $self->log->trace('before process_entry - begin');
+
+ for my $f ('issued','modified') {
+ my $date = $entry->$f;
+ if (!$date) {
+ $date = DateTime->from_epoch(epoch=>0);
+ }
+ if ($date && $date->time_zone->is_floating) {
+ $date->set_time_zone('UTC');
+ }
+
+ $entry->$f($date);
+ }
+
+ $self->log->trace('before process_entry - end');
+};
+
+1;
diff --git a/lib/Feed/Role/MailDir.pm b/lib/Feed/Role/MailDir.pm
new file mode 100644
index 0000000..f7cfc20
--- /dev/null
+++ b/lib/Feed/Role/MailDir.pm
@@ -0,0 +1,76 @@
+package Feed::Role::MailDir;
+use Moose::Role;
+use 5.016;
+use namespace::autoclean;
+use Encode;
+use Encode::IMAPUTF7;
+use MooseX::Types::Path::Class 'Dir';
+use Maildir::Lite;
+
+with 'Feed::HelperRole::Mail';
+
+requires 'extract_entries';
+
+has maildir_base => (
+ is => 'ro',
+ isa => Dir,
+ coerce => 1,
+ required => 1,
+);
+
+has maildir_folder => (
+ is => 'ro',
+ isa => 'Maildir::Lite',
+ lazy_build => 1,
+);
+
+sub _build_maildir_folder {
+ my ($self) = @_;
+
+ my $feed = $self->feed;
+
+ my $folder = encode('IMAP-UTF-7',$self->maildir_folder_for($feed));
+
+ $self->maildir_base->mkpath;
+
+ return Maildir::Lite->new(
+ dir => $self->maildir_base->subdir($folder)->stringify,
+ );
+}
+
+sub process_entry {
+ my ($self,$entry) = @_;
+
+ my ($msg) = $self->entry_to_mime($entry);
+
+ $self->store_into_maildir($msg);
+
+ return;
+}
+
+sub _clean_folder_part {
+ return map { s{\W+}{-}gr } @_;
+}
+
+sub maildir_folder_for {
+ my ($self,$feed) = @_;
+
+ my (@comps) = _clean_folder_part($self->mail_folders,$self->title);
+ return join '.','',@comps;
+}
+
+sub store_into_maildir {
+ my ($self,$msg) = @_;
+
+ my ($fh,$stat) = $self->maildir_folder->creat_message();
+ die "maildir create message failed ($stat)" if $stat;
+
+ binmode($fh);
+ $msg->print($fh)
+ or die "print to maildir failed: $!";
+
+ $self->maildir_folder->deliver_message($fh)
+ and die "maildir write failed";
+}
+
+1;
diff --git a/lib/Feed/Role/Printer.pm b/lib/Feed/Role/Printer.pm
new file mode 100644
index 0000000..44b05a8
--- /dev/null
+++ b/lib/Feed/Role/Printer.pm
@@ -0,0 +1,28 @@
+package Feed::Role::Printer;
+use Moose::Role;
+use 5.016;
+use namespace::autoclean;
+
+requires 'process';
+
+before process => sub {
+ my ($self) = @_;
+
+ say $self->title;
+};
+
+sub process_entry {
+ my ($self,$entry) = @_;
+
+ $self->log->trace('process_entry - begin');
+
+ for my $f (qw(id author title link issued modified)) {
+ say " $f:",$entry->$f//'<undef>';
+ }
+ say $entry->content->body;
+ say '';
+
+ $self->log->trace('process_entry - end');
+}
+
+1;