diff options
author | Gianni Ceccarelli <gianni.ceccarelli@net-a-porter.com> | 2012-12-14 14:09:36 +0000 |
---|---|---|
committer | Gianni Ceccarelli <gianni.ceccarelli@net-a-porter.com> | 2012-12-14 14:09:36 +0000 |
commit | 6cd57d56a2873897fa1a037e89e66dd02c59511d (patch) | |
tree | af876f26b423a3e6a378a8a163ba3a69d75fe16b /lib/Feed/Role | |
parent | dedupe: save last see timestamp (diff) | |
download | feeder-6cd57d56a2873897fa1a037e89e66dd02c59511d.tar.gz feeder-6cd57d56a2873897fa1a037e89e66dd02c59511d.tar.bz2 feeder-6cd57d56a2873897fa1a037e89e66dd02c59511d.zip |
dedupe schema & trace update
Diffstat (limited to 'lib/Feed/Role')
-rw-r--r-- | lib/Feed/Role/DeDupe.pm | 13 |
1 files changed, 10 insertions, 3 deletions
```diff
diff --git a/lib/Feed/Role/DeDupe.pm b/lib/Feed/Role/DeDupe.pm
index 164e3ec..9cd19c8 100644
--- a/lib/Feed/Role/DeDupe.pm
+++ b/lib/Feed/Role/DeDupe.pm
@@ -60,7 +60,8 @@ sub _maybe_build_schema {
     } catch {
         $dbh->do(<<'SQL');
 CREATE TABLE seen_items (
-  id VARCHAR(255) PRIMARY KEY
+  id VARCHAR(255) PRIMARY KEY,
+  last_seen TIMESTAMP
 )
 SQL
         $dbh->commit;
@@ -121,7 +122,7 @@ sub seen_already {
     my $id = $self->_entry_id($e);
-    my $count = $self->_update_sth->execute($id);
+    my $count = 0 + $self->_update_sth->execute($id);
     $self->log->trace("seen_already - end ($count)");
@@ -145,15 +146,21 @@ sub _entry_id {
     my $id = $e->id;
+    $self->log->trace("_entry_id: $id");
+
     if ($self->dedupe_use_date) {
         my $date = $e->modified // $e->issued;
-        if ($date) { $id .= '-' . $date->iso8601 }
+        if ($date) {
+            $id .= '-' . $date->iso8601;
+            $self->log->trace("_entry_id: (with date) $id");
+        }
     }
     if ($self->dedupe_use_body) {
         my $body = $e->content->body;
         my $content_digest = sha1_base64(encode('utf-8',$body));
         $id .= '-' . $content_digest;
+        $self->log->trace("_entry_id: (with content) $id");
     }
     return encode('utf-8',$id);
```