summaryrefslogtreecommitdiff
path: root/lib/Feed/Role/LinkedPage.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Feed/Role/LinkedPage.pm')
-rw-r--r--lib/Feed/Role/LinkedPage.pm53
1 files changed, 53 insertions, 0 deletions
diff --git a/lib/Feed/Role/LinkedPage.pm b/lib/Feed/Role/LinkedPage.pm
new file mode 100644
index 0000000..694e4c1
--- /dev/null
+++ b/lib/Feed/Role/LinkedPage.pm
@@ -0,0 +1,53 @@
+package Feed::Role::LinkedPage;
+use Moose::Role;
+use 5.012;
+use namespace::autoclean;
+use XML::Feed::Content;
+
+=head1 NAME
+
+Feed::Role::LinkedPage - fetch the linked resource instead of using
+the RSS summary
+
+=head1 SYNOPSIS
+
+ set_feed_class(Feed->with_traits(
+ 'Mail',
+ 'LinkedPage',
+ 'ContentOnly',
+ ));
+
+If your feed only carries summaries, or just the first paragraph of each
+article, apply this role to fetch the complete linked page for each entry
+and use that page as the entry content for further processing.
+
+=cut
+
+# Wrap extract_entries: once the wrapped method has produced the feed
+# entries, replace the content of each entry with the page its link
+# points to.
+#
+# Returns the array reference obtained from the wrapped method, truncated
+# in place to at most two entries. An entry without a link, or whose page
+# cannot be fetched or comes back empty, keeps the content it already had.
+around extract_entries => sub {
+    my ($orig, $self) = @_;
+
+    $self->log->trace('around extract_entries - begin');
+
+    # Fetch the linked HTML page from the feed instead of
+    # using the content of the feed itself
+
+    my $entries = $self->$orig();
+
+    # Only the first two entries are kept and processed.
+    # NOTE(review): this hard-coded cap looks like a debugging leftover -
+    # confirm whether it is intended before relying on it.
+    # The length guard avoids the "splice() offset past end of array"
+    # warning for feeds with fewer than two entries.
+    splice @$entries, 2 if @$entries > 2;
+
+    ENTRY:
+    for my $entry (@$entries) {
+        my $link = $entry->link;
+
+        # Nothing to fetch for an entry without a link; leave it untouched.
+        if ( !defined $link or !length $link ) {
+            $self->log->trace('around extract_entries - entry has no link, skipping');
+            next ENTRY;
+        }
+
+        $self->log->trace('around extract_entries - fetching ' . $link);
+        my $res = $self->user_agent->get($link);
+        next ENTRY unless $res->is_success;
+
+        # Decode the response body once and reuse it.
+        my $body = $res->decoded_content;
+        next ENTRY unless $body;
+
+        # Take exactly one Content-Type value. Calling header() directly
+        # inside the hash constructor evaluates it in list context, where
+        # zero or several values would shift the key/value pairs below.
+        # NOTE(review): presumably $res is an HTTP::Response - confirm.
+        my ($type) = $res->header('Content-Type');
+
+        my $content = XML::Feed::Content->wrap({
+            type => $type,
+            body => $body,
+            base => $link,
+        });
+        $entry->content($content);
+    }
+
+    $self->log->trace('around extract_entries - end');
+    return $entries;
+};
+
+1;