package Feed::Role::ContentOnly;
use Moose::Role;
use 5.012;
use namespace::autoclean;

use HTML::ExtractMain 'extract_main_html';
use XML::Feed::Content;

=head1 NAME

Feed::Role::ContentOnly - extract and sanitize HTML content from a feed body

=head1 SYNOPSIS

    set_feed_class(Feed->with_traits(
        'Mail',
        'LinkedPage',
        'ContentOnly',
    ));

=head1 DESCRIPTION

If your feed only has summaries or only the first paragraph, maybe you
want to fetch the complete HTML page for processing. This strips out the
sidebars and sanitizes the HTML to look sane by using L<HTML::ExtractMain>.

=head1 METHODS

=head2 extract_entries

Wraps the C<extract_entries> method of the consuming class. The wrapped
method is called first and is expected to return an array reference of
entries. For every entry whose content body is true, the body is passed
through C<extract_main_html> and the entry's content is replaced by a new
L<XML::Feed::Content> object of type C<text/html> that carries the
extracted HTML and the original content's C<base>.

Entries with an empty body, and entries for which no main content is
returned, keep their original content. The (same) array reference is
returned.

=cut

around extract_entries => sub {
    # Forward any extra arguments to the wrapped method instead of
    # silently dropping them.
    my ($orig, $self, @args) = @_;
    $self->log->trace('around extract_entries - begin');

    my $entries = $self->$orig(@args);

    ENTRY:
    for my $entry (@{ $entries }) {
        my $content = $entry->content;
        my $html    = $content->body;
        $self->log->trace('finding content');
        next ENTRY unless $html;

        # Evaluate in scalar context on its own line: called directly
        # inside the hash constructor below it would be in list context,
        # and an empty return would shift the following key/value pairs
        # (body => 'base', ...).
        my $main_html = extract_main_html($html);
        if (!defined $main_html) {
            # Nothing usable came back - keep the entry as it was rather
            # than replacing its body with undef.
            $self->log->trace('no main content found - keeping original content');
            next ENTRY;
        }

        $entry->content(
            XML::Feed::Content->wrap({
                type => 'text/html',
                body => $main_html,
                base => scalar $content->base,
            })
        );
    }

    $self->log->trace('around extract_entries - end');
    return $entries;
};

1;