summary refs log tree commit diff
path: root/feeder.pl
blob: ff611be636550ab37a187e1b73d951eb9d130860 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/usr/bin/env perl 
use strict;
use warnings;
use 5.012;
use Feed;
use Feed::Utils;
use Log::Log4perl;
 
# Logging setup: Log4perl is initialised from the inline configuration text
# below, passed as a reference to a scalar. The root logger is set to WARN
# with a single Screen appender that writes to stderr. The layout pattern
# prints the ISO8601 date, the first letter of the level, the "opml" and
# "uri" MDC values, the logger category and the chomped message.
my $log4perl_settings = <<'LOG4PERL_SETTINGS';
log4perl.rootLogger = WARN, Screen
log4perl.appender.Screen = Log::Log4perl::Appender::Screen
log4perl.appender.Screen.stderr = 1
log4perl.appender.Screen.layout = PatternLayout
log4perl.appender.Screen.layout.ConversionPattern = [%d{ISO8601}] %p{1} %X{opml} %X{uri} (%c) %m{chomp}%n
LOG4PERL_SETTINGS
Log::Log4perl::init(\$log4perl_settings);
 
# Build the feed class by applying the traits below to Feed through
# with_traits, then hand the result to set_feed_class. The traits are listed
# in the same order as before.
# NOTE(review): whether trait order matters depends on the trait
# implementations, which are not visible here -- keep the order unless
# confirmed otherwise.
my @feed_traits = qw(
    MailDir
    FixDateTime
    DeDupe
    AuthorName
    UnBase64
);
set_feed_class( Feed->with_traits(@feed_traits) );
 
# Key/value arguments passed to set_feed_args. They are held in an array
# (not a hash) so they reach the callee in exactly the order written.
# NOTE(review): presumably dupe_dsn is read by the DeDupe trait and
# maildir_base by the MailDir trait -- confirm in the Feed modules.
my @shared_feed_args = (
    dupe_dsn     => 'dbi:SQLite:dbname=dedup.db',
    maildir_base => 'maildir',
);
set_feed_args(@shared_feed_args);
 
# Planet Gentoo: feeds are taken from the planet's OPML file, with
# mail_folders set to 'Gentoo' and a list of feed URIs passed under the
# "exclude" key.
# Some hosts appear more than once on purpose-looking grounds: deathwing00.org
# with and without "www.", blog.i-no.de with a single and a double slash, and
# pi-ist-genau-3.de with three spellings of the ampersand ("&amp;", "&#038;",
# "&").
# NOTE(review): presumably exclusion is an exact string match against the URI
# found in the OPML, which would explain the variants -- confirm in
# Feed::Utils before pruning any of them.
my %gentoo_planet_defaults = ( mail_folders => 'Gentoo' );

my @gentoo_planet_excluded = (
    'http://www.deathwing00.org/wordpress/category/gentoo/feed/',
    'http://deathwing00.org/wordpress/category/gentoo/feed/',
    'http://home.coming.dk/index.php?tempskin=_rss2',
    'http://blog.vandium.net/category/gentoo-org/feed/',
    'http://blog.i-no.de//rss.xml',
    'http://blog.i-no.de/rss.xml',
    'https://barelysufficient.org/category/gentoo/feed/',
    'http://rrr.thetruth.de/category/planetgentoo/feed/',
    'http://pi-ist-genau-3.de/?cat=441&amp;feed=atom',
    'http://pi-ist-genau-3.de/?cat=441&#038;feed=atom',
    'http://pi-ist-genau-3.de/?cat=441&feed=atom',
    'http://blog.oldworld.fr/index.php?feed/tag/gentoo/atom',
    'http://blog.cafarelli.fr:82/feed/category/Gentoo/rss2',
    'http://www.hboeck.de/feeds/categories/11-Gentoo.rss',
    'http://hexxeh.net/?cat=5&feed=rss2',
    'http://gryniewicz.com/blogs/dang/category/gentoo/feed/',
    'http://max.kanat.us/tag-syndicate/?user=psykil&tag=gentoo',
    'http://dev.c1pher.net/index.php/category/gentoo/feed/',
    'http://idl0r.qasl.de/blog/index.php?tempskin=_rss2',
    'http://my.opera.com/pacho/xml/rss/blog/gentoo',
    'http://vh4x0r.wordpress.com/category/planet-gentoo/feed/',
    'http://hwoarang.silverarrow.org/category/linux/gentoo/feed/',
    'http://v00d00.net/category/linux/gentoo-linux/feed',
    'http://linuxcrazy.com/?q=rss.xml',
    'http://gentoo-pr.org/rss.xml',
    'https://blog.jolexa.net/category/gentoo/feed/',
    'http://blog.dastergon.gr/rss.xml',
    'http://www.domenkozar.com/feed/atom/',
    'https://www.gentoo.org/news/feed.xml',
    'http://www.professionalsysadmin.com/matt/atom.xml',
    'https://www.p8952.info/feed.category.xml',
    'https://rgm.io/atom/gentoo/',
    'http://blogs.gentoo.org/tampakrap/category/planets/planet-gentoo/feed/',
    'http://pi-ist-genau-3.de/category/gentoo/feed/atom/',
);

feeds_from_opml(
    \%gentoo_planet_defaults,
    'https://planet.gentoo.org/opml.xml',
    { exclude => \@gentoo_planet_excluded },
);
 
# Planet Perl: feeds are taken from the planet's OPML file, with mail_folders
# set to 'Perl' and the URIs below passed under the "exclude" key.
# http://www.dagolden.com/index.php/feed/ is excluded here and subscribed to
# explicitly, with its own options, in the later feeds() call for 'Perl'.
my %perl_planet_defaults = ( mail_folders => 'Perl' );

my @perl_planet_excluded = (
    'http://blog.urth.org/programming/atom.xml',
    'http://blogs.perl.org/users/john_mcnamara/',
    'http://www.dev411.com/blog/xml/atom/category/feed.xml',
    'http://chainsawblues.vox.com/library/posts/page/1/atom.xml',
    'http://pjf.id.au/blog/index.rss?tag=perl',
    'http://log.perl.org/index.rdf',
    'http://www.dagolden.com/index.php/feed/',
    'http://feeds.feedburner.com/PerlBuzz',
    'http://perlcast.com/rss/current.xml',
);

feeds_from_opml(
    \%perl_planet_defaults,
    'http://planet.perl.org/opml.xml',
    { exclude => \@perl_planet_excluded },
);
 
# 'Fun' feeds. The first argument to feeds() is a hash of options
# (mail_folders => 'Fun'); every following argument is either a plain feed
# URI or a hash carrying a "uri" plus per-feed options such as a title.
# NOTE(review): the BakaBT URI below carries "uid" and "key" query parameters
# in plain text; that looks like a personal access token committed to the
# repository -- confirm, and consider loading it from outside the source.
# NOTE(review): dedupe_use_body / dedupe_use_date set to 0 presumably switch
# off those fields in the DeDupe trait's comparison -- confirm there.
my %fun_defaults = ( mail_folders => 'Fun' );

my @fun_feeds = (
    'http://feeds2.feedburner.com/rsspect/fJur',
    'http://feeds2.feedburner.com/sydneypadua/yBZX',
    { uri => 'http://oglaf.com/feeds/rss/', title => 'Oglaf' },
    'http://www.bureau42.com/bureau42.rss',
    'http://syndication.thedailywtf.com/TheDailyWtf',
    'http://thisisindexed.com/feed/',
    'http://notalwaysright.com/feed/atom',
    {
        uri             => 'http://bakabt.me/rss.php?uid=714931&key=94346a8e67f313b3cf64bef311eaf551',
        title           => 'BakaBT releases',
        dedupe_use_body => 0,
        dedupe_use_date => 0,
    },
    'http://blog.thimbleweedpark.com/rss',
);

feeds( \%fun_defaults, @fun_feeds );
 
# 'OSS' feeds: an options hash (mail_folders => 'OSS') followed by plain
# feed URIs, all passed to a single feeds() call.
my %oss_defaults = ( mail_folders => 'OSS' );

my @oss_feeds = (
    'http://www.alistapart.com/rss.xml',
    'http://www.groklaw.net/backend/GrokLaw.rdf',
    'http://feeds.feedburner.com/oreilly/radar/atom',
    'https://www.oreilly.com/topics/four-short-links/feed.atom',
    'http://feeds.feedburner.com/Oreillygmt',
    'http://emacs-fu.blogspot.com/feeds/posts/default',
);

feeds( \%oss_defaults, @oss_feeds );
 
# 'Perl' feeds subscribed to directly (in addition to the Planet Perl OPML
# handled elsewhere in this script). The first argument is the options hash
# (mail_folders => 'Perl'); the rest are plain URIs or hashes with a "uri"
# and per-feed options.
# The MetaCPAN entry is read from a local file:// URI and given an explicit
# title.
# NOTE(review): dedupe_use_body => 0 presumably tells the DeDupe trait not to
# compare entry bodies -- confirm there.
my %perl_defaults = ( mail_folders => 'Perl' );

my @perl_feeds = (
    {
        uri   => 'file:///var/www/thenautilus/htdocs/metacpan.atom',
        title => 'Recent CPAN uploads - MetaCPAN',
    },
    'http://pugs.blogs.com/pugs/index.rdf',
    'http://blog.plover.com/index.atom',
    'http://blogs.perl.org/atom.xml',
    'http://YAPHB.blogspot.com/atom.xml',
    'http://pugs.blogs.com/audrey/atom.xml',
    'http://blog.fsck.com/atom.xml',
    'http://blog.urth.org/feed/',
    'http://log.perl.org/feeds/posts/default',
    'http://blog.schwern.net/atom.xml',
    { uri => 'http://www.dagolden.com/index.php/feed/', dedupe_use_body => 0 },
    'http://perl6advent.wordpress.com/feed/',
    'http://perladvent.org/2014/atom.xml',
    'http://leonerds-code.blogspot.com/feeds/posts/default',
    'http://pl6anet.org/atom.xml',
);

feeds( \%perl_defaults, @perl_feeds );
 
# 'Science' feeds: an options hash (mail_folders => 'Science') followed by
# plain URIs or hashes with a "uri" and per-feed options, all passed to a
# single feeds() call.
# NOTE(review): dedupe_use_body => 0 presumably tells the DeDupe trait not to
# compare entry bodies -- confirm there.
my %science_defaults = ( mail_folders => 'Science' );

my @science_feeds = (
    {
        uri   => 'http://www.slate.com/blogs/bad_astronomy.fulltext.all.10.rss',
        title => 'Bad Astronomy',
    },
    'http://www.badscience.net/feed/',
    'http://blog.khymos.org/feed/',
    'http://glottorellando.wordpress.com/feed/',
    'http://languagelog.ldc.upenn.edu/nll/?feed=atom',
    { uri => 'http://www.goodmath.org/blog/?feed=rss2', dedupe_use_body => 0 },
    'http://web.randi.org/1/feed',
    'http://cattaneo-lescienze.blogautore.espresso.repubblica.it/feed/atom/',
    'http://feeds.feedburner.com/makezineonline',
    'http://ovadia-lescienze.blogautore.espresso.repubblica.it/feed/atom/',
    'http://rudimatematici-lescienze.blogautore.espresso.repubblica.it/feed/atom/',
    'http://www.schneier.com/blog/index.rdf',
    'http://bressanini-lescienze.blogautore.espresso.repubblica.it/feed/atom/',
    'http://feeds.feedburner.com/cavolettobruxelles',
    'http://scienceofmagic.wordpress.com/feed/',
    'http://blog.plan28.org/feeds/posts/default',
    'http://chasmosaurs.blogspot.com/feeds/posts/default',
);

feeds( \%science_defaults, @science_feeds );
 
# 'Stuff' feeds: an options hash (mail_folders => 'Stuff') followed by plain
# feed URIs, all passed to a single feeds() call.
my %stuff_defaults = ( mail_folders => 'Stuff' );

my @stuff_feeds = (
    'https://freedom-to-tinker.com/feed/atom/',
    'http://hackaday.com/feed/',
    'http://watchismo.blogspot.com/feeds/posts/default',
);

feeds( \%stuff_defaults, @stuff_feeds );
 
# 'Weblogs' feeds: an options hash (mail_folders => 'Weblogs') followed by
# plain URIs or hashes with a "uri" and per-feed options, all passed to a
# single feeds() call.
# NOTE(review): dedupe_use_body => 0 presumably tells the DeDupe trait not to
# compare entry bodies -- confirm there.
my %weblogs_defaults = ( mail_folders => 'Weblogs' );

my @weblogs_feeds = (
    'http://feeds.feedburner.com/boingboing/iBag',
    'http://www.rousette.org.uk/blog/index.xml',
    { uri => 'http://www.haibane.info/feed/', dedupe_use_body => 0 },
    'http://nobodyharks.blogspot.com/feeds/posts/default',
    'http://spaghettovolante.wordpress.com/feed/',
    'http://exquisitebeast.tumblr.com/rss',
);

feeds( \%weblogs_defaults, @weblogs_feeds );
 
# 'Writers' feeds: an options hash (mail_folders => 'Writers') followed by
# plain URIs or hashes with a "uri" and per-feed options, all passed to a
# single feeds() call.
# NOTE(review): dedupe_use_body => 0 / dedupe_use_date => 0 presumably switch
# off those fields in the DeDupe trait's comparison -- confirm there.
my %writers_defaults = ( mail_folders => 'Writers' );

my @writers_feeds = (
    'http://scienceinmyfiction.com/feed/',
    {
        uri             => 'http://www.antipope.org/charlie/blog-static/atom.xml',
        dedupe_use_date => 0,
    },
    {
        uri             => 'http://elizabethbear.livejournal.com/data/rss',
        dedupe_use_body => 0,
    },
    'http://lisagoldresearch.wordpress.com/feed/',
    {
        uri             => 'http://journal.neilgaiman.com/feeds/posts/default',
        dedupe_use_body => 0,
    },
    'http://www.tor.com/feed',
    'http://www.thewaythefutureblogs.com/feed/',
    'http://whatever.scalzi.com/feed/',
    {
        uri             => 'http://thecomposites.tumblr.com/rss',
        dedupe_use_body => 0,
    },
    {
        uri             => 'http://www.goodreads.com/author/show/16094.Lois_McMaster_Bujold/blog?format=rss',
        dedupe_use_body => 0,
    },
);

feeds( \%writers_defaults, @writers_feeds );
 
# '3dPrinter' feeds: an options hash (mail_folders => '3dPrinter') followed
# by a single feed given as a hash with its "uri" and one per-feed option.
# NOTE(review): dedupe_use_body => 0 presumably tells the DeDupe trait not to
# compare entry bodies -- confirm there.
my %printer_3d_defaults = ( mail_folders => '3dPrinter' );

my @printer_3d_feeds = (
    {
        uri             => 'http://hydraraptor.blogspot.com/feeds/posts/default',
        dedupe_use_body => 0,
    },
);

feeds( \%printer_3d_defaults, @printer_3d_feeds );