summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGianni Ceccarelli <gianni.ceccarelli@broadbean.com>2023-02-22 13:53:31 +0000
committerGianni Ceccarelli <gianni.ceccarelli@broadbean.com>2023-02-22 13:53:31 +0000
commit2f110d90d8076c4eecedc038598a40d48d253362 (patch)
tree3b1ff90a87fc6990552a047d8f6a23f3fda2e96a
parentonly print failed responses on error (diff)
parentstore misskey notes & people (diff)
downloadtweet-archive-2f110d90d8076c4eecedc038598a40d48d253362.tar.gz
tweet-archive-2f110d90d8076c4eecedc038598a40d48d253362.tar.bz2
tweet-archive-2f110d90d8076c4eecedc038598a40d48d253362.zip
Merge branch 'misskey'
-rw-r--r--cpanfile14
-rw-r--r--lib/Dakkar/Misskey.pm82
-rw-r--r--lib/Dakkar/NotesArchive.pm46
-rw-r--r--lib/Dakkar/TweetArchive.pm3
-rw-r--r--lib/Dakkar/TweetArchive/Store.pm202
-rw-r--r--lib/Dakkar/TweetArchive/TraceCalls.pm2
-rw-r--r--tweet-archive.pl89
7 files changed, 393 insertions, 45 deletions
diff --git a/cpanfile b/cpanfile
index 6a5df06..38449c5 100644
--- a/cpanfile
+++ b/cpanfile
@@ -1,14 +1,16 @@
#!perl
-requires 'perl', '>= 5.024';
+requires 'perl', '>= 5.036';
+requires 'DBD::Pg';
+requires 'DBI';
requires 'DateTime::Format::Pg';
requires 'DateTime::Format::Strptime';
-requires 'DBI';
-requires 'DBD::Pg';
-requires 'experimental';
requires 'JSON::MaybeXS';
+requires 'List::Util';
requires 'Moo';
requires 'Moo::Role';
-requires 'namespace::clean';
requires 'Net::Twitter';
-requires 'Path::Class';
+requires 'Path::Tiny';
+requires 'PerlX::Maybe';
requires 'Types::Standard';
+requires 'experimental';
+requires 'namespace::clean';
diff --git a/lib/Dakkar/Misskey.pm b/lib/Dakkar/Misskey.pm
new file mode 100644
index 0000000..327cfe8
--- /dev/null
+++ b/lib/Dakkar/Misskey.pm
@@ -0,0 +1,82 @@
+package Dakkar::Misskey;
+use v5.36;
+use Moo;
+use JSON::MaybeXS;
+use LWP::UserAgent;
+use Types::Standard qw(Str);
+use Types::URI qw(Uri);
+use List::Util qw(minstr);
+use URI;
+use namespace::clean;
+
+has _json => ( is => 'lazy', builder => sub { JSON::MaybeXS->new(utf8=>1,relaxed=>1, pretty=>0) } );
+has ua => ( is => 'lazy', builder => sub { LWP::UserAgent->new(agent=>'Dakkar::Misskey') } );
+
+has token => ( is => 'ro', required => 1 );
+has base_url => ( is => 'ro', required => 1, isa => Uri, coerce => 1 );
+
+sub _request($self, $endpoint, $payload) {
+ my $payload_json = $self->_json->encode({
+ $payload->%*,
+ i => $self->token,
+ });
+
+ my $uri = URI->new($endpoint)->abs($self->base_url);
+
+ my $response = $self->ua->post(
+ $uri,
+ 'Content-type' => 'application/json',
+ Content => $payload_json,
+ );
+
+ if ($response->is_success) {
+ return $self->_json->decode(
+ $response->decoded_content(charset=>'none')
+ );
+ }
+
+ die $response->status_line;
+}
+
+sub _paged_request($self, $endpoint, $payload) {
+ my @all_results;
+
+ my $page_payload = {
+ limit => 100,
+ $payload->%*,
+ };
+
+ while (1) {
+ my $result = $self->_request($endpoint, $page_payload);
+
+ last unless $result->@*;
+
+ push @all_results, $result->@*;
+ $page_payload->{untilId} = minstr(map { $_->{id} } $result->@* );
+ }
+
+ return \@all_results;
+}
+
+sub timeline($self,$options) {
+ return $self->_paged_request(
+ 'api/notes/timeline',
+ $options,
+ );
+}
+
+sub followers($self,$user_id) {
+ return $self->_paged_request(
+ 'api/users/followers',
+ { userId => $user_id },
+ );
+}
+
+sub following($self,$user_id) {
+ return $self->_paged_request(
+ 'api/users/following',
+ { userId => $user_id },
+ );
+}
+
+1;
diff --git a/lib/Dakkar/NotesArchive.pm b/lib/Dakkar/NotesArchive.pm
new file mode 100644
index 0000000..1fcf248
--- /dev/null
+++ b/lib/Dakkar/NotesArchive.pm
@@ -0,0 +1,46 @@
+package Dakkar::NotesArchive;
+use v5.36;
+use Moo;
+use experimental 'builtin';
+use PerlX::Maybe;
+use Dakkar::Misskey;
+use Types::Standard qw(Str InstanceOf);
+use namespace::clean;
+
+has [qw(base_url token user_id)] => (
+ is => 'ro',
+ required => 1,
+ isa => Str,
+);
+
+has client => (
+ is => 'lazy',
+ isa => InstanceOf['Dakkar::Misskey'],
+);
+
+
+sub _build_client($self) {
+ my $nt = Dakkar::Misskey->new({
+ base_url => $self->base_url,
+ token => $self->token,
+ });
+}
+
+sub timeline($self, $since_id) {
+ return $self->client->timeline({
+ maybe sinceId => $since_id,
+ includeMyRenotes => \1,
+ includeLocalRenotes => \1,
+ includeRenotedMyNotes => \0,
+ });
+}
+
+sub following($self) {
+ return $self->client->following($self->user_id);
+}
+
+sub followers($self) {
+ return $self->client->followers($self->user_id);
+}
+
+1;
diff --git a/lib/Dakkar/TweetArchive.pm b/lib/Dakkar/TweetArchive.pm
index fe28d13..53f9777 100644
--- a/lib/Dakkar/TweetArchive.pm
+++ b/lib/Dakkar/TweetArchive.pm
@@ -1,7 +1,6 @@
package Dakkar::TweetArchive;
-use 5.024;
+use v5.36;
use Moo;
-use experimental 'signatures';
use Net::Twitter;
use Types::Standard qw(Str InstanceOf);
use namespace::clean;
diff --git a/lib/Dakkar/TweetArchive/Store.pm b/lib/Dakkar/TweetArchive/Store.pm
index 9a97ca6..f0068de 100644
--- a/lib/Dakkar/TweetArchive/Store.pm
+++ b/lib/Dakkar/TweetArchive/Store.pm
@@ -1,7 +1,6 @@
package Dakkar::TweetArchive::Store;
-use 5.024;
+use v5.36;
use Moo;
-use experimental 'signatures';
use DBI;
use Types::Standard qw(Str InstanceOf);
use DateTime::Format::Strptime;
@@ -36,6 +35,7 @@ sub BUILD($self,@) {
}
my $dt_parser = DateTime::Format::Strptime->new(pattern => '%a %b %d %T %z %Y');
+my $dt_parser_iso = DateTime::Format::Strptime->new(pattern => '%Y-%m-%dT%H:%M:%S.%3NZ', time_zone=>'UTC');
my $dt_printer = DateTime::Format::Pg->new();
my $json_printer = JSON::MaybeXS->new(
@@ -47,6 +47,8 @@ my $json_printer = JSON::MaybeXS->new(
convert_blessed => 1,
);
+# tweets
+
sub latest_tweet_id($self) {
return $self->dbh->selectall_arrayref(
q{SELECT MAX(id) FROM tweets},
@@ -72,14 +74,14 @@ SQL
);
}
-sub _store_people($self,$people) {
+sub _store_twitter_people($self,$people) {
my @ids;
for my $person ($people->@*) {
my $person_str = $json_printer->encode($person);
push @ids,
$self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0];
-INSERT INTO people(data) VALUES (?)
-ON CONFLICT (people_details(data)) DO UPDATE
+INSERT INTO twitter_people(data) VALUES (?)
+ON CONFLICT (twitter_people_details(data)) DO UPDATE
SET data=EXCLUDED.data
RETURNING id
SQL
@@ -89,22 +91,84 @@ SQL
}
-sub store_friends($self,$friends) {
- my $ids = $self->_store_people($friends);
+sub store_twitter_friends($self,$friends) {
+ my $ids = $self->_store_twitter_people($friends);
$self->dbh->do(<<"SQL", {}, $ids);
-INSERT INTO friends(users) VALUES(?)
+INSERT INTO twitter_friends(users) VALUES(?)
SQL
}
-sub store_followers($self,$followers) {
- my $ids = $self->_store_people($followers);
+sub store_twitter_followers($self,$followers) {
+ my $ids = $self->_store_twitter_people($followers);
$self->dbh->do(<<"SQL", {}, $ids);
-INSERT INTO followers(users) VALUES(?)
+INSERT INTO twitter_followers(users) VALUES(?)
+SQL
+}
+
+# misskey notes
+
+sub latest_note_id($self) {
+ return $self->dbh->selectall_arrayref(
+ q{SELECT MAX(id) FROM notes},
+ )->[0][0];
+}
+
+sub store_note($self,$note) {
+ # yes, the source most probably decoded this from a string, we
+ # have to serialise it again, so that PostgreSQL can parse it
+ # *again*
+ my $note_str = $json_printer->encode($note);
+ my $created_at = $dt_parser_iso->parse_datetime($note->{createdAt});
+
+ $self->dbh->do(<<'SQL', {},
+INSERT INTO notes(id,created_at,data) VALUES(?,?,?)
+ ON CONFLICT (id) DO UPDATE SET
+ created_at = EXCLUDED.created_at,
+ data = EXCLUDED.data
+SQL
+ $note->{id},
+ $dt_printer->format_datetime($created_at),
+ $note_str,
+ );
+}
+
+sub _store_misskey_people($self,$people) {
+ my @ids;
+ for my $person ($people->@*) {
+ my $person_str = $json_printer->encode($person);
+ push @ids,
+ $self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0];
+INSERT INTO misskey_people(data) VALUES (?)
+ON CONFLICT (misskey_people_details(data)) DO UPDATE
+ SET data=EXCLUDED.data
+RETURNING id
SQL
+ }
+
+ return \@ids;
}
+
+sub store_misskey_following($self,$friends) {
+ my $ids = $self->_store_twitter_people($friends);
+
+ $self->dbh->do(<<"SQL", {}, $ids);
+INSERT INTO misskey_following(users) VALUES(?)
+SQL
+}
+
+sub store_misskey_followers($self,$followers) {
+ my $ids = $self->_store_misskey_people($followers);
+
+ $self->dbh->do(<<"SQL", {}, $ids);
+INSERT INTO misskey_followers(users) VALUES(?)
+SQL
+}
+
+# schema
+
sub _schema_deploy($self,$next_version) {
my $method_name = "_schema_deploy_${next_version}";
if (my $method = $self->can($method_name)) {
@@ -360,4 +424,120 @@ $$;
SQL
}
+sub _schema_deploy_6($self) {
+ my $dbh = $self->dbh;
+
+ $dbh->do('ALTER TABLE people RENAME TO twitter_people');
+ $dbh->do('ALTER TABLE friends RENAME TO twitter_friends');
+ $dbh->do('ALTER TABLE followers RENAME TO twitter_followers');
+ $dbh->do('ALTER INDEX idx_people RENAME TO idx_twitter_people');
+ $dbh->do('ALTER FUNCTION people_details RENAME TO twitter_people_details');
+
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION note_text(IN t JSONB) RETURNS text
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$
+SELECT t->>'text'
+$$;
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION note_text_recursive(IN t JSONB) RETURNS text
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$ SELECT
+note_text(t) || ' ' ||
+COALESCE( note_text_recursive(t->'renote'), '')
+$$;
+SQL
+
+ # misskey doesn't currently store language, let's pretend it's all
+ # English
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION note_language(IN t JSONB) RETURNS regconfig
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$
+ SELECT 'pg_catalog.english'::regconfig
+$$;
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TABLE notes (
+ id VARCHAR(255) PRIMARY KEY,
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ data JSONB NOT NULL,
+ fts tsvector
+);
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE INDEX notes_fts ON notes USING GIN (fts);
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION notes_fts_trigger()
+RETURNS trigger
+LANGUAGE plpgsql
+AS $$
+begin
+ new.fts := to_tsvector(note_language(new.data),note_text_recursive(new.data));
+ return new;
+end
+$$;
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TRIGGER notes_fts_update
+ BEFORE INSERT OR UPDATE
+ ON notes
+ FOR EACH ROW
+ EXECUTE PROCEDURE notes_fts_trigger();
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TABLE misskey_people (
+ id SERIAL PRIMARY KEY,
+ data JSONB NOT NULL
+)
+SQL
+ $dbh->do(<<'SQL');
+CREATE FUNCTION misskey_people_details(data jsonb) RETURNS text[] AS $$
+ SELECT array[
+ (data->>'id'),
+ (data->>'username'),
+ COALESCE(data->>'host',''),
+ (data->>'location'),
+ (data->>'description'),
+ (data->>'name'),
+ (data->>'avatarBlurhash'),
+ (data->>'bannerBlurhash')
+ ];
+$$ LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT
+SQL
+ $dbh->do(<<'SQL');
+CREATE UNIQUE INDEX idx_miskkey_people ON misskey_people (misskey_people_details(data))
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TABLE misskey_followers (
+ taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY,
+ users integer[] NOT NULL
+)
+SQL
+ $dbh->do(<<'SQL');
+CREATE TABLE misskey_following (
+ taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY,
+ users integer[] NOT NULL
+)
+SQL
+}
+
1;
diff --git a/lib/Dakkar/TweetArchive/TraceCalls.pm b/lib/Dakkar/TweetArchive/TraceCalls.pm
index 7591b69..de8cc78 100644
--- a/lib/Dakkar/TweetArchive/TraceCalls.pm
+++ b/lib/Dakkar/TweetArchive/TraceCalls.pm
@@ -1,5 +1,5 @@
package Dakkar::TweetArchive::TraceCalls;
-use 5.024;
+use v5.36;
use Moo::Role;
use experimental 'signatures';
diff --git a/tweet-archive.pl b/tweet-archive.pl
index 6e007d3..1a7232b 100644
--- a/tweet-archive.pl
+++ b/tweet-archive.pl
@@ -1,10 +1,13 @@
#!/usr/bin/env perl
-use 5.024;
+use v5.36;
use strict;
use warnings;
-use Path::Class;
+use experimental 'try';
+use lib 'lib','local/lib/perl5';
+use Path::Tiny;
use JSON::MaybeXS;
use Dakkar::TweetArchive;
+use Dakkar::NotesArchive;
use Dakkar::TweetArchive::Store;
my $json_parser = JSON::MaybeXS->new(
@@ -13,34 +16,70 @@ my $json_parser = JSON::MaybeXS->new(
);
my $conf = $json_parser->decode(
- file(__FILE__)->parent->file('tweet-archive.conf')
- ->slurp(iomode=>'<:raw')
- // '{}'
- );
+ do {
+ try {
+ path(__FILE__)->sibling('tweet-archive.conf')->slurp_raw()
+ }
+ catch ($e) {
+ '{}'
+ }
+ },
+);
+
+my $store = Dakkar::TweetArchive::Store->new($conf->{db});
+
+if ($conf->{twitter}) {
+ my $client = Dakkar::TweetArchive->new($conf->{twitter});
-my $client = Dakkar::TweetArchive->new($conf);
-my $store = Dakkar::TweetArchive::Store->new($conf);
+ my @responses;
+ $client->client->ua->add_handler( response_done => sub { push @responses, $_[0]; return } );
-my $ua = $client->client->ua;
-my @responses;
-$ua->add_handler( response_done => sub { push @responses, $_[0]; return } );
+ try {
+ my $latest_id = $store->latest_tweet_id;
-eval {
- my $latest_id = $store->latest_tweet_id;
+ for my $tweet ($client->home_timeline($latest_id)->@*) {
+ $store->store_tweet($tweet);
+ }
- for my $tweet ($client->home_timeline($latest_id)->@*) {
- $store->store_tweet($tweet);
+ $store->store_twitter_friends($client->friends);
+ $store->store_twitter_followers($client->followers);
}
+ catch ($e) {
+ print "Twitter Fail: $e\n";
+ for my $res (@responses) {
+ next if $res->is_success;
+ print $res->request->as_string;
+ print $res->as_string;
+ print "\n",'-' x 50,"\n\n";
+ }
+ }
+ ;
+};
- $store->store_friends($client->friends);
- $store->store_followers($client->followers);
- 1;
-} or do {
- print "Fail: $@\n";
- for my $res (@responses) {
- next if $res->is_success;
- print $res->request->as_string;
- print $res->as_string;
- print "\n",'-' x 50,"\n\n";
+if ($conf->{misskey}) {
+ my $client = Dakkar::NotesArchive->new($conf->{misskey});
+
+ my @responses;
+ $client->client->ua->add_handler( response_done => sub { push @responses, $_[0]; return } );
+
+ try {
+ my $latest_id = $store->latest_note_id;
+
+ for my $note ($client->timeline($latest_id)->@*) {
+ $store->store_note($note);
+ }
+
+ $store->store_misskey_following($client->following);
+ $store->store_misskey_followers($client->followers);
}
+ catch ($e) {
+ print "Misskey Fail: $e\n";
+ for my $res (@responses) {
+ next if $res->is_success;
+ print $res->request->as_string;
+ print $res->as_string;
+ print "\n",'-' x 50,"\n\n";
+ }
+ };
};
+