summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGianni Ceccarelli <gianni.ceccarelli@broadbean.com>2023-02-22 13:53:11 +0000
committerGianni Ceccarelli <gianni.ceccarelli@broadbean.com>2023-02-22 13:53:11 +0000
commit0b356d2e192fa531fb149cfd89f9d93324a90ebe (patch)
tree3b1ff90a87fc6990552a047d8f6a23f3fda2e96a
parentmisskey client (diff)
downloadtweet-archive-0b356d2e192fa531fb149cfd89f9d93324a90ebe.tar.gz
tweet-archive-0b356d2e192fa531fb149cfd89f9d93324a90ebe.tar.bz2
tweet-archive-0b356d2e192fa531fb149cfd89f9d93324a90ebe.zip
store misskey notes & people
-rw-r--r--cpanfile11
-rw-r--r--lib/Dakkar/Misskey.pm18
-rw-r--r--lib/Dakkar/NotesArchive.pm3
-rw-r--r--lib/Dakkar/TweetArchive/Store.pm199
-rw-r--r--tweet-archive.pl72
5 files changed, 257 insertions, 46 deletions
diff --git a/cpanfile b/cpanfile
index 66ba8c6..38449c5 100644
--- a/cpanfile
+++ b/cpanfile
@@ -1,15 +1,16 @@
#!perl
requires 'perl', '>= 5.036';
+requires 'DBD::Pg';
+requires 'DBI';
requires 'DateTime::Format::Pg';
requires 'DateTime::Format::Strptime';
-requires 'DBI';
-requires 'DBD::Pg';
-requires 'experimental';
requires 'JSON::MaybeXS';
+requires 'List::Util';
requires 'Moo';
requires 'Moo::Role';
-requires 'namespace::clean';
requires 'Net::Twitter';
requires 'Path::Tiny';
+requires 'PerlX::Maybe';
requires 'Types::Standard';
-requires 'List::Util';
+requires 'experimental';
+requires 'namespace::clean';
diff --git a/lib/Dakkar/Misskey.pm b/lib/Dakkar/Misskey.pm
index 8d6dd5e..327cfe8 100644
--- a/lib/Dakkar/Misskey.pm
+++ b/lib/Dakkar/Misskey.pm
@@ -10,7 +10,7 @@ use URI;
use namespace::clean;
has _json => ( is => 'lazy', builder => sub { JSON::MaybeXS->new(utf8=>1,relaxed=>1, pretty=>0) } );
-has _ua => ( is => 'lazy', builder => sub { LWP::UserAgent->new(agent=>'Dakkar::Misskey') } );
+has ua => ( is => 'lazy', builder => sub { LWP::UserAgent->new(agent=>'Dakkar::Misskey') } );
has token => ( is => 'ro', required => 1 );
has base_url => ( is => 'ro', required => 1, isa => Uri, coerce => 1 );
@@ -23,22 +23,19 @@ sub _request($self, $endpoint, $payload) {
my $uri = URI->new($endpoint)->abs($self->base_url);
- my $response = $self->_ua->post(
+ my $response = $self->ua->post(
$uri,
'Content-type' => 'application/json',
Content => $payload_json,
);
if ($response->is_success) {
- return(
- $self->_json->decode(
- $response->decoded_content(charset=>'none')
- ),
- undef,
+ return $self->_json->decode(
+ $response->decoded_content(charset=>'none')
);
}
- return( undef, $response->status_line );
+ die $response->status_line;
}
sub _paged_request($self, $endpoint, $payload) {
@@ -50,8 +47,7 @@ sub _paged_request($self, $endpoint, $payload) {
};
while (1) {
- my ($result, $error) = $self->_request($endpoint, $page_payload);
- return (\@all_results, $error) if $error;
+ my $result = $self->_request($endpoint, $page_payload);
last unless $result->@*;
@@ -59,7 +55,7 @@ sub _paged_request($self, $endpoint, $payload) {
$page_payload->{untilId} = minstr(map { $_->{id} } $result->@* );
}
- return (\@all_results, undef);
+ return \@all_results;
}
sub timeline($self,$options) {
diff --git a/lib/Dakkar/NotesArchive.pm b/lib/Dakkar/NotesArchive.pm
index 5571419..1fcf248 100644
--- a/lib/Dakkar/NotesArchive.pm
+++ b/lib/Dakkar/NotesArchive.pm
@@ -2,6 +2,7 @@ package Dakkar::NotesArchive;
use v5.36;
use Moo;
use experimental 'builtin';
+use PerlX::Maybe;
use Dakkar::Misskey;
use Types::Standard qw(Str InstanceOf);
use namespace::clean;
@@ -27,7 +28,7 @@ sub _build_client($self) {
sub timeline($self, $since_id) {
return $self->client->timeline({
- sinceId => $since_id,
+ maybe sinceId => $since_id,
includeMyRenotes => \1,
includeLocalRenotes => \1,
includeRenotedMyNotes => \0,
diff --git a/lib/Dakkar/TweetArchive/Store.pm b/lib/Dakkar/TweetArchive/Store.pm
index fc75475..f0068de 100644
--- a/lib/Dakkar/TweetArchive/Store.pm
+++ b/lib/Dakkar/TweetArchive/Store.pm
@@ -35,6 +35,7 @@ sub BUILD($self,@) {
}
my $dt_parser = DateTime::Format::Strptime->new(pattern => '%a %b %d %T %z %Y');
+my $dt_parser_iso = DateTime::Format::Strptime->new(pattern => '%Y-%m-%dT%H:%M:%S.%3NZ', time_zone=>'UTC');
my $dt_printer = DateTime::Format::Pg->new();
my $json_printer = JSON::MaybeXS->new(
@@ -46,6 +47,8 @@ my $json_printer = JSON::MaybeXS->new(
convert_blessed => 1,
);
+# tweets
+
sub latest_tweet_id($self) {
return $self->dbh->selectall_arrayref(
q{SELECT MAX(id) FROM tweets},
@@ -71,14 +74,14 @@ SQL
);
}
-sub _store_people($self,$people) {
+sub _store_twitter_people($self,$people) {
my @ids;
for my $person ($people->@*) {
my $person_str = $json_printer->encode($person);
push @ids,
$self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0];
-INSERT INTO people(data) VALUES (?)
-ON CONFLICT (people_details(data)) DO UPDATE
+INSERT INTO twitter_people(data) VALUES (?)
+ON CONFLICT (twitter_people_details(data)) DO UPDATE
SET data=EXCLUDED.data
RETURNING id
SQL
@@ -88,22 +91,84 @@ SQL
}
-sub store_friends($self,$friends) {
- my $ids = $self->_store_people($friends);
+sub store_twitter_friends($self,$friends) {
+ my $ids = $self->_store_twitter_people($friends);
$self->dbh->do(<<"SQL", {}, $ids);
-INSERT INTO friends(users) VALUES(?)
+INSERT INTO twitter_friends(users) VALUES(?)
SQL
}
-sub store_followers($self,$followers) {
- my $ids = $self->_store_people($followers);
+sub store_twitter_followers($self,$followers) {
+ my $ids = $self->_store_twitter_people($followers);
$self->dbh->do(<<"SQL", {}, $ids);
-INSERT INTO followers(users) VALUES(?)
+INSERT INTO twitter_followers(users) VALUES(?)
+SQL
+}
+
+# misskey notes
+
+sub latest_note_id($self) {
+ return $self->dbh->selectall_arrayref(
+ q{SELECT MAX(id) FROM notes},
+ )->[0][0];
+}
+
+sub store_note($self,$note) {
+ # yes, the source most probably decoded this from a string, we
+ # have to serialise it again, so that PostgreSQL can parse it
+ # *again*
+ my $note_str = $json_printer->encode($note);
+ my $created_at = $dt_parser_iso->parse_datetime($note->{createdAt});
+
+ $self->dbh->do(<<'SQL', {},
+INSERT INTO notes(id,created_at,data) VALUES(?,?,?)
+ ON CONFLICT (id) DO UPDATE SET
+ created_at = EXCLUDED.created_at,
+ data = EXCLUDED.data
+SQL
+ $note->{id},
+ $dt_printer->format_datetime($created_at),
+ $note_str,
+ );
+}
+
+sub _store_misskey_people($self,$people) {
+ my @ids;
+ for my $person ($people->@*) {
+ my $person_str = $json_printer->encode($person);
+ push @ids,
+ $self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0];
+INSERT INTO misskey_people(data) VALUES (?)
+ON CONFLICT (misskey_people_details(data)) DO UPDATE
+ SET data=EXCLUDED.data
+RETURNING id
SQL
+ }
+
+ return \@ids;
}
+
+sub store_misskey_following($self,$friends) {
+ my $ids = $self->_store_twitter_people($friends);
+
+ $self->dbh->do(<<"SQL", {}, $ids);
+INSERT INTO misskey_following(users) VALUES(?)
+SQL
+}
+
+sub store_misskey_followers($self,$followers) {
+ my $ids = $self->_store_misskey_people($followers);
+
+ $self->dbh->do(<<"SQL", {}, $ids);
+INSERT INTO misskey_followers(users) VALUES(?)
+SQL
+}
+
+# schema
+
sub _schema_deploy($self,$next_version) {
my $method_name = "_schema_deploy_${next_version}";
if (my $method = $self->can($method_name)) {
@@ -359,4 +424,120 @@ $$;
SQL
}
+sub _schema_deploy_6($self) {
+ my $dbh = $self->dbh;
+
+ $dbh->do('ALTER TABLE people RENAME TO twitter_people');
+ $dbh->do('ALTER TABLE friends RENAME TO twitter_friends');
+ $dbh->do('ALTER TABLE followers RENAME TO twitter_followers');
+ $dbh->do('ALTER INDEX idx_people RENAME TO idx_twitter_people');
+ $dbh->do('ALTER FUNCTION people_details RENAME TO twitter_people_details');
+
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION note_text(IN t JSONB) RETURNS text
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$
+SELECT t->>'text'
+$$;
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION note_text_recursive(IN t JSONB) RETURNS text
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$ SELECT
+note_text(t) || ' ' ||
+COALESCE( note_text_recursive(t->'renote'), '')
+$$;
+SQL
+
+ # misskey doesn't currently store language, let's pretend it's all
+ # English
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION note_language(IN t JSONB) RETURNS regconfig
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$
+ SELECT 'pg_catalog.english'::regconfig
+$$;
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TABLE notes (
+ id VARCHAR(255) PRIMARY KEY,
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ data JSONB NOT NULL,
+ fts tsvector
+);
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE INDEX notes_fts ON notes USING GIN (fts);
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION notes_fts_trigger()
+RETURNS trigger
+LANGUAGE plpgsql
+AS $$
+begin
+ new.fts := to_tsvector(note_language(new.data),note_text_recursive(new.data));
+ return new;
+end
+$$;
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TRIGGER notes_fts_update
+ BEFORE INSERT OR UPDATE
+ ON notes
+ FOR EACH ROW
+ EXECUTE PROCEDURE notes_fts_trigger();
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TABLE misskey_people (
+ id SERIAL PRIMARY KEY,
+ data JSONB NOT NULL
+)
+SQL
+ $dbh->do(<<'SQL');
+CREATE FUNCTION misskey_people_details(data jsonb) RETURNS text[] AS $$
+ SELECT array[
+ (data->>'id'),
+ (data->>'username'),
+ COALESCE(data->>'host',''),
+ (data->>'location'),
+ (data->>'description'),
+ (data->>'name'),
+ (data->>'avatarBlurhash'),
+ (data->>'bannerBlurhash')
+ ];
+$$ LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT
+SQL
+ $dbh->do(<<'SQL');
+CREATE UNIQUE INDEX idx_miskkey_people ON misskey_people (misskey_people_details(data))
+SQL
+
+ $dbh->do(<<'SQL');
+CREATE TABLE misskey_followers (
+ taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY,
+ users integer[] NOT NULL
+)
+SQL
+ $dbh->do(<<'SQL');
+CREATE TABLE misskey_following (
+ taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY,
+ users integer[] NOT NULL
+)
+SQL
+}
+
1;
diff --git a/tweet-archive.pl b/tweet-archive.pl
index a1b5fff..1a7232b 100644
--- a/tweet-archive.pl
+++ b/tweet-archive.pl
@@ -3,9 +3,11 @@ use v5.36;
use strict;
use warnings;
use experimental 'try';
+use lib 'lib','local/lib/perl5';
use Path::Tiny;
use JSON::MaybeXS;
use Dakkar::TweetArchive;
+use Dakkar::NotesArchive;
use Dakkar::TweetArchive::Store;
my $json_parser = JSON::MaybeXS->new(
@@ -24,30 +26,60 @@ my $conf = $json_parser->decode(
},
);
-my $client = Dakkar::TweetArchive->new($conf);
-my $store = Dakkar::TweetArchive::Store->new($conf);
+my $store = Dakkar::TweetArchive::Store->new($conf->{db});
-my $ua = $client->client->ua;
-my @responses;
-$ua->add_handler( response_done => sub { push @responses, $_[0]; return } );
+if ($conf->{twitter}) {
+ my $client = Dakkar::TweetArchive->new($conf->{twitter});
-try {
- my $latest_id = $store->latest_tweet_id;
+ my @responses;
+ $client->client->ua->add_handler( response_done => sub { push @responses, $_[0]; return } );
- for my $tweet ($client->home_timeline($latest_id)->@*) {
- $store->store_tweet($tweet);
+ try {
+ my $latest_id = $store->latest_tweet_id;
+
+ for my $tweet ($client->home_timeline($latest_id)->@*) {
+ $store->store_tweet($tweet);
+ }
+
+ $store->store_twitter_friends($client->friends);
+ $store->store_twitter_followers($client->followers);
}
+ catch ($e) {
+ print "Twitter Fail: $e\n";
+ for my $res (@responses) {
+ next if $res->is_success;
+ print $res->request->as_string;
+ print $res->as_string;
+ print "\n",'-' x 50,"\n\n";
+ }
+ }
+ ;
+};
+
+if ($conf->{misskey}) {
+ my $client = Dakkar::NotesArchive->new($conf->{misskey});
- $store->store_friends($client->friends);
- $store->store_followers($client->followers);
- 1;
-}
-catch ($e) {
- print "Fail: $e\n";
- for my $res (@responses) {
- next if $res->is_success;
- print $res->request->as_string;
- print $res->as_string;
- print "\n",'-' x 50,"\n\n";
+ my @responses;
+ $client->client->ua->add_handler( response_done => sub { push @responses, $_[0]; return } );
+
+ try {
+ my $latest_id = $store->latest_note_id;
+
+ for my $note ($client->timeline($latest_id)->@*) {
+ $store->store_note($note);
+ }
+
+ $store->store_misskey_following($client->following);
+ $store->store_misskey_followers($client->followers);
}
+ catch ($e) {
+ print "Misskey Fail: $e\n";
+ for my $res (@responses) {
+ next if $res->is_success;
+ print $res->request->as_string;
+ print $res->as_string;
+ print "\n",'-' x 50,"\n\n";
+ }
+ };
};
+