From 7d942e463679220c13468f1530ebc002efcd832c Mon Sep 17 00:00:00 2001 From: Gianni Ceccarelli Date: Wed, 22 Feb 2023 10:51:36 +0000 Subject: update to 5.36 --- cpanfile | 4 ++-- lib/Dakkar/TweetArchive.pm | 3 +-- lib/Dakkar/TweetArchive/Store.pm | 3 +-- lib/Dakkar/TweetArchive/TraceCalls.pm | 2 +- tweet-archive.pl | 25 ++++++++++++++++--------- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/cpanfile b/cpanfile index 6a5df06..9abf9e9 100644 --- a/cpanfile +++ b/cpanfile @@ -1,5 +1,5 @@ #!perl -requires 'perl', '>= 5.024'; +requires 'perl', '>= 5.036'; requires 'DateTime::Format::Pg'; requires 'DateTime::Format::Strptime'; requires 'DBI'; @@ -10,5 +10,5 @@ requires 'Moo'; requires 'Moo::Role'; requires 'namespace::clean'; requires 'Net::Twitter'; -requires 'Path::Class'; +requires 'Path::Tiny'; requires 'Types::Standard'; diff --git a/lib/Dakkar/TweetArchive.pm b/lib/Dakkar/TweetArchive.pm index fe28d13..53f9777 100644 --- a/lib/Dakkar/TweetArchive.pm +++ b/lib/Dakkar/TweetArchive.pm @@ -1,7 +1,6 @@ package Dakkar::TweetArchive; -use 5.024; +use v5.36; use Moo; -use experimental 'signatures'; use Net::Twitter; use Types::Standard qw(Str InstanceOf); use namespace::clean; diff --git a/lib/Dakkar/TweetArchive/Store.pm b/lib/Dakkar/TweetArchive/Store.pm index 9a97ca6..fc75475 100644 --- a/lib/Dakkar/TweetArchive/Store.pm +++ b/lib/Dakkar/TweetArchive/Store.pm @@ -1,7 +1,6 @@ package Dakkar::TweetArchive::Store; -use 5.024; +use v5.36; use Moo; -use experimental 'signatures'; use DBI; use Types::Standard qw(Str InstanceOf); use DateTime::Format::Strptime; diff --git a/lib/Dakkar/TweetArchive/TraceCalls.pm b/lib/Dakkar/TweetArchive/TraceCalls.pm index 7591b69..de8cc78 100644 --- a/lib/Dakkar/TweetArchive/TraceCalls.pm +++ b/lib/Dakkar/TweetArchive/TraceCalls.pm @@ -1,5 +1,5 @@ package Dakkar::TweetArchive::TraceCalls; -use 5.024; +use v5.36; use Moo::Role; use experimental 'signatures'; diff --git a/tweet-archive.pl b/tweet-archive.pl index 6e007d3..a1b5fff 100644 --- a/tweet-archive.pl +++ b/tweet-archive.pl @@ -1,8 +1,9 @@ #!/usr/bin/env perl -use 5.024; +use v5.36; use strict; use warnings; -use Path::Class; +use experimental 'try'; +use Path::Tiny; use JSON::MaybeXS; use Dakkar::TweetArchive; use Dakkar::TweetArchive::Store; @@ -13,10 +14,15 @@ my $json_parser = JSON::MaybeXS->new( ); my $conf = $json_parser->decode( - file(__FILE__)->parent->file('tweet-archive.conf') - ->slurp(iomode=>'<:raw') - // '{}' - ); + do { + try { + path(__FILE__)->sibling('tweet-archive.conf')->slurp_raw() + } + catch ($e) { + '{}' + } + }, +); my $client = Dakkar::TweetArchive->new($conf); my $store = Dakkar::TweetArchive::Store->new($conf); @@ -25,7 +31,7 @@ my $ua = $client->client->ua; my @responses; $ua->add_handler( response_done => sub { push @responses, $_[0]; return } ); -eval { +try { my $latest_id = $store->latest_tweet_id; for my $tweet ($client->home_timeline($latest_id)->@*) { @@ -35,8 +41,9 @@ eval { $store->store_friends($client->friends); $store->store_followers($client->followers); 1; -} or do { - print "Fail: $@\n"; +} +catch ($e) { + print "Fail: $e\n"; for my $res (@responses) { next if $res->is_success; print $res->request->as_string; -- cgit v1.2.3 From 8a66c4ef785ba4e72f320f4023718148854e6ac1 Mon Sep 17 00:00:00 2001 From: Gianni Ceccarelli Date: Wed, 22 Feb 2023 11:56:10 +0000 Subject: misskey client --- cpanfile | 1 + lib/Dakkar/Misskey.pm | 86 ++++++++++++++++++++++++++++++++++++++++++++++ lib/Dakkar/NotesArchive.pm | 45 ++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 lib/Dakkar/Misskey.pm create mode 100644 lib/Dakkar/NotesArchive.pm diff --git a/cpanfile b/cpanfile index 9abf9e9..66ba8c6 100644 --- a/cpanfile +++ b/cpanfile @@ -12,3 +12,4 @@ requires 'namespace::clean'; requires 'Net::Twitter'; requires 'Path::Tiny'; requires 'Types::Standard'; +requires 'List::Util'; diff --git a/lib/Dakkar/Misskey.pm b/lib/Dakkar/Misskey.pm new file mode 100644 index 0000000..8d6dd5e --- /dev/null +++ b/lib/Dakkar/Misskey.pm @@ -0,0 +1,86 @@ +package Dakkar::Misskey; +use v5.36; +use Moo; +use JSON::MaybeXS; +use LWP::UserAgent; +use Types::Standard qw(Str); +use Types::URI qw(Uri); +use List::Util qw(minstr); +use URI; +use namespace::clean; + +has _json => ( is => 'lazy', builder => sub { JSON::MaybeXS->new(utf8=>1,relaxed=>1, pretty=>0) } ); +has _ua => ( is => 'lazy', builder => sub { LWP::UserAgent->new(agent=>'Dakkar::Misskey') } ); + +has token => ( is => 'ro', required => 1 ); +has base_url => ( is => 'ro', required => 1, isa => Uri, coerce => 1 ); + +sub _request($self, $endpoint, $payload) { + my $payload_json = $self->_json->encode({ + $payload->%*, + i => $self->token, + }); + + my $uri = URI->new($endpoint)->abs($self->base_url); + + my $response = $self->_ua->post( + $uri, + 'Content-type' => 'application/json', + Content => $payload_json, + ); + + if ($response->is_success) { + return( + $self->_json->decode( + $response->decoded_content(charset=>'none') + ), + undef, + ); + } + + return( undef, $response->status_line ); +} + +sub _paged_request($self, $endpoint, $payload) { + my @all_results; + + my $page_payload = { + limit => 100, + $payload->%*, + }; + + while (1) { + my ($result, $error) = $self->_request($endpoint, $page_payload); + return (\@all_results, $error) if $error; + + last unless $result->@*; + + push @all_results, $result->@*; + $page_payload->{untilId} = minstr(map { $_->{id} } $result->@* ); + } + + return (\@all_results, undef); +} + +sub timeline($self,$options) { + return $self->_paged_request( + 'api/notes/timeline', + $options, + ); +} + +sub followers($self,$user_id) { + return $self->_paged_request( + 'api/users/followers', + { userId => $user_id }, + ); +} + +sub following($self,$user_id) { + return $self->_paged_request( + 'api/users/following', + { userId => $user_id }, + ); +} + +1; diff --git a/lib/Dakkar/NotesArchive.pm b/lib/Dakkar/NotesArchive.pm new file mode 100644 index 0000000..5571419 --- /dev/null +++ b/lib/Dakkar/NotesArchive.pm @@ -0,0 +1,45 @@ +package Dakkar::NotesArchive; +use v5.36; +use Moo; +use experimental 'builtin'; +use Dakkar::Misskey; +use Types::Standard qw(Str InstanceOf); +use namespace::clean; + +has [qw(base_url token user_id)] => ( + is => 'ro', + required => 1, + isa => Str, +); + +has client => ( + is => 'lazy', + isa => InstanceOf['Dakkar::Misskey'], +); + + +sub _build_client($self) { + my $nt = Dakkar::Misskey->new({ + base_url => $self->base_url, + token => $self->token, + }); +} + +sub timeline($self, $since_id) { + return $self->client->timeline({ + sinceId => $since_id, + includeMyRenotes => \1, + includeLocalRenotes => \1, + includeRenotedMyNotes => \0, + }); +} + +sub following($self) { + return $self->client->following($self->user_id); +} + +sub followers($self) { + return $self->client->followers($self->user_id); +} + +1; -- cgit v1.2.3 From 0b356d2e192fa531fb149cfd89f9d93324a90ebe Mon Sep 17 00:00:00 2001 From: Gianni Ceccarelli Date: Wed, 22 Feb 2023 13:53:11 +0000 Subject: store misskey notes & people --- cpanfile | 11 ++- lib/Dakkar/Misskey.pm | 18 ++-- lib/Dakkar/NotesArchive.pm | 3 +- lib/Dakkar/TweetArchive/Store.pm | 199 +++++++++++++++++++++++++++++++++++++-- tweet-archive.pl | 72 ++++++++++---- 5 files changed, 257 insertions(+), 46 deletions(-) diff --git a/cpanfile b/cpanfile index 66ba8c6..38449c5 100644 --- a/cpanfile +++ b/cpanfile @@ -1,15 +1,16 @@ #!perl requires 'perl', '>= 5.036'; +requires 'DBD::Pg'; +requires 'DBI'; requires 'DateTime::Format::Pg'; requires 'DateTime::Format::Strptime'; -requires 'DBI'; -requires 'DBD::Pg'; -requires 'experimental'; requires 'JSON::MaybeXS'; +requires 'List::Util'; requires 'Moo'; requires 'Moo::Role'; -requires 'namespace::clean'; requires 'Net::Twitter'; requires 'Path::Tiny'; +requires 'PerlX::Maybe'; requires 'Types::Standard'; -requires 'List::Util'; +requires 'experimental'; +requires 'namespace::clean'; diff --git a/lib/Dakkar/Misskey.pm b/lib/Dakkar/Misskey.pm index 8d6dd5e..327cfe8 100644 --- a/lib/Dakkar/Misskey.pm +++ b/lib/Dakkar/Misskey.pm @@ -10,7 +10,7 @@ use URI; use namespace::clean; has _json => ( is => 'lazy', builder => sub { JSON::MaybeXS->new(utf8=>1,relaxed=>1, pretty=>0) } ); -has _ua => ( is => 'lazy', builder => sub { LWP::UserAgent->new(agent=>'Dakkar::Misskey') } ); +has ua => ( is => 'lazy', builder => sub { LWP::UserAgent->new(agent=>'Dakkar::Misskey') } ); has token => ( is => 'ro', required => 1 ); has base_url => ( is => 'ro', required => 1, isa => Uri, coerce => 1 ); @@ -23,22 +23,19 @@ sub _request($self, $endpoint, $payload) { my $uri = URI->new($endpoint)->abs($self->base_url); - my $response = $self->_ua->post( + my $response = $self->ua->post( $uri, 'Content-type' => 'application/json', Content => $payload_json, ); if ($response->is_success) { - return( - $self->_json->decode( - $response->decoded_content(charset=>'none') - ), - undef, + return $self->_json->decode( + $response->decoded_content(charset=>'none') ); } - return( undef, $response->status_line ); + die $response->status_line; } sub _paged_request($self, $endpoint, $payload) { @@ -50,8 +47,7 @@ sub _paged_request($self, $endpoint, $payload) { }; while (1) { - my ($result, $error) = $self->_request($endpoint, $page_payload); - return (\@all_results, $error) if $error; + my $result = $self->_request($endpoint, $page_payload); last unless $result->@*; @@ -59,7 +55,7 @@ sub _paged_request($self, $endpoint, $payload) { $page_payload->{untilId} = minstr(map { $_->{id} } $result->@* ); } - return (\@all_results, undef); + return \@all_results; } sub timeline($self,$options) { diff --git a/lib/Dakkar/NotesArchive.pm b/lib/Dakkar/NotesArchive.pm index 5571419..1fcf248 100644 --- a/lib/Dakkar/NotesArchive.pm +++ b/lib/Dakkar/NotesArchive.pm @@ -2,6 +2,7 @@ package Dakkar::NotesArchive; use v5.36; use Moo; use experimental 'builtin'; +use PerlX::Maybe; use Dakkar::Misskey; use Types::Standard qw(Str InstanceOf); use namespace::clean; @@ -27,7 +28,7 @@ sub _build_client($self) { sub timeline($self, $since_id) { return $self->client->timeline({ - sinceId => $since_id, + maybe sinceId => $since_id, includeMyRenotes => \1, includeLocalRenotes => \1, includeRenotedMyNotes => \0, diff --git a/lib/Dakkar/TweetArchive/Store.pm b/lib/Dakkar/TweetArchive/Store.pm index fc75475..f0068de 100644 --- a/lib/Dakkar/TweetArchive/Store.pm +++ b/lib/Dakkar/TweetArchive/Store.pm @@ -35,6 +35,7 @@ sub BUILD($self,@) { } my $dt_parser = DateTime::Format::Strptime->new(pattern => '%a %b %d %T %z %Y'); +my $dt_parser_iso = DateTime::Format::Strptime->new(pattern => '%Y-%m-%dT%H:%M:%S.%3NZ', time_zone=>'UTC'); my $dt_printer = DateTime::Format::Pg->new(); my $json_printer = JSON::MaybeXS->new( @@ -46,6 +47,8 @@ my $json_printer = JSON::MaybeXS->new( convert_blessed => 1, ); +# tweets + sub latest_tweet_id($self) { return $self->dbh->selectall_arrayref( q{SELECT MAX(id) FROM tweets}, @@ -71,14 +74,14 @@ SQL ); } -sub _store_people($self,$people) { +sub _store_twitter_people($self,$people) { my @ids; for my $person ($people->@*) { my $person_str = $json_printer->encode($person); push @ids, $self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0]; -INSERT INTO people(data) VALUES (?) -ON CONFLICT (people_details(data)) DO UPDATE +INSERT INTO twitter_people(data) VALUES (?) +ON CONFLICT (twitter_people_details(data)) DO UPDATE SET data=EXCLUDED.data RETURNING id SQL @@ -88,22 +91,84 @@ SQL } -sub store_friends($self,$friends) { - my $ids = $self->_store_people($friends); +sub store_twitter_friends($self,$friends) { + my $ids = $self->_store_twitter_people($friends); $self->dbh->do(<<"SQL", {}, $ids); -INSERT INTO friends(users) VALUES(?) +INSERT INTO twitter_friends(users) VALUES(?) SQL } -sub store_followers($self,$followers) { - my $ids = $self->_store_people($followers); +sub store_twitter_followers($self,$followers) { + my $ids = $self->_store_twitter_people($followers); $self->dbh->do(<<"SQL", {}, $ids); -INSERT INTO followers(users) VALUES(?) +INSERT INTO twitter_followers(users) VALUES(?) +SQL +} + +# misskey notes + +sub latest_note_id($self) { + return $self->dbh->selectall_arrayref( + q{SELECT MAX(id) FROM notes}, + )->[0][0]; +} + +sub store_note($self,$note) { + # yes, the source most probably decoded this from a string, we + # have to serialise it again, so that PostgreSQL can parse it + # *again* + my $note_str = $json_printer->encode($note); + my $created_at = $dt_parser_iso->parse_datetime($note->{createdAt}); + + $self->dbh->do(<<'SQL', {}, +INSERT INTO notes(id,created_at,data) VALUES(?,?,?) + ON CONFLICT (id) DO UPDATE SET + created_at = EXCLUDED.created_at, + data = EXCLUDED.data +SQL + $note->{id}, + $dt_printer->format_datetime($created_at), + $note_str, + ); +} + +sub _store_misskey_people($self,$people) { + my @ids; + for my $person ($people->@*) { + my $person_str = $json_printer->encode($person); + push @ids, + $self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0]; +INSERT INTO misskey_people(data) VALUES (?) +ON CONFLICT (misskey_people_details(data)) DO UPDATE + SET data=EXCLUDED.data +RETURNING id SQL + } + + return \@ids; } + +sub store_misskey_following($self,$friends) { + my $ids = $self->_store_twitter_people($friends); + + $self->dbh->do(<<"SQL", {}, $ids); +INSERT INTO misskey_following(users) VALUES(?) +SQL +} + +sub store_misskey_followers($self,$followers) { + my $ids = $self->_store_misskey_people($followers); + + $self->dbh->do(<<"SQL", {}, $ids); +INSERT INTO misskey_followers(users) VALUES(?) +SQL +} + +# schema + sub _schema_deploy($self,$next_version) { my $method_name = "_schema_deploy_${next_version}"; if (my $method = $self->can($method_name)) { @@ -359,4 +424,120 @@ $$; SQL } +sub _schema_deploy_6($self) { + my $dbh = $self->dbh; + + $dbh->do('ALTER TABLE people RENAME TO twitter_people'); + $dbh->do('ALTER TABLE friends RENAME TO twitter_friends'); + $dbh->do('ALTER TABLE followers RENAME TO twitter_followers'); + $dbh->do('ALTER INDEX idx_people RENAME TO idx_twitter_people'); + $dbh->do('ALTER FUNCTION people_details RENAME TO twitter_people_details'); + + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION note_text(IN t JSONB) RETURNS text +LANGUAGE sql +IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS $$ +SELECT t->>'text' +$$; +SQL + + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION note_text_recursive(IN t JSONB) RETURNS text +LANGUAGE sql +IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS $$ SELECT +note_text(t) || ' ' || +COALESCE( note_text_recursive(t->'renote'), '') +$$; +SQL + + # misskey doesn't currently store language, let's pretend it's all + # English + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION note_language(IN t JSONB) RETURNS regconfig +LANGUAGE sql +IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS $$ + SELECT 'pg_catalog.english'::regconfig +$$; +SQL + + $dbh->do(<<'SQL'); +CREATE TABLE notes ( + id VARCHAR(255) PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + data JSONB NOT NULL, + fts tsvector +); +SQL + + $dbh->do(<<'SQL'); +CREATE INDEX notes_fts ON notes USING GIN (fts); +SQL + + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION notes_fts_trigger() +RETURNS trigger +LANGUAGE plpgsql +AS $$ +begin + new.fts := to_tsvector(note_language(new.data),note_text_recursive(new.data)); + return new; +end +$$; +SQL + + $dbh->do(<<'SQL'); +CREATE TRIGGER notes_fts_update + BEFORE INSERT OR UPDATE + ON notes + FOR EACH ROW + EXECUTE PROCEDURE notes_fts_trigger(); +SQL + + $dbh->do(<<'SQL'); +CREATE TABLE misskey_people ( + id SERIAL PRIMARY KEY, + data JSONB NOT NULL +) +SQL + $dbh->do(<<'SQL'); +CREATE FUNCTION misskey_people_details(data jsonb) RETURNS text[] AS $$ + SELECT array[ + (data->>'id'), + (data->>'username'), + COALESCE(data->>'host',''), + (data->>'location'), + (data->>'description'), + (data->>'name'), + (data->>'avatarBlurhash'), + (data->>'bannerBlurhash') + ]; +$$ LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT +SQL + $dbh->do(<<'SQL'); +CREATE UNIQUE INDEX idx_miskkey_people ON misskey_people (misskey_people_details(data)) +SQL + + $dbh->do(<<'SQL'); +CREATE TABLE misskey_followers ( + taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY, + users integer[] NOT NULL +) +SQL + $dbh->do(<<'SQL'); +CREATE TABLE misskey_following ( + taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY, + users integer[] NOT NULL +) +SQL +} + 1; diff --git a/tweet-archive.pl b/tweet-archive.pl index a1b5fff..1a7232b 100644 --- a/tweet-archive.pl +++ b/tweet-archive.pl @@ -3,9 +3,11 @@ use v5.36; use strict; use warnings; use experimental 'try'; +use lib 'lib','local/lib/perl5'; use Path::Tiny; use JSON::MaybeXS; use Dakkar::TweetArchive; +use Dakkar::NotesArchive; use Dakkar::TweetArchive::Store; my $json_parser = JSON::MaybeXS->new( @@ -24,30 +26,60 @@ my $conf = $json_parser->decode( }, ); -my $client = Dakkar::TweetArchive->new($conf); -my $store = Dakkar::TweetArchive::Store->new($conf); +my $store = Dakkar::TweetArchive::Store->new($conf->{db}); -my $ua = $client->client->ua; -my @responses; -$ua->add_handler( response_done => sub { push @responses, $_[0]; return } ); +if ($conf->{twitter}) { + my $client = Dakkar::TweetArchive->new($conf->{twitter}); -try { - my $latest_id = $store->latest_tweet_id; + my @responses; + $client->client->ua->add_handler( response_done => sub { push @responses, $_[0]; return } ); - for my $tweet ($client->home_timeline($latest_id)->@*) { - $store->store_tweet($tweet); + try { + my $latest_id = $store->latest_tweet_id; + + for my $tweet ($client->home_timeline($latest_id)->@*) { + $store->store_tweet($tweet); + } + + $store->store_twitter_friends($client->friends); + $store->store_twitter_followers($client->followers); } + catch ($e) { + print "Twitter Fail: $e\n"; + for my $res (@responses) { + next if $res->is_success; + print $res->request->as_string; + print $res->as_string; + print "\n",'-' x 50,"\n\n"; + } + } + ; +}; + +if ($conf->{misskey}) { + my $client = Dakkar::NotesArchive->new($conf->{misskey}); - $store->store_friends($client->friends); - $store->store_followers($client->followers); - 1; -} -catch ($e) { - print "Fail: $e\n"; - for my $res (@responses) { - next if $res->is_success; - print $res->request->as_string; - print $res->as_string; - print "\n",'-' x 50,"\n\n"; + my @responses; + $client->client->ua->add_handler( response_done => sub { push @responses, $_[0]; return } ); + + try { + my $latest_id = $store->latest_note_id; + + for my $note ($client->timeline($latest_id)->@*) { + $store->store_note($note); + } + + $store->store_misskey_following($client->following); + $store->store_misskey_followers($client->followers); } + catch ($e) { + print "Misskey Fail: $e\n"; + for my $res (@responses) { + next if $res->is_success; + print $res->request->as_string; + print $res->as_string; + print "\n",'-' x 50,"\n\n"; + } + }; }; + -- cgit v1.2.3