diff options
author | Gianni Ceccarelli <gianni.ceccarelli@broadbean.com> | 2023-02-22 13:53:11 +0000 |
---|---|---|
committer | Gianni Ceccarelli <gianni.ceccarelli@broadbean.com> | 2023-02-22 13:53:11 +0000 |
commit | 0b356d2e192fa531fb149cfd89f9d93324a90ebe (patch) | |
tree | 3b1ff90a87fc6990552a047d8f6a23f3fda2e96a /lib/Dakkar/TweetArchive/Store.pm | |
parent | misskey client (diff) | |
download | tweet-archive-0b356d2e192fa531fb149cfd89f9d93324a90ebe.tar.gz tweet-archive-0b356d2e192fa531fb149cfd89f9d93324a90ebe.tar.bz2 tweet-archive-0b356d2e192fa531fb149cfd89f9d93324a90ebe.zip |
store misskey notes & people
Diffstat (limited to 'lib/Dakkar/TweetArchive/Store.pm')
-rw-r--r-- | lib/Dakkar/TweetArchive/Store.pm | 199 |
1 files changed, 190 insertions, 9 deletions
diff --git a/lib/Dakkar/TweetArchive/Store.pm b/lib/Dakkar/TweetArchive/Store.pm index fc75475..f0068de 100644 --- a/lib/Dakkar/TweetArchive/Store.pm +++ b/lib/Dakkar/TweetArchive/Store.pm @@ -35,6 +35,7 @@ sub BUILD($self,@) { } my $dt_parser = DateTime::Format::Strptime->new(pattern => '%a %b %d %T %z %Y'); +my $dt_parser_iso = DateTime::Format::Strptime->new(pattern => '%Y-%m-%dT%H:%M:%S.%3NZ', time_zone=>'UTC'); my $dt_printer = DateTime::Format::Pg->new(); my $json_printer = JSON::MaybeXS->new( @@ -46,6 +47,8 @@ my $json_printer = JSON::MaybeXS->new( convert_blessed => 1, ); +# tweets + sub latest_tweet_id($self) { return $self->dbh->selectall_arrayref( q{SELECT MAX(id) FROM tweets}, @@ -71,14 +74,14 @@ SQL ); } -sub _store_people($self,$people) { +sub _store_twitter_people($self,$people) { my @ids; for my $person ($people->@*) { my $person_str = $json_printer->encode($person); push @ids, $self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0]; -INSERT INTO people(data) VALUES (?) -ON CONFLICT (people_details(data)) DO UPDATE +INSERT INTO twitter_people(data) VALUES (?) +ON CONFLICT (twitter_people_details(data)) DO UPDATE SET data=EXCLUDED.data RETURNING id SQL @@ -88,22 +91,84 @@ SQL } -sub store_friends($self,$friends) { - my $ids = $self->_store_people($friends); +sub store_twitter_friends($self,$friends) { + my $ids = $self->_store_twitter_people($friends); $self->dbh->do(<<"SQL", {}, $ids); -INSERT INTO friends(users) VALUES(?) +INSERT INTO twitter_friends(users) VALUES(?) SQL } -sub store_followers($self,$followers) { - my $ids = $self->_store_people($followers); +sub store_twitter_followers($self,$followers) { + my $ids = $self->_store_twitter_people($followers); $self->dbh->do(<<"SQL", {}, $ids); -INSERT INTO followers(users) VALUES(?) +INSERT INTO twitter_followers(users) VALUES(?) +SQL +} + +# misskey notes + +sub latest_note_id($self) { + return $self->dbh->selectall_arrayref( + q{SELECT MAX(id) FROM notes}, + )->[0][0]; +} + +sub store_note($self,$note) { + # yes, the source most probably decoded this from a string, we + # have to serialise it again, so that PostgreSQL can parse it + # *again* + my $note_str = $json_printer->encode($note); + my $created_at = $dt_parser_iso->parse_datetime($note->{createdAt}); + + $self->dbh->do(<<'SQL', {}, +INSERT INTO notes(id,created_at,data) VALUES(?,?,?) + ON CONFLICT (id) DO UPDATE SET + created_at = EXCLUDED.created_at, + data = EXCLUDED.data +SQL + $note->{id}, + $dt_printer->format_datetime($created_at), + $note_str, + ); +} + +sub _store_misskey_people($self,$people) { + my @ids; + for my $person ($people->@*) { + my $person_str = $json_printer->encode($person); + push @ids, + $self->dbh->selectall_arrayref(<<'SQL',{},$person_str)->[0][0]; +INSERT INTO misskey_people(data) VALUES (?) +ON CONFLICT (misskey_people_details(data)) DO UPDATE + SET data=EXCLUDED.data +RETURNING id SQL + } + + return \@ids; } + +sub store_misskey_following($self,$friends) { + my $ids = $self->_store_twitter_people($friends); + + $self->dbh->do(<<"SQL", {}, $ids); +INSERT INTO misskey_following(users) VALUES(?) +SQL +} + +sub store_misskey_followers($self,$followers) { + my $ids = $self->_store_misskey_people($followers); + + $self->dbh->do(<<"SQL", {}, $ids); +INSERT INTO misskey_followers(users) VALUES(?) +SQL +} + +# schema + sub _schema_deploy($self,$next_version) { my $method_name = "_schema_deploy_${next_version}"; if (my $method = $self->can($method_name)) { @@ -359,4 +424,120 @@ $$; SQL } +sub _schema_deploy_6($self) { + my $dbh = $self->dbh; + + $dbh->do('ALTER TABLE people RENAME TO twitter_people'); + $dbh->do('ALTER TABLE friends RENAME TO twitter_friends'); + $dbh->do('ALTER TABLE followers RENAME TO twitter_followers'); + $dbh->do('ALTER INDEX idx_people RENAME TO idx_twitter_people'); + $dbh->do('ALTER FUNCTION people_details RENAME TO twitter_people_details'); + + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION note_text(IN t JSONB) RETURNS text +LANGUAGE sql +IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS $$ +SELECT t->>'text' +$$; +SQL + + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION note_text_recursive(IN t JSONB) RETURNS text +LANGUAGE sql +IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS $$ SELECT +note_text(t) || ' ' || +COALESCE( note_text_recursive(t->'renote'), '') +$$; +SQL + + # misskey doesn't currently store language, let's pretend it's all + # English + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION note_language(IN t JSONB) RETURNS regconfig +LANGUAGE sql +IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS $$ + SELECT 'pg_catalog.english'::regconfig +$$; +SQL + + $dbh->do(<<'SQL'); +CREATE TABLE notes ( + id VARCHAR(255) PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + data JSONB NOT NULL, + fts tsvector +); +SQL + + $dbh->do(<<'SQL'); +CREATE INDEX notes_fts ON notes USING GIN (fts); +SQL + + $dbh->do(<<'SQL'); +CREATE OR REPLACE FUNCTION notes_fts_trigger() +RETURNS trigger +LANGUAGE plpgsql +AS $$ +begin + new.fts := to_tsvector(note_language(new.data),note_text_recursive(new.data)); + return new; +end +$$; +SQL + + $dbh->do(<<'SQL'); +CREATE TRIGGER notes_fts_update + BEFORE INSERT OR UPDATE + ON notes + FOR EACH ROW + EXECUTE PROCEDURE notes_fts_trigger(); +SQL + + $dbh->do(<<'SQL'); +CREATE TABLE misskey_people ( + id SERIAL PRIMARY KEY, + data JSONB NOT NULL +) +SQL + $dbh->do(<<'SQL'); +CREATE FUNCTION misskey_people_details(data jsonb) RETURNS text[] AS $$ + SELECT array[ + (data->>'id'), + (data->>'username'), + COALESCE(data->>'host',''), + (data->>'location'), + (data->>'description'), + (data->>'name'), + (data->>'avatarBlurhash'), + (data->>'bannerBlurhash') + ]; +$$ LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT +SQL + $dbh->do(<<'SQL'); +CREATE UNIQUE INDEX idx_miskkey_people ON misskey_people (misskey_people_details(data)) +SQL + + $dbh->do(<<'SQL'); +CREATE TABLE misskey_followers ( + taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY, + users integer[] NOT NULL +) +SQL + $dbh->do(<<'SQL'); +CREATE TABLE misskey_following ( + taken_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP PRIMARY KEY, + users integer[] NOT NULL +) +SQL +} + 1; |