summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Dakkar/TweetArchive/Store.pm88
1 files changed, 88 insertions, 0 deletions
diff --git a/lib/Dakkar/TweetArchive/Store.pm b/lib/Dakkar/TweetArchive/Store.pm
index cc64a87..8470823 100644
--- a/lib/Dakkar/TweetArchive/Store.pm
+++ b/lib/Dakkar/TweetArchive/Store.pm
@@ -252,4 +252,92 @@ SQL
$dbh->do(q{ALTER TABLE new_followers RENAME TO followers});
}
+sub _schema_deploy_4($self) {
+ # let's get some sensible ordering when saying "order by id"
+ $dbh->do(<<'SQL');
+ALTER TABLE tweets ALTER COLUMN id SET DATA TYPE bigint USING id::bigint
+SQL
+
+ # get the text of a tweet
+ #
+ # we don't actually get extended_tweet, probably Twitter only
+ # gives it to "recognised" clients
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION tweet_text(IN t JSONB) RETURNS text
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$ SELECT
+CASE t ? 'extended_tweet'
+ WHEN true THEN t->'extended_tweet'->>'text'
+ ELSE t->>'text'
+END
+$$;
+SQL
+
+ # get text, including RTs and quotes
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION tweet_text_recursive(IN t JSONB) RETURNS text
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$ SELECT
+tweet_text(t) || ' ' ||
+COALESCE( tweet_text_recursive(t->'retweeted_status'), '') || ' ' ||
+COALESCE( tweet_text_recursive(t->'quoted_status'), '')
+$$;
+SQL
+
+ # map a tweet to a full-text search language configuration
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION tweet_language(IN t JSONB) RETURNS regconfig
+LANGUAGE sql
+IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS $$ SELECT CASE t->>'lang'
+WHEN 'it' THEN 'pg_catalog.italian'::regconfig
+WHEN 'fr' THEN 'pg_catalog.french'::regconfig
+WHEN 'de' THEN 'pg_catalog.german'::regconfig
+WHEN 'es' THEN 'pg_catalog.spanish'::regconfig
+WHEN 'nl' THEN 'pg_catalog.dutch'::regconfig
+ELSE 'pg_catalog.english'::regconfig
+END
+$$;
+SQL
+
+ # add the full-text search data
+ $dbh->do(<<'SQL');
+ALTER TABLE tweets ADD COLUMN fts tsvector;
+SQL
+ $dbh->do(<<'SQL');
+UPDATE tweets SET fts = to_tsvector(tweet_language(data),tweet_text_recursive(data));
+SQL
+ $dbh->do(<<'SQL');
+CREATE INDEX tweets_fts ON tweets USING GIN (fts);
+SQL
+
+ # and make sure it stays up-to-date
+ $dbh->do(<<'SQL');
+CREATE OR REPLACE FUNCTION tweets_fts_trigger()
+RETURNS trigger
+LANGUAGE plpgsql
+AS $$
+begin
+ new.fts := to_tsvector(tweet_language(new.data),tweet_text_recursive(new.data));
+ return new;
+end
+$$;
+SQL
+ $dbh->do(<<'SQL');
+CREATE TRIGGER tweets_fts_update
+ BEFORE INSERT OR UPDATE
+ ON tweets
+ FOR EACH ROW
+ EXECUTE PROCEDURE tweets_fts_trigger();
+SQL
+}
+
1;