summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Gianni Ceccarelli <gceccarelli@veritone.com> 2023-10-04 15:25:22 +0100
committer Gianni Ceccarelli <gceccarelli@veritone.com> 2023-10-04 15:28:13 +0100
commit 49d5ca98b21551d86fcea7814f1b7dfb43038b6b (patch)
tree 98a4b59dba2ebe6e90e629a94bc5697399227be3
download puppa-49d5ca98b21551d86fcea7814f1b7dfb43038b6b.tar.gz
puppa-49d5ca98b21551d86fcea7814f1b7dfb43038b6b.tar.bz2
puppa-49d5ca98b21551d86fcea7814f1b7dfb43038b6b.zip
works enough
-rw-r--r-- index.js 89
-rw-r--r-- package.json 17
2 files changed, 106 insertions, 0 deletions
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..88930aa
--- /dev/null
+++ b/index.js
@@ -0,0 +1,89 @@
+const puppeteer = require('puppeteer-extra');
+const StealthPlugin = require('puppeteer-extra-plugin-stealth')
+puppeteer.use(StealthPlugin())
+
+/**
+ * Launch headless Chrome through puppeteer and open a Twitch channel page.
+ *
+ * The channel page is loaded once so that localStorage entries can be
+ * written for it, then reloaded so the page starts with those entries
+ * already stored.
+ *
+ * @param {string} channel - channel name, interpolated into the twitch.tv URL
+ * @returns {Promise<{browser: object, page: object}>} the launched browser and
+ *   the page showing the channel; the caller is responsible for closing the
+ *   browser
+ * @throws rethrows any error raised while setting the page up, after closing
+ *   the browser that was launched for it
+ */
+async function goThere(channel) {
+  const browser = await puppeteer.launch({
+    headless: 'new',
+    // need real Chrome to stream video
+    executablePath: '/usr/bin/google-chrome-stable',
+  });
+
+  try {
+    const page = await browser.newPage();
+    await page.goto(
+      `https://www.twitch.tv/${channel}`,
+      { waitUntil: 'domcontentloaded', timeout: 60000 },
+    );
+
+    // set some bits to cheat Twitch a bit more
+    await page.evaluate(() => {
+      // the MatureGame value is a timestamp 24 hours in the future
+      localStorage.setItem(
+        'content-classification-labels-acknowledged',
+        `{"loggedIn":{},"loggedOut":{"MatureGame":${Date.now() + 86400000}}}`,
+      );
+      localStorage.setItem('video-muted', '{"default":false}');
+      localStorage.setItem('volume', '0.5');
+      localStorage.setItem('video-quality', '{"default":"160p30"}');
+    });
+
+    await page.setViewport({ width: 1280, height: 720 });
+    await page.reload({
+      waitUntil: ['domcontentloaded'],
+    });
+
+    return { browser, page };
+  } catch (err) {
+    // The caller never receives the browser handle when setup fails, so close
+    // it here. A failure to close is ignored so the original error propagates.
+    await browser.close().catch(() => {});
+    throw err;
+  }
+}
+
+// scrape comments, just to pretend we're doing something useful
+/**
+ * Log the chat messages currently present on the page.
+ *
+ * Waits up to 60 seconds for the chat container to exist, then collects the
+ * author and text of every chat line and prints them as
+ * `{ userComments: [{ commentAuthor, commentContent }, ...] }`.
+ * Errors raised while collecting are logged, not thrown; an error from the
+ * initial wait is not caught here and propagates to the caller.
+ *
+ * @param {object} page - puppeteer page showing a Twitch channel
+ * @returns {Promise<void>}
+ */
+async function seeComments(page) {
+  /* wait for chat to be visible */
+  await page.waitForSelector(
+    'div.chat-shell',
+    { timeout: 60000 },
+  );
+
+  try {
+    const chatText = await page.evaluate(() => {
+      const comments = document.querySelectorAll('div.chat-line__message');
+
+      // Both lookups use ?. so a chat line missing either element yields an
+      // undefined field instead of throwing and losing the whole batch.
+      const userComments = Array.from(comments, (comment) => ({
+        commentAuthor: comment.querySelector('span.chat-line__username span.chat-author__display-name')?.innerText,
+        commentContent: comment.querySelector('span.text-fragment')?.innerText,
+      }));
+
+      return { userComments };
+    });
+
+    console.log(chatText);
+  } catch (err) {
+    console.log(err.message);
+  }
+}
+
+// Promise-based sleep: resolves with 1 once `delay` milliseconds have passed
+// (there must be some more built-in way to do this...)
+function delayP(delay) {
+  return new Promise((done) => {
+    setTimeout(() => done(1), delay);
+  });
+}
+
+// entry point: open one channel and dump its chat every 5 seconds for an hour
+(async () => {
+  const channel = 'yogscast'; // a random channel
+  const runForMs = 3600 * 1000; // 1 hour
+  const pollEveryMs = 5000;
+
+  const { browser, page } = await goThere(channel);
+
+  try {
+    const started = Date.now();
+
+    while ((Date.now() - started) < runForMs) {
+      await seeComments(page);
+      await delayP(pollEveryMs);
+    }
+  } finally {
+    // seeComments can reject (its initial wait is not caught), so close the
+    // browser on every exit path rather than only after a full run
+    await browser.close();
+  }
+})().catch((err) => {
+  // report the failure explicitly instead of leaving an unhandled rejection
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..8fe73fb
--- /dev/null
+++ b/package.json
@@ -0,0 +1,17 @@
+{
+ "name": "src",
+ "version": "1.0.0",
+ "description": "",
+ "main": "index.js",
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1"
+ },
+ "keywords": [],
+ "author": "",
+ "license": "ISC",
+ "dependencies": {
+ "puppeteer": "^21.3.6",
+ "puppeteer-extra": "^3.3.6",
+ "puppeteer-extra-plugin-stealth": "^2.11.2"
+ }
+}