diff options
author | Gianni Ceccarelli <gceccarelli@veritone.com> | 2023-10-04 15:25:22 +0100 |
---|---|---|
committer | Gianni Ceccarelli <gceccarelli@veritone.com> | 2023-10-04 15:28:13 +0100 |
commit | 49d5ca98b21551d86fcea7814f1b7dfb43038b6b (patch) | |
tree | 98a4b59dba2ebe6e90e629a94bc5697399227be3 /index.js | |
download | puppa-49d5ca98b21551d86fcea7814f1b7dfb43038b6b.tar.gz puppa-49d5ca98b21551d86fcea7814f1b7dfb43038b6b.tar.bz2 puppa-49d5ca98b21551d86fcea7814f1b7dfb43038b6b.zip |
works enough
Diffstat (limited to 'index.js')
-rw-r--r-- | index.js | 89 |
1 files changed, 89 insertions, 0 deletions
// index.js
//
// Opens a Twitch channel in headless Chrome, keeps the tab open for a fixed
// amount of time, and logs the chat messages found in the page every few
// seconds.
const { setTimeout: sleep } = require('node:timers/promises');

const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');

puppeteer.use(StealthPlugin());

/**
 * Launch Chrome and open the given Twitch channel with player preferences
 * pre-seeded in localStorage.
 *
 * The page is loaded once so that localStorage for the Twitch origin can be
 * written, then reloaded so the page starts with those values in place.
 *
 * @param {string} channel - Channel name, used as the path of the Twitch URL.
 * @returns {Promise<{browser: object, page: object}>} The launched browser
 *   and the page showing the channel. The caller must close the browser.
 * @throws Rethrows any navigation/evaluation error after closing the browser.
 */
async function goThere(channel) {
  const browser = await puppeteer.launch({
    headless: 'new',
    // need real Chrome to stream video
    executablePath: '/usr/bin/google-chrome-stable',
  });

  try {
    const page = await browser.newPage();
    await page.goto(
      `https://www.twitch.tv/${channel}`,
      { waitUntil: 'domcontentloaded', timeout: 60000 },
    );

    // set some bits to cheat Twitch a bit more
    await page.evaluate(() => {
      localStorage.setItem(
        'content-classification-labels-acknowledged',
        `{"loggedIn":{},"loggedOut":{"MatureGame":${Date.now() + 86400000}}}`,
      );
      localStorage.setItem('video-muted', '{"default":false}');
      localStorage.setItem('volume', '0.5');
      localStorage.setItem('video-quality', '{"default":"160p30"}');
    });

    await page.setViewport({ width: 1280, height: 720 });
    await page.reload({
      waitUntil: ['domcontentloaded'],
    });

    return { browser, page };
  } catch (err) {
    // Nobody else holds a reference to the browser yet, so it has to be shut
    // down here or the Chrome process is leaked. A failure while closing is
    // ignored on purpose: the original error is the one worth reporting.
    await browser.close().catch(() => {});
    throw err;
  }
}

/**
 * Scrape the chat messages currently in the page and log them, just to
 * pretend we're doing something useful.
 *
 * Errors raised while scraping are logged (message only) and not rethrown;
 * a timeout while waiting for the chat container does propagate.
 *
 * @param {object} page - Puppeteer page showing a Twitch channel.
 * @returns {Promise<void>}
 */
async function seeComments(page) {
  // wait for chat to be visible
  await page.waitForSelector(
    'div.chat-shell',
    { timeout: 60000 },
  );

  try {
    const chatText = await page.evaluate(() => {
      const comments = document.querySelectorAll('div.chat-line__message');

      // Either lookup may find nothing; `?.` keeps a single chat line without
      // the expected spans from throwing and discarding the whole batch.
      const userComments = Array.from(comments, (comment) => ({
        commentAuthor: comment.querySelector('span.chat-line__username span.chat-author__display-name')?.innerText,
        commentContent: comment.querySelector('span.text-fragment')?.innerText,
      }));

      return { userComments };
    });

    console.log(chatText);
  } catch (err) {
    console.log(err.message);
  }
}

/**
 * Resolve after `delay` milliseconds.
 *
 * Thin wrapper over the built-in promise-based timer; kept so that callers
 * have a named helper. Resolves with 1, as the hand-rolled version did.
 *
 * @param {number} delay - Time to wait, in milliseconds.
 * @returns {Promise<number>} Promise resolving to 1 once the delay elapsed.
 */
function delayP(delay) {
  return sleep(delay, 1);
}

(async () => {
  const channel = 'yogscast'; // a random channel
  const runForMs = 3600 * 1000; // 1 hour
  const pollEveryMs = 5000;

  const { browser, page } = await goThere(channel);

  try {
    const started = Date.now();

    while (Date.now() - started < runForMs) {
      await seeComments(page);
      await delayP(pollEveryMs);
    }
  } finally {
    // Close Chrome even when a scrape rejects, so no browser is left behind.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and exit non-zero instead of leaving the rejection
  // unhandled.
  console.error(err);
  process.exitCode = 1;
});