diff options
author | Evgeny Zinoviev <me@ch1p.io> | 2022-03-15 03:56:59 +0300 |
---|---|---|
committer | Evgeny Zinoviev <me@ch1p.io> | 2022-03-15 03:56:59 +0300 |
commit | e075f11bdb1b194062d153cad181a6b04ad8fcfc (patch) | |
tree | fdadba7ab1c19c70cff5e247e173ad0bf939b8a6 /lib | |
parent | 4f867d7cbcb7cd23b9cdfab3422bad9dc4a92415 (diff) |
Diffstat (limited to 'lib')
-rw-r--r-- | lib/browser.js | 111 | ||||
-rw-r--r-- | lib/cookies-storage.js | 80 | ||||
-rw-r--r-- | lib/logging.js | 35 |
3 files changed, 226 insertions, 0 deletions
diff --git a/lib/browser.js b/lib/browser.js new file mode 100644 index 0000000..d0f7861 --- /dev/null +++ b/lib/browser.js @@ -0,0 +1,111 @@ +const puppeteer = require("puppeteer-extra"); +const StealthPlugin = require('puppeteer-extra-plugin-stealth'); +const cookiesStorage = require("./cookies-storage"); +puppeteer.use(StealthPlugin()) + +const {getLogger} = require('./logging') + +const logger = getLogger('browser') + + +const chromeOptions = { + headless: true, + args: [] +}; + +let browser = null + +class PageWrapper { + constructor() { + this.intrNeededCallback = null + this.intrCallback = null + this.intrPostCallback = null + + this.page = null + this.cdpClient = null + } + + async getPage(interceptionNeededCallback, interceptCallback, postInterceptCallback) { + this.intrCallback = interceptCallback + this.intrNeededCallback = interceptionNeededCallback + this.intrPostCallback = postInterceptCallback + + if (this.page !== null && this.page.isClosed()) { + this.page.removeAllListeners && this.page.removeAllListeners() + this.page = null + } + + if (this.page !== null) + return this.page + + this.page = await browser.newPage() + + let cookies = await cookiesStorage.get() + // logger.debug('loaded cookies:', cookies) + await this.page.setCookie(...cookies) + + this.page.on('domcontentloaded', async () => { + try { + let cookies = await this.page.cookies(); + if (cookies) + await cookiesStorage.save(cookies) + } catch (e) { + logger.error('page.cookies() failed:', e) + } + }) + + await this.page.removeAllListeners('request') + await this.page.setRequestInterception(true) + this.page.on('request', async request => { + let contData = this.intrPostCallback(request) + await request.continue(contData) + }) + + this.cdpClient = await this.page.target().createCDPSession(); + await this.cdpClient.send('Network.enable') + await this.cdpClient.send('Network.setRequestInterception', { + patterns: [ + { + urlPattern: '*', + resourceType: 'Document', + interceptionStage: 'HeadersReceived' + } + ], + }) + await this.cdpClient.on('Network.requestIntercepted', async e => { + let obj = {interceptionId: e.interceptionId} + if (this.intrNeededCallback && this.intrNeededCallback(e) === true) { + let ret = await this.cdpClient.send('Network.getResponseBodyForInterception', { + interceptionId: e.interceptionId + }) + this.intrCallback(ret, e.responseHeaders) + obj['errorReason'] = 'BlockedByClient' + } + await this.cdpClient.send('Network.continueInterceptedRequest', obj) + }) + return this.page + } +} + +let singlePageWrapper = new PageWrapper() + +module.exports = { + async launch(options) { + if (options.proxy) + chromeOptions.args.push(`--proxy-server=${options.proxy}`) + + if (options.noSandbox) + chromeOptions.args.push( + '--no-sandbox', + '--disable-setuid-sandbox' + ) + + if (options.headful) + chromeOptions.headless = false + + browser = await puppeteer.launch(chromeOptions) + }, + + singlePageWrapper, + PageWrapper, +}
\ No newline at end of file diff --git a/lib/cookies-storage.js b/lib/cookies-storage.js new file mode 100644 index 0000000..15714ca --- /dev/null +++ b/lib/cookies-storage.js @@ -0,0 +1,80 @@ +const fs = require('fs').promises +const {Mutex} = require('async-mutex') +const {getLogger} = require('./logging') + +const logger = getLogger('CookiesStorage') + +let storageFileName = null +const mutex = new Mutex() + +async function exists(path) { + try { + await fs.access(path) + return true + } catch { + return false + } +} + +function cookiesAsHashed(cookies) { + if (!cookies.length) + return {} + + const map = {} + const sep = '|;|;' + for (const c of cookies) { + const k = `${c.domain}${sep}${c.path}${sep}${c.name}` + map[k] = c + } + return map +} + +module.exports = { + async save(newCookies) { + let currentCookies = await this.get() + + await mutex.runExclusive(async () => { + if (currentCookies.length) { + for (let newCookie of newCookies) { + if (!currentCookies.length) + break + let i = currentCookies.findIndex((oldCookie) => { + return oldCookie.domain === newCookie.domain + && oldCookie.path === newCookie.path + && oldCookie.name === newCookie.name + && ( + oldCookie.value !== newCookie.value + || oldCookie.expires !== newCookie.expires + ) + }) + if (i !== -1) { + let removed = currentCookies.splice(i, 1) + // logger.debug('removed cookie:', removed, 'instead got new one:', newCookie) + } + } + } + + const cookiesMap = Object.assign({}, cookiesAsHashed(currentCookies), cookiesAsHashed(newCookies)) + await fs.writeFile(storageFileName, JSON.stringify(Object.values(cookiesMap), null, 2), 'utf-8') + }) + }, + + async get() { + if (!(await exists(storageFileName))) + return [] + + try { + const raw = await mutex.runExclusive(async () => { + return await fs.readFile(storageFileName, 'utf-8') + }) + return JSON.parse(raw) + } catch (e) { + logger.error('Failed to parse storage:', e) + return [] + } + }, + + setFileName(name) { + storageFileName = name + } +};
\ No newline at end of file diff --git a/lib/logging.js b/lib/logging.js new file mode 100644 index 0000000..bca6eb6 --- /dev/null +++ b/lib/logging.js @@ -0,0 +1,35 @@ +const log4js = require('log4js') + +module.exports = { + configure(verbose) { + const categories = { + default: { + appenders: ['stdout-filter'], + level: 'trace' + } + } + const appenders = { + stdout: { + type: 'stdout', + level: 'warn' + }, + 'stdout-filter': { + type: 'logLevelFilter', + appender: 'stdout', + level: verbose ? 'debug' : 'warn' + } + } + log4js.configure({ + appenders, + categories + }) + }, + + getLogger(...args) { + return log4js.getLogger(...args) + }, + + shutdown(cb) { + log4js.shutdown(cb) + } +} |