From 643942f57a6ebb0afb80f9d633b6b51affeee80d Mon Sep 17 00:00:00 2001 From: Evgeny Zinoviev Date: Tue, 15 Mar 2022 00:16:55 +0300 Subject: --reuse option --- browser.js | 123 ++++++++++++++++++++++++++++++++++--------------------------- index.js | 72 +++++++++++++++++++----------------- 2 files changed, 107 insertions(+), 88 deletions(-) diff --git a/browser.js b/browser.js index dfad399..d1f5c50 100644 --- a/browser.js +++ b/browser.js @@ -4,77 +4,90 @@ const cookiesStorage = require("./cookies-storage"); puppeteer.use(StealthPlugin()); -const options = { +const chromeOptions = { headless: true, args: [] }; let browser = null -let page = null let cdpClient = null -let interceptCallback = null -let interceptionNeededCallback = null -module.exports = { - async launch() { - browser = await puppeteer.launch(options) - }, +class PageWrapper { + constructor() { + this.intrNeededCallback = null + this.intrCallback = null + + this.page = null + } - async getPage(_interceptionNeededCallback, _interceptCallback) { - if (page && page.isClosed()) { - page.removeAllListeners && page.removeAllListeners() - page = null + async getPage(interceptionNeededCallback, interceptCallback) { + this.intrCallback = interceptCallback + this.intrNeededCallback = interceptionNeededCallback + + if (this.page !== null && this.page.isClosed()) { + this.page.removeAllListeners && this.page.removeAllListeners() + this.page = null } - interceptionNeededCallback = _interceptionNeededCallback - interceptCallback = _interceptCallback + if (this.page !== null) + return this.page - if (!page) { - page = await browser.newPage() - page.on('framenavigated', async () => { - let cookies = await page.cookies(); + this.page = await browser.newPage() + this.page.on('domcontentloaded', async () => { + try { + let cookies = await this.page.cookies(); if (cookies) await cookiesStorage.save(cookies) - }) - - await page.setCookie(...(await cookiesStorage.get())) - - cdpClient = await page.target().createCDPSession(); - await cdpClient.send('Network.setRequestInterception', { - patterns: [{ - urlPattern: '*', - resourceType: 'Document', - interceptionStage: 'HeadersReceived' - }], - }) - await cdpClient.on('Network.requestIntercepted', async e => { - let obj = { interceptionId: e.interceptionId } - if (interceptionNeededCallback && interceptionNeededCallback(e) === true) { - let ret = await cdpClient.send('Network.getResponseBodyForInterception', { - interceptionId: e.interceptionId - }) - interceptCallback(ret, e.responseHeaders) - obj['errorReason'] = 'BlockedByClient' - } - await cdpClient.send('Network.continueInterceptedRequest', obj) - }) - } + } catch (e) { + console.warn(e) + } + }) - return page - }, + await this.page.setCookie(...(await cookiesStorage.get())) - setProxy(proxy) { - options.args.push(`--proxy-server=${proxy}`) - }, + cdpClient = await this.page.target().createCDPSession(); + await cdpClient.send('Network.setRequestInterception', { + patterns: [{ + urlPattern: '*', + resourceType: 'Document', + interceptionStage: 'HeadersReceived' + }], + }) + await cdpClient.on('Network.requestIntercepted', async e => { + let obj = { interceptionId: e.interceptionId } + if (this.intrNeededCallback && this.intrNeededCallback(e) === true) { + let ret = await cdpClient.send('Network.getResponseBodyForInterception', { + interceptionId: e.interceptionId + }) + this.intrCallback(ret, e.responseHeaders) + obj['errorReason'] = 'BlockedByClient' + } + await cdpClient.send('Network.continueInterceptedRequest', obj) + }) - disableSandbox() { - options.args.push( - '--no-sandbox', - '--disable-setuid-sandbox' - ) + return this.page + } +} + +let singlePageWrapper = new PageWrapper() + +module.exports = { + async launch(options) { + if (options.proxy) + chromeOptions.args.push(`--proxy-server=${options.proxy}`) + + if (options.noSandbox) + chromeOptions.args.push( + '--no-sandbox', + '--disable-setuid-sandbox' + ) + + if (options.headful) + chromeOptions.headless = false + + browser = await puppeteer.launch(chromeOptions) }, - setHeadful() { - options.headless = false - } + singlePageWrapper, + PageWrapper, } \ No newline at end of file diff --git a/index.js b/index.js index 2d6d9bd..d96216e 100644 --- a/index.js +++ b/index.js @@ -1,20 +1,23 @@ const cookiesStorage = require('./cookies-storage') const browser = require('./browser') +const {singlePageWrapper, PageWrapper} = browser const os = require('os') const path = require('path') const argv = require('minimist')(process.argv.slice(2), { string: ['retries', 'timeout', 'cookies', 'port', 'proxy'], - boolean: ['no-sandbox', 'headful'], + boolean: ['no-sandbox', 'headful', 'reuse'], stopEarly: true, default: { port: 3000, retries: 10, timeout: 30000, - cookies: path.join(os.homedir(), '.rt-pupflare-cookies.json') + cookies: path.join(os.homedir(), '.rt-pupflare-cookies.json'), + reuse: false, } }) +let reusePage = argv.reuse const maxTryCount = parseInt(argv.retries) const loadingTimeout = parseInt(argv.timeout) @@ -34,30 +37,34 @@ router.get('/request', async (ctx, next) => { data: '' }; - /*if (ctx.method === "POST") { - await page.removeAllListeners('request'); - await page.setRequestInterception(true); - page.on('request', interceptedRequest => { - var data = { - 'method': 'POST', - 'postData': ctx.request.rawBody - }; - interceptedRequest.continue(data); - }); - }*/ - let responseSet = false + let pageWrapper = null + await new Promise(async (resolve, reject) => { - const page = await browser.getPage( - (e) => e.isDownload === true, - (response, headers) => { - Object.assign(myResult, { - data: response.base64Encoded ? response.body : btoa(response.body), - binary: true, - headers + const fInterceptionNeeded = (e) => e.isDownload === true + const fInterception = (response, headers) => { + Object.assign(myResult, { + data: response.base64Encoded ? response.body : btoa(response.body), + binary: true, + headers + }) + resolve() + } + + pageWrapper = reusePage ? singlePageWrapper : new PageWrapper() + const page = await pageWrapper.getPage(fInterceptionNeeded, fInterception) + + // not tested + if (ctx.method === "POST") { + await page.removeAllListeners('request') + await page.setRequestInterception(true) + page.on('request', interceptedRequest => { + interceptedRequest.continue({ + 'method': 'POST', + 'postData': ctx.request.rawBody }) - resolve() }) + } try { let tryCount = 0 @@ -96,6 +103,9 @@ router.get('/request', async (ctx, next) => { if (!responseSet) ctx.body = JSON.stringify(myResult) + if (!reusePage) + pageWrapper.page.close() + await next() }) .get('/cookies', async (ctx, next) => { @@ -107,21 +117,17 @@ router.get('/request', async (ctx, next) => { (async () => { cookiesStorage.setFileName(argv.cookies) - // console.log(argv) - - if (argv.proxy) - browser.setProxy(argv.proxy) - if (argv['no-sandbox']) - browser.disableSandbox() - if (argv.headful) - browser.setHeadful() - - await browser.launch() + await browser.launch({ + proxy: argv.proxy ?? null, + noSandbox: argv['no-sandbox'] ?? false, + headful: argv.headful ?? false, + }) app.use(router.routes()) .use(router.allowedMethods()) + app.on('error', (error) => { - console.error(error) + console.error('[app.onerror]', error) }) app.listen(parseInt(argv.port), '127.0.0.1') -- cgit v1.2.3