From 88bc698e878d608f696008e58c9570fcdc6606b1 Mon Sep 17 00:00:00 2001 From: Evgeny Zinoviev Date: Mon, 14 Mar 2022 02:56:14 +0300 Subject: fix race condition and other stuff --- browser.js | 37 ++++++++++++++++---- index.js | 113 ++++++++++++++++++++++++++++--------------------------------- 2 files changed, 83 insertions(+), 67 deletions(-) diff --git a/browser.js b/browser.js index 81f7cc6..bf4d87c 100644 --- a/browser.js +++ b/browser.js @@ -11,19 +11,24 @@ const options = { let browser = null let page = null - +let cdpClient = null +let interceptCallback = null +let interceptionNeededCallback = null module.exports = { async launch() { browser = await puppeteer.launch(options) }, - async getPage() { + async getPage(_interceptionNeededCallback, _interceptCallback) { if (page && page.isClosed()) { page.removeAllListeners && page.removeAllListeners() page = null } + interceptionNeededCallback = _interceptionNeededCallback + interceptCallback = _interceptCallback + if (!page) { page = await browser.newPage() page.on('framenavigated', async () => { @@ -33,23 +38,43 @@ module.exports = { }) page.setCookie(...(await cookiesStorage.get())) + + cdpClient = await page.target().createCDPSession(); + await cdpClient.send('Network.setRequestInterception', { + patterns: [{ + urlPattern: '*', + resourceType: 'Document', + interceptionStage: 'HeadersReceived' + }], + }) + await cdpClient.on('Network.requestIntercepted', async e => { + let obj = { interceptionId: e.interceptionId } + if (interceptionNeededCallback && interceptionNeededCallback(e) === true) { + let ret = await cdpClient.send('Network.getResponseBodyForInterception', { + interceptionId: e.interceptionId + }) + interceptCallback(ret, e.responseHeaders) + obj['errorReason'] = 'BlockedByClient' + } + await cdpClient.send('Network.continueInterceptedRequest', obj) + }) } return page }, - get() { - return browser - }, + setProxy(proxy) { options.args.push(`--proxy-server=${proxy}`) }, + disableSandbox() { options.args.push( '--no-sandbox', '--disable-setuid-sandbox' ) }, + setHeadful() { - options.headless = false; + options.headless = false } } \ No newline at end of file diff --git a/index.js b/index.js index faee991..8a36078 100644 --- a/index.js +++ b/index.js @@ -28,7 +28,11 @@ router.get('/request', async (ctx, next) => { if (!ctx.query.url) throw new Error('url not specified') - const page = await browser.getPage(); + const myResult = { + binary: false, + headers: [], + data: '' + }; /*if (ctx.method === "POST") { await page.removeAllListeners('request'); @@ -42,82 +46,69 @@ router.get('/request', async (ctx, next) => { }); }*/ - const client = await page.target().createCDPSession(); - await client.send('Network.setRequestInterception', { - patterns: [{ - urlPattern: '*', - resourceType: 'Document', - interceptionStage: 'HeadersReceived' - }], - }); - - let myResult = { - binary: false, - headers: [], - data: '' - } - - await client.on('Network.requestIntercepted', async e => { - let obj = { interceptionId: e.interceptionId }; - - if (e.isDownload) { - await client.send('Network.getResponseBodyForInterception', { - interceptionId: e.interceptionId - }).then((r) => { - if (r.base64Encoded) { - myResult.binary = true - myResult.data = r.body - } else { - console.error('not base64 encoded!') - } + let myError = false + await new Promise(async (resolve, reject) => { + const page = await browser.getPage( + (e) => e.isDownload === true, + (response, headers) => { + Object.assign(myResult, { + data: response.base64Encoded ? response.body : btoa(response.body), + binary: true, + headers + }) + resolve() }) - obj['errorReason'] = 'BlockedByClient'; - myResult.headers = e.responseHeaders - } - - await client.send('Network.continueInterceptedRequest', obj); - }); - - try { - let tryCount = 0 - let response = await page.goto(ctx.query.url, { - timeout: loadingTimeout, - waitUntil: 'domcontentloaded' - }) - let body = await response.text(); - - while ((body.includes("cf-browser-verification") || body.includes('cf-captcha-container')) && tryCount <= maxTryCount) { - let newResponse = await page.waitForNavigation({ + try { + let tryCount = 0 + let response = await page.goto(ctx.query.url, { timeout: loadingTimeout, waitUntil: 'domcontentloaded' - }); - if (newResponse) - response = newResponse; - body = await response.text(); - tryCount++; - } + }) - myResult.data = await response.text() - myResult.headers = await response.headers() - } catch (error) { - if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) { - ctx.status = 500 - ctx.body = error + let body = await response.text() + while ((body.includes("cf-browser-verification") || body.includes('cf-captcha-container')) && tryCount <= maxTryCount) { + let newResponse = await page.waitForNavigation({ + timeout: loadingTimeout, + waitUntil: 'domcontentloaded' + }); + if (newResponse) + response = newResponse; + body = await response.text(); + tryCount++; + } + + myResult.data = await response.text() + myResult.headers = await response.headers() + + resolve() + } catch (error) { + if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) { + ctx.status = 500 + ctx.body = error + + resolve() + } else { + myError = true + } } - } + }) + + if (!myError) + ctx.body = JSON.stringify(myResult) - ctx.body = JSON.stringify(myResult) + await next() }) .get('/cookies', async (ctx, next) => { ctx.body = JSON.stringify(await cookiesStorage.get()) + await next() }); (async () => { cookiesStorage.setFileName(argv.cookies) - console.log(argv) + // console.log(argv) if (argv.proxy) browser.setProxy(argv.proxy) -- cgit v1.2.3