diff options
Diffstat (limited to 'index.js')
-rw-r--r-- | index.js | 232 |
1 files changed, 128 insertions, 104 deletions
@@ -1,114 +1,138 @@ -const puppeteer = require('puppeteer-extra'); -const StealthPlugin = require('puppeteer-extra-plugin-stealth'); -puppeteer.use(StealthPlugin()); +const cookiesStorage = require('./cookies-storage') +const browser = require('./browser') +const os = require('os') +const path = require('path') + +const argv = require('minimist')(process.argv.slice(2), { + string: ['retries', 'timeout', 'cookies', 'port', 'proxy'], + boolean: ['no-sandbox', 'headful'], + stopEarly: true, + default: { + port: 3000, + retries: 10, + timeout: 30000, + cookies: path.join(os.homedir(), '.rt-pupflare-cookies.json') + } +}) + +const maxTryCount = parseInt(argv.retries) +const loadingTimeout = parseInt(argv.timeout) + const Koa = require('koa'); -const bodyParser = require('koa-bodyparser'); +const Router = require('@koa/router'); const app = new Koa(); -app.use(bodyParser()); -const jsesc = require('jsesc'); +const router = new Router(); -const headersToRemove = [ - "host", "user-agent", "accept", "accept-encoding", "content-length", - "forwarded", "x-forwarded-proto", "x-forwarded-for", "x-cloud-trace-context" -]; -const responseHeadersToRemove = ["Accept-Ranges", "Content-Length", "Keep-Alive", "Connection", "content-encoding", "set-cookie"]; -(async () => { - let options = { - headless: true, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }; - if (process.env.PUPPETEER_SKIP_CHROMIUM_DOWNLOAD) - options.executablePath = '/usr/bin/chromium-browser'; - if (process.env.PUPPETEER_HEADFUL) - options.headless = false; - if (process.env.PUPPETEER_PROXY) - options.args.push(`--proxy-server=${process.env.PUPPETEER_PROXY}`); - const browser = await puppeteer.launch(options); - app.use(async ctx => { - if (ctx.query.url) { - const url = ctx.url.replace("/?url=", ""); - let responseBody; - let responseData; - let responseHeaders; - const page = await browser.newPage(); - if (ctx.method == "POST") { - await page.removeAllListeners('request'); - await page.setRequestInterception(true); - page.on('request', interceptedRequest => { - var data = { - 'method': 'POST', - 'postData': ctx.request.rawBody - }; - interceptedRequest.continue(data); - }); - } - const client = await page.target().createCDPSession(); - await client.send('Network.setRequestInterception', { - patterns: [{ - urlPattern: '*', - resourceType: 'Document', - interceptionStage: 'HeadersReceived' - }], - }); +router.get('/request', async (ctx, next) => { + if (!ctx.query.url) + throw new Error('url not specified') - await client.on('Network.requestIntercepted', async e => { - let obj = { interceptionId: e.interceptionId }; - if (e.isDownload) { - await client.send('Network.getResponseBodyForInterception', { - interceptionId: e.interceptionId - }).then((result) => { - if (result.base64Encoded) { - responseData = Buffer.from(result.body, 'base64'); - } - }); - obj['errorReason'] = 'BlockedByClient'; - responseHeaders = e.responseHeaders; - } - await client.send('Network.continueInterceptedRequest', obj); - if (e.isDownload) - await page.close(); - }); - let headers = ctx.headers; - headersToRemove.forEach(header => { - delete headers[header]; - }); - await page.setExtraHTTPHeaders(headers); - try { - let response; - let tryCount = 0; - response = await page.goto(url, { timeout: 30000, waitUntil: 'domcontentloaded' }); - responseBody = await response.text(); - responseData = await response.buffer(); - while (responseBody.includes("cf-browser-verification") && tryCount <= 10) { - newResponse = await page.waitForNavigation({ timeout: 30000, waitUntil: 'domcontentloaded' }); - if (newResponse) response = newResponse; - responseBody = await response.text(); - responseData = await response.buffer(); - tryCount++; - } - responseHeaders = await response.headers(); - const cookies = await page.cookies(); - if (cookies) - cookies.forEach(cookie => { - const { name, value, secure, expires, domain, ...options } = cookie; - ctx.cookies.set(cookie.name, cookie.value, options); - }); - } catch (error) { - if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) { - ctx.status = 500; - ctx.body = error; + const page = await browser.getPage(); + + /*if (ctx.method === "POST") { + await page.removeAllListeners('request'); + await page.setRequestInterception(true); + page.on('request', interceptedRequest => { + var data = { + 'method': 'POST', + 'postData': ctx.request.rawBody + }; + interceptedRequest.continue(data); + }); + }*/ + + const client = await page.target().createCDPSession(); + await client.send('Network.setRequestInterception', { + patterns: [{ + urlPattern: '*', + resourceType: 'Document', + interceptionStage: 'HeadersReceived' + }], + }); + + let myResult = { + binary: false, + headers: [], + data: '' + } + + await client.on('Network.requestIntercepted', async e => { + let obj = { interceptionId: e.interceptionId }; + + if (e.isDownload) { + await client.send('Network.getResponseBodyForInterception', { + interceptionId: e.interceptionId + }).then((r) => { + if (r.base64Encoded) { + myResult.binary = true + myResult.data = r.body + } else { + console.error('not base64 encoded!') } - } + }) + obj['errorReason'] = 'BlockedByClient'; + myResult.headers = e.responseHeaders + } - await page.close(); - responseHeadersToRemove.forEach(header => delete responseHeaders[header]); - Object.keys(responseHeaders).forEach(header => ctx.set(header, jsesc(responseHeaders[header]))); - ctx.body = responseData; + await client.send('Network.continueInterceptedRequest', obj); + }); + + try { + let tryCount = 0 + let response = await page.goto(ctx.query.url, { + timeout: loadingTimeout, + waitUntil: 'domcontentloaded' + }) + + let body = await response.text(); + + while ((body.includes("cf-browser-verification") || body.includes('cf-captcha-container')) && tryCount <= maxTryCount) { + let newResponse = await page.waitForNavigation({ + timeout: loadingTimeout, + waitUntil: 'domcontentloaded' + }); + if (newResponse) + response = newResponse; + body = await response.text(); + tryCount++; } - else { - ctx.body = "Please specify the URL in the 'url' query string."; + + myResult.data = await response.text() + myResult.headers = await response.headers() + } catch (error) { + if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) { + ctx.status = 500 + ctx.body = error } - }); - app.listen(process.env.PORT || 3000); + } + + ctx.body = JSON.stringify(myResult) +}) +.get('/cookies', async (ctx, next) => { + ctx.body = JSON.stringify(await cookiesStorage.get()) +}); + + +(async () => { + cookiesStorage.setFileName(argv.cookies) + + console.log(argv) + + if (argv.proxy) + browser.setProxy(argv.proxy) + if (argv['no-sandbox']) + browser.disableSandbox() + if (argv.headful) + browser.setHeadful() + + await browser.launch() + + app.use(router.routes()) + .use(router.allowedMethods()) + app.on('error', (error) => { + console.error(error) + }) + + app.listen(parseInt(argv.port), '127.0.0.1') })(); |