about summary refs log tree commit diff
path: root/index.js
diff options
context:
space:
mode:
Diffstat (limited to 'index.js')
-rw-r--r-- index.js 232
1 files changed, 128 insertions, 104 deletions
diff --git a/index.js b/index.js
index f769296..faee991 100644
--- a/index.js
+++ b/index.js
@@ -1,114 +1,138 @@
-const puppeteer = require('puppeteer-extra');
-const StealthPlugin = require('puppeteer-extra-plugin-stealth');
-puppeteer.use(StealthPlugin());
+const cookiesStorage = require('./cookies-storage')
+const browser = require('./browser')
+const os = require('os')
+const path = require('path')
+
+const argv = require('minimist')(process.argv.slice(2), { // command-line options, parsed by minimist
+ string: ['retries', 'timeout', 'cookies', 'port', 'proxy'], // kept as strings; the numeric ones are parseInt()'d where they are used
+ boolean: ['no-sandbox', 'headful'], // NOTE(review): minimist is believed to read `--no-sandbox` as `sandbox: false` - confirm this key is ever set
+ stopEarly: true,
+ default: {
+ port: 3000,
+ retries: 10,
+ timeout: 30000,
+ cookies: path.join(os.homedir(), '.rt-pupflare-cookies.json') // default cookie file lives in the user's home directory
+ }
+})
+
+const maxTryCount = parseInt(argv.retries, 10) // explicit radix; upper bound for the challenge-wait loop in the /request handler
+const loadingTimeout = parseInt(argv.timeout, 10) // explicit radix; passed as `timeout` to page.goto() and page.waitForNavigation()
+
const Koa = require('koa');
-const bodyParser = require('koa-bodyparser');
+const Router = require('@koa/router');
const app = new Koa();
-app.use(bodyParser());
-const jsesc = require('jsesc');
+const router = new Router();
-const headersToRemove = [
- "host", "user-agent", "accept", "accept-encoding", "content-length",
- "forwarded", "x-forwarded-proto", "x-forwarded-for", "x-cloud-trace-context"
-];
-const responseHeadersToRemove = ["Accept-Ranges", "Content-Length", "Keep-Alive", "Connection", "content-encoding", "set-cookie"];
-(async () => {
- let options = {
- headless: true,
- args: ['--no-sandbox', '--disable-setuid-sandbox']
- };
- if (process.env.PUPPETEER_SKIP_CHROMIUM_DOWNLOAD)
- options.executablePath = '/usr/bin/chromium-browser';
- if (process.env.PUPPETEER_HEADFUL)
- options.headless = false;
- if (process.env.PUPPETEER_PROXY)
- options.args.push(`--proxy-server=${process.env.PUPPETEER_PROXY}`);
- const browser = await puppeteer.launch(options);
- app.use(async ctx => {
- if (ctx.query.url) {
- const url = ctx.url.replace("/?url=", "");
- let responseBody;
- let responseData;
- let responseHeaders;
- const page = await browser.newPage();
- if (ctx.method == "POST") {
- await page.removeAllListeners('request');
- await page.setRequestInterception(true);
- page.on('request', interceptedRequest => {
- var data = {
- 'method': 'POST',
- 'postData': ctx.request.rawBody
- };
- interceptedRequest.continue(data);
- });
- }
- const client = await page.target().createCDPSession();
- await client.send('Network.setRequestInterception', {
- patterns: [{
- urlPattern: '*',
- resourceType: 'Document',
- interceptionStage: 'HeadersReceived'
- }],
- });
+router.get('/request', async (ctx, next) => { // GET /request?url=...: load the URL in the browser page and reply with JSON {binary, headers, data}
+ if (!ctx.query.url)
+ ctx.throw(400, 'url not specified') // a missing parameter is a client error, not a server failure
- await client.on('Network.requestIntercepted', async e => {
- let obj = { interceptionId: e.interceptionId };
- if (e.isDownload) {
- await client.send('Network.getResponseBodyForInterception', {
- interceptionId: e.interceptionId
- }).then((result) => {
- if (result.base64Encoded) {
- responseData = Buffer.from(result.body, 'base64');
- }
- });
- obj['errorReason'] = 'BlockedByClient';
- responseHeaders = e.responseHeaders;
- }
- await client.send('Network.continueInterceptedRequest', obj);
- if (e.isDownload)
- await page.close();
- });
- let headers = ctx.headers;
- headersToRemove.forEach(header => {
- delete headers[header];
- });
- await page.setExtraHTTPHeaders(headers);
- try {
- let response;
- let tryCount = 0;
- response = await page.goto(url, { timeout: 30000, waitUntil: 'domcontentloaded' });
- responseBody = await response.text();
- responseData = await response.buffer();
- while (responseBody.includes("cf-browser-verification") && tryCount <= 10) {
- newResponse = await page.waitForNavigation({ timeout: 30000, waitUntil: 'domcontentloaded' });
- if (newResponse) response = newResponse;
- responseBody = await response.text();
- responseData = await response.buffer();
- tryCount++;
- }
- responseHeaders = await response.headers();
- const cookies = await page.cookies();
- if (cookies)
- cookies.forEach(cookie => {
- const { name, value, secure, expires, domain, ...options } = cookie;
- ctx.cookies.set(cookie.name, cookie.value, options);
- });
- } catch (error) {
- if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) {
- ctx.status = 500;
- ctx.body = error;
+ const page = await browser.getPage(); // NOTE(review): the page is never closed here - confirm ./browser owns its lifetime
+
+ /*if (ctx.method === "POST") {
+ await page.removeAllListeners('request');
+ await page.setRequestInterception(true);
+ page.on('request', interceptedRequest => {
+ var data = {
+ 'method': 'POST',
+ 'postData': ctx.request.rawBody
+ };
+ interceptedRequest.continue(data);
+ });
+ }*/
+
+ const client = await page.target().createCDPSession();
+ await client.send('Network.setRequestInterception', { // intercept Document responses at the HeadersReceived stage
+ patterns: [{
+ urlPattern: '*',
+ resourceType: 'Document',
+ interceptionStage: 'HeadersReceived'
+ }],
+ });
+
+ let myResult = { // response payload, serialized as JSON at the end of the handler
+ binary: false, // set to true when data holds the base64-encoded body of a download
+ headers: [],
+ data: ''
+ }
+
+ client.on('Network.requestIntercepted', async e => { // on() only registers the listener, so there is nothing to await
+ let obj = { interceptionId: e.interceptionId };
+
+ if (e.isDownload) { // downloads: fetch the body over CDP, then fail the request with BlockedByClient
+ await client.send('Network.getResponseBodyForInterception', {
+ interceptionId: e.interceptionId
+ }).then((r) => {
+ if (r.base64Encoded) {
+ myResult.binary = true
+ myResult.data = r.body
+ } else {
+ console.error('not base64 encoded!')
 }
- }
+ })
+ obj['errorReason'] = 'BlockedByClient';
+ myResult.headers = e.responseHeaders
+ }
- await page.close();
- responseHeadersToRemove.forEach(header => delete responseHeaders[header]);
- Object.keys(responseHeaders).forEach(header => ctx.set(header, jsesc(responseHeaders[header])));
- ctx.body = responseData;
+ await client.send('Network.continueInterceptedRequest', obj);
+ });
+
+ try {
+ let tryCount = 0
+ let response = await page.goto(ctx.query.url, {
+ timeout: loadingTimeout,
+ waitUntil: 'domcontentloaded'
+ })
+
+ let body = await response.text();
+
+ while ((body.includes("cf-browser-verification") || body.includes('cf-captcha-container')) && tryCount <= maxTryCount) { // body still contains a challenge marker: wait for the next navigation
+ let newResponse = await page.waitForNavigation({
+ timeout: loadingTimeout,
+ waitUntil: 'domcontentloaded'
+ });
+ if (newResponse)
+ response = newResponse;
+ body = await response.text();
+ tryCount++;
 }
- else {
- ctx.body = "Please specify the URL in the 'url' query string.";
+
+ myResult.data = await response.text()
+ myResult.headers = await response.headers()
+ } catch (error) {
+ if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) { // presumably ERR_BLOCKED_BY_CLIENT comes from the BlockedByClient abort above; that case falls through and returns the download
+ ctx.status = 500
+ ctx.body = String(error) // an Error object would be serialized as `{}`, so send its text
+ return // stop here so the JSON result below does not overwrite the error body
 }
- });
- app.listen(process.env.PORT || 3000);
+ }
+ ctx.body = JSON.stringify(myResult)
+})
+.get('/cookies', async (ctx, next) => { // GET /cookies: reply with whatever cookiesStorage.get() resolves to, as JSON
+ ctx.body = JSON.stringify(await cookiesStorage.get())
+});
+
+
+(async () => { // startup: apply CLI options, launch the browser, then start serving
+ cookiesStorage.setFileName(argv.cookies)
+
+ console.log(argv)
+
+ if (argv.proxy)
+ browser.setProxy(argv.proxy)
+ if (argv['no-sandbox'] || argv.sandbox === false) // minimist turns `--no-sandbox` into `sandbox: false`, so accept both spellings
+ browser.disableSandbox()
+ if (argv.headful)
+ browser.setHeadful()
+
+ await browser.launch()
+
+ app.use(router.routes())
+ .use(router.allowedMethods())
+ app.on('error', (error) => { // log errors emitted on the Koa app
+ console.error(error)
+ })
+
+ app.listen(parseInt(argv.port, 10), '127.0.0.1') // explicit radix; binds to the loopback address only
 })();