74 lines
3 KiB
JavaScript
74 lines
3 KiB
JavaScript
// https://stackoverflow.com/questions/46745014/alternative-for-dirname-in-node-js-when-using-es6-modules
|
|
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
// not the same since these will give the absolute paths for this file instead of for the file using them
|
|
const __filename = fileURLToPath(import.meta.url);
|
|
const __dirname = path.dirname(__filename);
|
|
// explicit object instead of Object.fromEntries since the built-in type would loose the keys, better type: https://dev.to/svehla/typescript-object-fromentries-389c
|
|
const dataDir = s => path.resolve(__dirname, 'data', s);
|
|
export const dirs = {
|
|
data: dataDir('.'),
|
|
browser: dataDir('browser'),
|
|
screenshots: dataDir('screenshots'),
|
|
};
|
|
|
|
import { Low, JSONFile } from 'lowdb';
|
|
export const jsonDb = async file => {
|
|
const db = new Low(new JSONFile(dataDir(file)));
|
|
await db.read();
|
|
return db;
|
|
}
|
|
|
|
export const datetime = (d = new Date()) => d.toISOString();
|
|
export const sanitizeFilename = s => s.replace(/[^a-z0-9_\-]/gi, '_'); // alternative: https://www.npmjs.com/package/filenamify
|
|
|
|
// stealth with playwright: https://github.com/berstend/puppeteer-extra/issues/454#issuecomment-917437212
|
|
const newStealthContext = async (browser, contextOptions = {}, debug = false) => {
|
|
if (!debug) { // only need to fix userAgent in headless mode
|
|
const dummyContext = await browser.newContext();
|
|
const originalUserAgent = await (await dummyContext.newPage()).evaluate(() => navigator.userAgent);
|
|
await dummyContext.close();
|
|
// console.log('originalUserAgent:', originalUserAgent); // Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/96.0.4664.110 Safari/537.36
|
|
contextOptions = {
|
|
...contextOptions,
|
|
userAgent: originalUserAgent.replace("Headless", ""), // HeadlessChrome -> Chrome, TODO needed?
|
|
};
|
|
}
|
|
};
|
|
|
|
export const stealth = async (context) => {
|
|
// stealth with playwright: https://github.com/berstend/puppeteer-extra/issues/454#issuecomment-917437212
|
|
// https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth/evasions
|
|
const enabledEvasions = [
|
|
'chrome.app',
|
|
'chrome.csi',
|
|
'chrome.loadTimes',
|
|
'chrome.runtime',
|
|
// 'defaultArgs',
|
|
'iframe.contentWindow',
|
|
'media.codecs',
|
|
'navigator.hardwareConcurrency',
|
|
'navigator.languages',
|
|
'navigator.permissions',
|
|
'navigator.plugins',
|
|
// 'navigator.vendor',
|
|
'navigator.webdriver',
|
|
'sourceurl',
|
|
// 'user-agent-override', // doesn't work since playwright has no page.browser()
|
|
'webgl.vendor',
|
|
'window.outerdimensions'
|
|
];
|
|
const stealth = {
|
|
callbacks: [],
|
|
async evaluateOnNewDocument(...args) {
|
|
this.callbacks.push({ cb: args[0], a: args[1] })
|
|
}
|
|
}
|
|
for (const e of enabledEvasions) {
|
|
const evasion = await import(`puppeteer-extra-plugin-stealth/evasions/${e}/index.js`);
|
|
evasion.default().onPageCreated(stealth);
|
|
}
|
|
for (let evasion of stealth.callbacks) {
|
|
await context.addInitScript(evasion.cb, evasion.a);
|
|
}
|
|
}
|