The HTTP client, powered by Puppeteer and the Chrome/Chromium browser, excels at accessing dynamic web pages.
Installation
```bash
$ npm install --save @mikosoft/httpclient-pptr
```
Options
Options is the object passed as the constructor parameter. Its properties are:
- puppeteerLaunchOptions :object - https://pptr.dev/api/puppeteer.puppeteerlaunchoptions
- device :string|object - device name of KnownDevices or custom object {name, userAgent, viewport}
- cookies :object[] - array of cookies [{name, value, domain, path, expires, httpOnly, secure}, ...]
- storage :{local:object, session:object} - localStorage and sessionStorage {local: {key1: val1, key2: val2, ...}, session: {key1: val1, key2: val2, ...}}
- evaluateOnNewDocument_callback :Function - a callback function that will be executed in page.evaluateOnNewDocument(this.opts.evaluateOnNewDocument_callback)
- extraRequestHeaders :object - additional request headers
- blockResources :string[] - resources to block during the request, for example: ['image', 'stylesheet', 'font', 'script']
- gotoOpts :object - goto options used in page.goto(url, opts) - {referer:string, timeout:number, waitUntil:'load'|'domcontentloaded'|'networkidle0'|'networkidle2'}
- closeBrowser :boolean - close the browser either after the response is received or if an error occurs during page.goto(url)
- waitCSSselector :{selector:string, timeout:number} - wait for CSS selector before sending answer -- default timeout is 10000ms
- postGoto :Function - function which will be executed after page.goto(), scroll, click on popup, etc. for example: page => {page.evaluate(...);}
- debug :boolean

Example
```js
const puppeteer = require('puppeteer-core');
const { HttpClientPptr } = require('@mikosoft/httpclient-pptr');

const fetchURL = async () => {
  const opts = {
    puppeteerLaunchOptions: {
      executablePath: '/usr/bin/google-chrome',
      headless: false, // new, old, false
      devtools: false, // open Chrome devtools
      dumpio: false, // If true, pipes the browser process stdout and stderr to process.stdout and process.stderr
      slowMo: 13,
      args: [
        '--start-maximized', // full window width and height
      ],
      ignoreDefaultArgs: [
        '--enable-automation' // remove "Chrome is being controlled by automated test software"
      ],
      defaultViewport: null, // override default viewport size {width: 800, height: 600} - https://pptr.dev/api/puppeteer.browserconnectoptions/#defaultviewport
    },
    device: null, // {name, userAgent, viewport}
    cookies: null, // [{name, value, domain, path, expires, httpOnly, secure}, ...]
    storage: null, // localStorage and sessionStorage {local: {key1: val1, key2: val2, ...}, session: {key1: val1, key2: val2, ...}}
    evaluateOnNewDocument_callback: null,
    extraRequestHeaders: {}, // additional HTTP request headers - {authorization: 'JWT ...'}
    blockResources: [], // resources to block during the request, for example: ['image', 'stylesheet', 'font', 'script']
    gotoOpts: {}, // used in page.goto(url, opts) - {referer:string, timeout:number, waitUntil:'load'|'domcontentloaded'|'networkidle0'|'networkidle2'} - https://pptr.dev/api/puppeteer.gotooptions
    closeBrowser: false, // close browser after answer is received or on page.goto error
    waitCSSselector: null,
    postGoto: null, // function which will be executed after page.goto(), scroll, click on popup, etc. for example: postGoto: page => {page.evaluate(...);}
    debug: false
  };
  const hcp = new HttpClientPptr(opts);
  hcp.injectPuppeteer(puppeteer);
  const answer = await hcp.askOnce(url);
  hcp.print(answer);
};
fetchURL().catch(console.log);
```
API
#### constructor(opts)

#### injectPuppeteer(puppeteer)
Inject puppeteer or puppeteer-core. Check compatibility with the installed chrome browser https://pptr.dev/chromium-support .
#### set_executablePath(pathsObj)
Define executable path to chrome or chrome based browser, for example:
```
{
  linux: '/usr/bin/google-chrome',
  win32: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
  darwin: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
}
```
#### set_window(width:number, height:number, x:number, y:number)
Set window size and position. If width or height has falsy value then window is maximized with '--start-maximized' arg.
#### set_device(dev :string|object)
Specify the device to be emulated. If a string is provided, it should match one of the KnownDevices. If an object is provided, a custom device configuration will be used.
#### set_cookies(cookieArr :object[])
Define a cookie array of objects that will be loaded before the page opens. The cookieArr is an array of cookies [{name, value, domain, path, expires, httpOnly, secure}, ...].
#### set_storage(storageObj :{local:object, session:object})
Define a localStorage and sessionStorage object that will be loaded before the page opens. The storageObj is {local: {key1: val1, key2: val2, ...}, session: {key1: val1, key2: val2, ...}}.
#### set_evaluateOnNewDocument(cb :Function)
Define callback function that will be executed within page.evaluateOnNewDocument(cb).
It's useful to set navigator.webdriver to false:
```js
await page.evaluateOnNewDocument(() => {
  Object.defineProperty(navigator, 'webdriver', {
    get: () => false,
  });
});
```
#### askOnce(url)
Get answer from the requested URL.
`js
ANSWER::
{
requestMethod: 'GET',
requestURL: 'https://www.dex8.com',
finalURL: 'https://www.dex8.com/',
status: 200,
statusMessage: 'OK',
decompressed: true,
https: true,
req: {
headers: {
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
'sec-ch-ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Linux"',
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
}
},
res: {
headers: {
'access-control-allow-headers': 'Origin, X-Requested-With, Content-Type, Accept, Authorization',
'access-control-allow-methods': 'GET',
'access-control-allow-origin': '*',
'access-control-max-age': '3600',
connection: 'keep-alive',
'content-encoding': 'gzip',
'content-type': 'text/html; charset=utf-8',
date: 'Wed, 21 Aug 2024 10:04:17 GMT',
server: 'nginx/1.17.10 (Ubuntu)',
'transfer-encoding': 'chunked'
},
content: '\n' +
' \n' +
' \n' +
' DEX8 - Data Extraction and Browser Automation SaaS \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
'\n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
' \n' +
'\n' +
' \n' +
' \n' +
'\n' +
' \n' +
' \n' +
'