Crawl web as easy as possible
`npm install @web-master/node-web-crawler`

# 😎 @web-master/node-web-crawler 😎
Crawl web as easy as possible
It crawls the target page, collects links and scrapes data on each page :)
```bash
$ npm install --save @web-master/node-web-crawler
```
```js
import crawl from '@web-master/node-web-crawler';

// crawl data on each link
const data = await crawl({
  target: {
    url: 'https://news.ycombinator.com',
    iterator: {
      selector: 'span.age > a',
      convert: (x) => `https://news.ycombinator.com/${x}`,
    },
  },
  fetch: () => ({
    title: '.title > a',
  }),
});

console.log(data);
// [
//   { title: 'An easiest crawling and scraping module for NestJS' },
//   { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
//   ...
//   ...
//   { title: '[Experimental] React SSR as a view template engine' }
// ]
```
```js
import crawl from '@web-master/node-web-crawler';

// crawl data on each link
const data = await crawl({
  target: {
    url: 'https://news.ycombinator.com',
    iterator: {
      selector: 'span.age > a',
      convert: (x) => `https://news.ycombinator.com/${x}`,
    },
  },
  waitFor: 3 * 1000, // wait for the content to load (e.g. single-page apps)
  fetch: () => ({
    title: '.title > a',
  }),
});

console.log(data);
// [
//   { title: 'An easiest crawling and scraping module for NestJS' },
//   { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
//   ...
//   ...
//   { title: '[Experimental] React SSR as a view template engine' }
// ]
```
```ts
import crawl from '@web-master/node-web-crawler';

interface HackerNewsPage {
  title: string;
}

const pages: HackerNewsPage[] = await crawl({
  target: {
    url: 'https://news.ycombinator.com',
    iterator: {
      selector: 'span.age > a',
      convert: (x) => `https://news.ycombinator.com/${x}`,
    },
  },
  fetch: () => ({
    title: '.title > a',
  }),
});

console.log(pages);
// [
//   { title: 'An easiest crawling and scraping module for NestJS' },
//   { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
//   ...
//   ...
//   { title: '[Experimental] React SSR as a view template engine' }
// ]
```
- @web-master/node-web-fetch
- @web-master/node-web-scraper
- IonicaBizau/scrape-it