SharpAPI.com Node.js SDK for Web Scraping API
npm install @sharpapi/sharpapi-node-web-scraping

SharpAPI Web Scraping fetches and extracts content from web pages, providing structured data including page metadata, content, links, and more in a machine-readable JSON format. Perfect for data collection, content aggregation, SEO analysis, and research.
---
1. Requirements
2. Installation
3. Usage
4. API Documentation
5. Examples
6. Use Cases
7. Response Format
8. AI Integration
9. API Endpoint
10. Related Packages
11. License
---
- Node.js >= 16.x
- npm or yarn
---
```bash
npm install @sharpapi/sharpapi-node-web-scraping
```
Visit SharpAPI.com to get your API key.
---
```javascript
const { SharpApiWebScrapingService } = require('@sharpapi/sharpapi-node-web-scraping');
const apiKey = process.env.SHARP_API_KEY; // Store your API key in environment variables
const service = new SharpApiWebScrapingService(apiKey);
async function scrapeWebsite() {
  try {
    // Scrape a webpage
    const data = await service.scrapeUrl('https://sharpapi.com');
    console.log('Title:', data.title);
    console.log('Description:', data.meta_description);
    console.log('Keywords:', data.meta_keywords);
    console.log('Content length:', data.content.length);
    console.log('Links found:', data.links.length);
  } catch (error) {
    console.error('Error:', error.message);
  }
}
scrapeWebsite();
```
---
This endpoint is synchronous and returns data immediately (no polling required).
#### scrapeUrl(url)
Scrape a webpage and extract its content in structured format.
Parameters:
- url (string, required): The URL to scrape (e.g., 'https://example.com' or 'example.com')
Returns: `Promise<object>` – resolves with the scraped page data as a structured JSON object
Extracts:
- Page metadata: Title, description, keywords, author
- Open Graph tags: OG:title, OG:description, OG:image, OG:type
- Twitter Card tags: Twitter:card, Twitter:title, Twitter:description
- Content structure: Headings (H1-H6), paragraphs, main content
- Links: Internal and external links with anchors
- Meta information: Language, charset, viewport, canonical URL
- Timestamps: Extraction date and time
Example:
```javascript
const data = await service.scrapeUrl('https://example.com');
console.log('Page Title:', data.title);
console.log('Main Content:', data.content);
console.log('All Links:', data.links);
```
---
```javascript
const { SharpApiWebScrapingService } = require('@sharpapi/sharpapi-node-web-scraping');
const service = new SharpApiWebScrapingService(process.env.SHARP_API_KEY);
async function scrapeExample() {
  const result = await service.scrapeUrl('https://sharpapi.com');
  console.log('=== Page Information ===');
  console.log('Title:', result.title);
  console.log('Description:', result.meta_description);
  console.log('Language:', result.language);
  console.log('Canonical URL:', result.canonical_url);
  console.log('\n=== Content ===');
  console.log('Characters:', result.content.length);
  console.log('Preview:', result.content.substring(0, 200) + '...');
  console.log('\n=== Links Found ===');
  console.log('Total links:', result.links.length);
  result.links.slice(0, 5).forEach(link => {
    console.log(`- ${link.text || 'No text'}: ${link.url}`);
  });
}
scrapeExample();
```
```javascript
const service = new SharpApiWebScrapingService(process.env.SHARP_API_KEY);
async function getSocialMetadata(url) {
  const data = await service.scrapeUrl(url);
  console.log('=== Open Graph Tags ===');
  console.log('OG:Title:', data.og_title);
  console.log('OG:Description:', data.og_description);
  console.log('OG:Image:', data.og_image);
  console.log('OG:Type:', data.og_type);
  console.log('\n=== Twitter Card ===');
  console.log('Card Type:', data.twitter_card);
  console.log('Title:', data.twitter_title);
  console.log('Description:', data.twitter_description);
  console.log('Image:', data.twitter_image);
}
getSocialMetadata('https://example.com/article');
```
```javascript
const service = new SharpApiWebScrapingService(process.env.SHARP_API_KEY);
async function analyzeSEO(url) {
  const data = await service.scrapeUrl(url);
  console.log('=== SEO Analysis ===');
  console.log('Title:', data.title, `(${data.title.length} chars)`);
  console.log('Meta Description:', data.meta_description);
  console.log('Keywords:', data.meta_keywords);
  console.log('Canonical URL:', data.canonical_url);
  console.log('Language:', data.language);
  console.log('\n=== Headings Structure ===');
  if (data.headings) {
    data.headings.forEach(heading => {
      console.log(`${heading.level}: ${heading.text}`);
    });
  }
  console.log('\n=== Link Analysis ===');
  const internalLinks = data.links.filter(l => l.type === 'internal');
  const externalLinks = data.links.filter(l => l.type === 'external');
  console.log(`Internal links: ${internalLinks.length}`);
  console.log(`External links: ${externalLinks.length}`);
}
analyzeSEO('https://your-website.com');
```
```javascript
const service = new SharpApiWebScrapingService(process.env.SHARP_API_KEY);
async function extractForAI(url) {
  const data = await service.scrapeUrl(url);
  // Extract clean content for AI processing
  const cleanContent = {
    title: data.title,
    description: data.meta_description,
    mainContent: data.content,
    language: data.language,
    author: data.author,
    publishedDate: data.published_date,
    modifiedDate: data.modified_date
  };
  console.log('Extracted content ready for AI processing:');
  console.log(JSON.stringify(cleanContent, null, 2));
  // Now you can pass this to SharpAPI AI endpoints:
  // - Summarization: @sharpapi/sharpapi-node-summarize-text
  // - Translation: @sharpapi/sharpapi-node-translate
  // - Keywords: @sharpapi/sharpapi-node-generate-keywords
  // - SEO Tags: @sharpapi/sharpapi-node-seo-tags
}
extractForAI('https://blog.example.com/article');
```
```javascript
const service = new SharpApiWebScrapingService(process.env.SHARP_API_KEY);
async function analyzeCompetitor(url) {
  const data = await service.scrapeUrl(url);
  console.log('=== Competitor Analysis ===');
  console.log('Domain:', new URL(url).hostname);
  console.log('Title Strategy:', data.title);
  console.log('Description:', data.meta_description);
  console.log('Keywords Focus:', data.meta_keywords);
  console.log('\n=== Content Strategy ===');
  console.log('Content Length:', data.content.length, 'characters');
  console.log('Word Count (approx):', Math.round(data.content.split(' ').length));
  console.log('\n=== Link Building ===');
  const externalLinks = data.links.filter(l => l.type === 'external');
  console.log('External Links:', externalLinks.length);
  externalLinks.slice(0, 10).forEach(link => {
    console.log(` - ${link.url}`);
  });
}
analyzeCompetitor('https://competitor-website.com');
```
---
- Content Aggregation: Collect content from multiple sources
- Price Monitoring: Track competitor pricing and availability
- Research: Gather data for analysis and insights
- Lead Generation: Extract business information from websites
- Market Intelligence: Monitor industry trends and news
- SEO Analysis: Analyze competitor websites and content
- Content Curation: Extract articles for content platforms
- Social Media Monitoring: Track mentions and brand presence
- Data Enrichment: Enhance existing data with web-sourced information
- Competitive Intelligence: Analyze competitor strategies
---
The API returns a comprehensive JSON object with the following structure:
```json
{
  "url": "https://sharpapi.com/",
  "title": "SharpAPI - AI-Powered Workflow Automation API",
  "meta_description": "Automate workflows with AI-powered API...",
  "meta_keywords": "AI API, automation, workflow",
  "author": "SharpAPI Team",
  "language": "en",
  "charset": "UTF-8",
  "canonical_url": "https://sharpapi.com/",
  "viewport": "width=device-width, initial-scale=1",
  "og_title": "SharpAPI - AI-Powered API",
  "og_description": "Automate your workflows...",
  "og_image": "https://sharpapi.com/og-image.jpg",
  "og_type": "website",
  "og_url": "https://sharpapi.com/",
  "twitter_card": "summary_large_image",
  "twitter_title": "SharpAPI",
  "twitter_description": "AI-Powered API",
  "twitter_image": "https://sharpapi.com/twitter-card.jpg",
  "content": "Full page content as text...",
  "text_content": "Clean text without HTML...",
  "headings": [
    { "level": "h1", "text": "Main Heading" },
    { "level": "h2", "text": "Subheading" }
  ],
  "links": [
    {
      "url": "https://sharpapi.com/about",
      "text": "About Us",
      "type": "internal",
      "rel": null
    },
    {
      "url": "https://example.com",
      "text": "External Link",
      "type": "external",
      "rel": "nofollow"
    }
  ],
  "images": [
    {
      "src": "https://sharpapi.com/image.jpg",
      "alt": "Image description"
    }
  ],
  "extracted_at": "2026-01-10T15:30:00Z",
  "processing_time_ms": 1250
}
```
---
The extracted data can be seamlessly integrated with SharpAPI's AI-powered endpoints for further analysis:
```javascript
const { SharpApiWebScrapingService } = require('@sharpapi/sharpapi-node-web-scraping');
const { SharpApiSummarizeService } = require('@sharpapi/sharpapi-node-summarize-text');
const scrapingService = new SharpApiWebScrapingService(process.env.SHARP_API_KEY);
const summarizeService = new SharpApiSummarizeService(process.env.SHARP_API_KEY);
async function scrapeAndSummarize(url) {
  // 1. Scrape the webpage
  const scraped = await scrapingService.scrapeUrl(url);
  // 2. Summarize the content
  const statusUrl = await summarizeService.summarize(scraped.content);
  const summary = await summarizeService.fetchResults(statusUrl);
  console.log('Original length:', scraped.content.length);
  console.log('Summary:', summary.getResultJson());
}
scrapeAndSummarize('https://blog.example.com/long-article');
```
---
GET /utilities/scrape_url?url={url}
This endpoint is synchronous and returns 200 OK immediately.
For detailed API specifications, refer to:
- Postman Documentation
- Product Page
---
- @sharpapi/sharpapi-node-detect-urls - Extract URLs from text
- @sharpapi/sharpapi-node-detect-emails - Extract emails from text
- @sharpapi/sharpapi-node-summarize-text - Summarize content
- @sharpapi/sharpapi-node-seo-tags - Generate SEO tags
- @sharpapi/sharpapi-node-client - Full SharpAPI SDK
---
This project is licensed under the MIT License. See the LICENSE.md file for details.
---
- Documentation: SharpAPI.com Documentation
- Issues: GitHub Issues
- Email: contact@sharpapi.com
---
Powered by SharpAPI - AI-Powered API Workflow Automation