Pure JavaScript cross-platform module to extract text from PDFs.

`npm install @mmaaikel/pdf-parse`

```js
import fs from 'fs'
import PdfParse from '@mmaaikel/pdf-parse'
let dataBuffer = fs.readFileSync('path to PDF file...');
PdfParse(dataBuffer).then(function(data) {
// number of pages
console.log(data.numpages);
// number of rendered pages
console.log(data.numrender);
// PDF info
console.log(data.info);
// PDF metadata
console.log(data.metadata);
// PDF.js version
// check https://mozilla.github.io/pdf.js/getting_started/
console.log(data.version);
// PDF text
console.log(data.text);
});
```

```js
import fs from 'fs'
import PdfParse from '@mmaaikel/pdf-parse'
let dataBuffer = fs.readFileSync('path to PDF file...');
PdfParse(dataBuffer).then(function(data) {
// use data
})
.catch(function(error){
// handle exceptions
})
```

```js
const DEFAULT_OPTIONS = {
// internal page parser callback
// you can set this option, if you need another format except raw text
pageRender: render_page,
// max page number to parse
max: 0,
//check https://mozilla.github.io/pdf.js/getting_started/
version: 'v1.10.100'
}
```

Set the `pageRender` option if you need an output format other than raw text.
* `'default'`
* `'v1.9.426'`
* `'v1.10.100'`
* `'v1.10.88'`
* `'v2.0.550'`
>default version is v1.10.100
>mozilla.github.io/pdf.js
or `npm test`
* A Guide for First-Timers
* How to create a merge request
* Check Contributing Guide