Convert PDF content and layout information with pdf.js
Install the package with `npm install pdf-parser`. Run `node example/node/pdf2json` to run the example,
or `cd example/node` and then run `node pdf2json`.
// Convert the PDF at PDF_PATH and print the result as a JSON string.
var pdfParser = require('pdf-parser');
var PDF_PATH = 'test.pdf';

// Callback for pdf2json: logs the error when one is reported,
// otherwise logs the serialized conversion result.
function printResult(err, result) {
  if (err != null) {
    console.log(err);
    return;
  }
  console.log(JSON.stringify(result));
}

pdfParser.pdf2json(PDF_PATH, printResult);
`
The tool converts a PDF to JSON, as shown below:
```
{
"pages":[
{
"width":612,
"height":792,
"pageId":0,
"texts":[
{
"text":"Hello World",
"direction":"ltr", //from left to right
"width":52.81644000000001,
"height":27.96,
"top":278.69,
"left":296.81,
"transform":[27.96,0,0,27.96,296.81,278.69],
"fontSize":27.96,
"fontName":"Times",
"fontOriginName":"TimesNewRomanPSMT",
"bold":false,
"italic":false,
"black":false,
"color":"[68,113,196]"
}
]
}
]
}
```