Published
Edited
Sep 9, 2020
1 fork
12 stars
Insert cell
Insert cell
pdfJS = require('pdfjs-dist@2.0.943').then(pdf => {
pdf.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@2.0.943/build/pdf.worker.js';
return pdf;
})
Insert cell
// Location of the PDF to load; the `doc` cell passes this to pdfJS.getDocument.
url = 'https://file-pkouzgbimr.now.sh/'
Insert cell
// Starts loading the PDF at `url` and resolves to the loaded document.
// getDocument() returns a loading task; its `.promise` property is the
// documented way to wait for the document, rather than relying on the task
// object itself being thenable (which only older pdf.js releases supported).
doc = pdfJS.getDocument(url).promise
Insert cell
Insert cell
formatted = {
let text = '';
for (let pageNumber = 4; pageNumber <= 23; pageNumber++) {
const page = await doc.getPage(pageNumber);
const textContent = await page.getTextContent();
for (let item of textContent.items.slice(2)) {
if (item.str.trim() === '') continue;
if (item.fontName === 'g_d3_f2') {
text += '\n\n## ' + item.str + '\n\n';
} else if (item.transform[4] === 126) {
text += '\n\n' + item.str;
} else {
text += item.str;
}
}
}
return text;
}
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more