Published
Edited
Apr 15, 2021
Insert cell
md`# blank note`
Insert cell
pdfjsLib = require("pdfjs-dist@2.6.347")
Insert cell
pdfjsLib.GlobalWorkerOptions.workerSrc = "https://cdn.jsdelivr.net/npm/pdfjs-dist@2.6.347/build/pdf.worker.js";
Insert cell
cjs = require('https://bundle.run/hellocjs@1.0.0')
Insert cell
cjs.hellocjs('test')
Insert cell
ejs = require('https://bundle.run/helloejs@1.0.0')
Insert cell
ejs.helloejs('this is ejs module')
Insert cell
ppdf = require('https://bundle.run/@binarmaya/parsepdf@1.1.2')
Insert cell
parpdf = await import('https://cdn.jsdelivr.net/npm/@binarmaya/parsepdf@1.1.2/index.min.js')
Insert cell
mutable data = 0
Insert cell
// Page number that loadPdf passes to pdf.getPage().
pageNum = 1
Insert cell
pdf2base64= require('https://bundle.run/pdf-to-base64@1.0.3')
Insert cell
binardata = atob(await pdf2base64(file))
Insert cell
Insert cell
/**
 * Reads page `pageNum` of the PDF referenced by `file` and returns its text
 * arranged as rows of cell strings.
 *
 * Relies on names defined in other cells: `file`, `pageNum`, `pdf2base64`,
 * `pdfjsLib` and `parsepdf`.
 *
 * @returns {Promise<Array<Array<string|undefined>>>} One array per row; each
 *   entry is the `str` of the cell at that position, or '' when the cell
 *   itself is missing (undefined/null).
 */
async function loadPdf() {
  // pdf2base64 resolves to base64 text; atob turns it into the binary string
  // handed to PDF.js through the `data` option.
  const binarypdf = atob(await pdf2base64(file));

  // getDocument returns a loading task synchronously; only its `promise`
  // needs to be awaited.
  const loadingTask = pdfjsLib.getDocument({ data: binarypdf });
  const pdf = await loadingTask.promise;

  // Only the requested page and its text content are needed for the result,
  // so nothing else is fetched from the document.
  const page = await pdf.getPage(pageNum);
  const textContent = await page.getTextContent();

  // `rotate` decides the reading direction and `view` is the page box; both
  // are read through the public page getters rather than `_pageInfo`.
  const rows = parsepdf(textContent.items, page.rotate, page.view);
  return rows.map((row) => row.map((cell) => (cell ? cell.str : '')));
}
Insert cell
/**
 * Turns the text items of one page into rows of cells.
 *
 * Every item is annotated in place with `x`, `y` (rounded), `t`, `r`,
 * `ascX` and `ascY`, pushed into a row with pushitemtorow, after which the
 * rows are aligned to columns (arrangeCol) and adjacent rows are merged where
 * possible (checkrowdif).
 *
 * @param {Array<Object>} items  Text items; each needs `width` and `height`
 *   plus whatever `_getX` / `_getY` (defined elsewhere) read from it.
 * @param {number} rotate  Page rotation; 0, 90, 180 and 270 are distinguished.
 * @param {Array<number>} view  Page box, forwarded unchanged to the helpers.
 * @returns {Array} Whatever checkrowdif returns for the collected rows.
 */
function parsepdf(items, rotate, view) {
  // For sideways pages the roles of the two coordinate getters are swapped.
  const sideways = rotate === 90 || rotate === 270;
  const get_X = sideways ? _getY : _getX;
  const get_Y = sideways ? _getX : _getY;

  // Reading direction along each axis for this rotation.
  const ascX = rotate === 0 || rotate === 90;
  const ascY = rotate === 90 || rotate === 180;

  const row = [];
  const col = [];
  // The direction flags travel with the row collection; arrangeCol reads them.
  row['ascX'] = ascX;
  row['ascY'] = ascY;

  for (const item of items) {
    item.x = get_X(item);
    item.y = Math.round(get_Y(item));
    // t: y offset by the item height, r: x offset by the item width,
    // each in the direction given by the flags above.
    item.t = item.y + item.height * (ascY ? -1 : 1);
    item.r = item.x + item.width * (ascX ? 1 : -1);
    item.ascX = ascX;
    item.ascY = ascY;
    pushitemtorow(item, row, col, view);
  }

  arrangeCol(row, col, view, rotate);
  return checkrowdif(row, rotate);
}
Insert cell
/**
 * Walks the rows in order and merges a row with its successor when the
 * successor's vertical extent reaches the current row and mergerow accepts
 * the pair.
 *
 * Rows are cloned with deepClone before use, so the input rows are not
 * modified.
 *
 * @param {Array<Array>} row  Rows, each carrying a `y` property.
 * @param {number} rotate  Page rotation; 90 and 180 are treated as ascending y.
 * @returns {Array<Array>} The rows after merging. A merged row gets the mean
 *   of the two original `y` values. Empty input yields an empty array.
 */
function checkrowdif(row, rotate) {
  if (row.length === 0) return [];

  const asc = rotate === 90 || rotate === 180; // in terms of y
  const rowMerged = [];
  let row0 = deepClone(row[0]);

  for (let i = 1; i < row.length; i += 1) {
    const row1 = deepClone(row[i]);
    const y1 = getYinRow(row1);
    const h1 = getHinRow(row1);
    // Far edge of the next row, measured back towards the current row.
    const yt = asc ? y1 - h1 : y1 + h1;
    const y0 = getYinRow(row0);
    // Non-negative means the next row reaches the current one.
    const difY = asc ? y0 - yt : yt - y0;

    const row01 = difY >= 0 ? mergerow(row0, row1, rotate) : false;
    if (row01) {
      row01['y'] = (y0 + y1) / 2;
      rowMerged.push(row01);
      // Both rows are consumed; continue with the row after the merged pair.
      i += 1;
      // When the merged pair was the last one there is nothing left to carry
      // over, so no placeholder row is appended to the result.
      row0 = i < row.length ? deepClone(row[i]) : null;
    } else {
      rowMerged.push(row0);
      row0 = row1;
    }
  }

  if (row0) rowMerged.push(row0);
  return rowMerged;
}
Insert cell
/**
 * Tries to combine two rows into one.
 *
 * - Rows of equal length are concatenated and sorted by `x` in reading
 *   direction.
 * - Rows of different length are merged position by position: at each index
 *   at most one of the two rows may hold a cell with text, and the chosen
 *   cell must not start before the right edge (`r`) of the previously kept
 *   cell, measured in reading direction.
 *
 * @param {Array<Object>} row0  First row.
 * @param {Array<Object>} row1  Second row.
 * @param {number} rotate  Page rotation; 0 and 90 read with ascending x.
 * @returns {Array<Object>|false} The merged row, or false when the rows
 *   cannot be merged.
 */
function mergerow(row0, row1, rotate) {
  const ascX = rotate === 0 || rotate === 90; // in terms of x
  const dirX = ascX ? 1 : -1;

  if (row0.length === row1.length) {
    const combined = row0.concat(row1);
    combined.sort((a, b) => (ascX ? a.x - b.x : b.x - a.x));
    return combined;
  }

  const newc = [];
  const length = Math.max(row0.length, row1.length);
  for (let i = 0; i < length; i += 1) {
    const str0 = row0[i] ? row0[i].str : undefined;
    const str1 = row1[i] ? row1[i].str : undefined;
    // Both rows have text in the same position: they are distinct rows.
    if (str0 && str1) return false;

    const newi = str0 ? row0[i] : row1[i];
    const prev = newc[newc.length - 1];
    // The candidate overlaps the previously kept cell. Fail the whole merge
    // right away so a later iteration cannot mask the conflict and drop the
    // cell from the result.
    if (newi && prev && (prev.r - newi.x) * dirX > 0) return false;

    newc.push(newi);
  }
  return newc;
}
Insert cell
/**
 * Aligns rows that have fewer cells than there are known columns by inserting
 * empty `{}` placeholder cells, then orders the rows by their `y` value.
 *
 * The row collection and the individual rows are modified in place.
 *
 * @param {Array<Array>} row  Rows; the collection carries `ascX` / `ascY`
 *   flags and every row carries a `y` property.
 * @param {Array<Object>} col  Column borders; the `l` value of each entry is
 *   compared against the cell's `x`.
 * @param {Array<number>} view  Page box (not used here).
 * @param {number} rotate  Page rotation; 0 and 90 walk a row from index 0
 *   upwards, other values from the last index downwards.
 */
function arrangeCol(row, col, view, rotate) {
  const asc = rotate === 0 || rotate === 90;
  const dirX = row['ascX'] ? 1 : -1;
  const dirY = row['ascY'] ? 1 : -1;

  // Process the rows with the fewest cells first.
  row.sort((a, b) => a.length - b.length);

  for (let i = 0; i < row.length; i += 1) {
    // Rows that already have at least as many cells as columns are left alone.
    if (row[i].length >= col.length) continue;

    let lasti = row[i].length - 1;
    let jx = asc ? 0 : lasti;
    let k = asc ? lasti : 0;
    // NOTE(review): col[jx] is dereferenced without a bounds check; a row that
    // keeps growing past col.length would throw here — confirm whether that
    // can happen with real data.
    do {
      const xc = row[i][jx].x;
      const lf = col[jx].l;
      if (xc * dirX > lf * dirX) {
        // The cell starts beyond this column's `l` border in reading
        // direction: insert a placeholder so the cell moves one index up.
        row[i].splice(jx, 0, {});
        lasti = row[i].length - 1;
        k = asc ? lasti : 0;
      }
      jx += asc ? 1 : -1;
    } while (asc ? jx <= k : jx >= k);
  }

  // Restore the order by y position.
  row.sort((a, b) => (getYinRow(a) - getYinRow(b)) * dirY);
}
Insert cell
/**
 * Tightens the per-column `left` / `right` border arrays using the cells of
 * one row, logging the inputs first. Both arrays are updated in place.
 *
 * For every pair of neighbouring cells (previous, current) at border index
 * `idx`:
 *  - `right[idx]` is pulled towards the current cell's `x`;
 *  - `left[idx]` is pushed towards the previous cell's `r`, but only when
 *    that `r` lies before the old right border in reading direction.
 *
 * @param {Array<number>} left   Left borders; the direction is taken from
 *   whether left[1] is greater than left[0].
 * @param {Array<number>} right  Right borders.
 * @param {Array<Object>} row    Cells with `str`, `x` and `r`.
 */
function updateRight(left, right, row) {
  const asc = left[1] > left[0];
  console.log(`left prop: ${left}`);
  console.log(`right prop: ${right}`);
  console.log(`current string: ${row.map(e=>e.str)}`);
  console.log(`current x: ${row.map(e=>e.x)}`);
  console.log(`current r: ${row.map(e=>e.r)}`);

  // Which of two candidates wins depends on the reading direction.
  const tighterRight = asc ? Math.min : Math.max;
  const tighterLeft = asc ? Math.max : Math.min;

  for (let idx = 0; idx + 1 < row.length; idx += 1) {
    const cell = row[idx + 1];
    if (!cell) continue;

    const oldRight = right[idx];
    const oldLeft = left[idx];
    const nextX = cell.x;
    const prevR = row[idx].r;
    // Only accept the previous cell's r when it sits before the old border.
    const beforeBorder = asc ? prevR < oldRight : prevR > oldRight;
    const candidateLeft = beforeBorder ? prevR : undefined;

    if (nextX) {
      if (!oldRight) {
        right[idx] = nextX;
      } else if (nextX !== oldRight) {
        right[idx] = tighterRight(oldRight, nextX);
      }
    }

    if (candidateLeft) {
      if (!oldLeft) {
        left[idx] = candidateLeft;
      } else if (candidateLeft !== oldLeft) {
        left[idx] = tighterLeft(oldLeft, candidateLeft);
      }
    }
  }
}
Insert cell
/**
 * Reads the `y` value stored on a row.
 *
 * @param {Object} row  A row (array) carrying a `y` property.
 * @returns {*} The row's `y`, or undefined when it has none.
 */
function getYinRow(row) {
  const { y } = row;
  return y;
}
Insert cell
/**
 * Returns the height of the first cell in the row that has a truthy `height`.
 *
 * Missing (undefined/null) cells and cells without a height are skipped.
 *
 * @param {Array<Object>} row  Cells of one row.
 * @returns {number|undefined} The first truthy height, or undefined when no
 *   cell has one.
 */
function getHinRow(row) {
  for (let i = 0; i < row.length; i += 1) {
    const height = row[i]?.height;
    if (height) return height;
  }
  return undefined;
}
Insert cell
/**
 * Adds one text item to the row collection.
 *
 * - The very first item opens row 0.
 * - If findYinRow finds no row for the item, a new row is opened; before
 *   that, the row index remembered in `col.inspect` (if any) is passed to
 *   inspectCol.
 * - Otherwise the item is added to the matching row, the row is sorted by
 *   `x` in reading direction and run through joinItem. A row that ends up
 *   with more than one cell is remembered in `col.inspect`.
 *
 * @param {Object} item  Item with `x`, `y` (and `ascX`).
 * @param {Array<Array>} row  Row collection, modified in place; its `ascX`
 *   flag gives the reading direction.
 * @param {Array} col  Column borders; also carries the `inspect` index.
 * @param {Array<number>} view  Page box, forwarded to the helpers.
 */
function pushitemtorow(item, row, col, view) {
  if (row.length === 0) {
    const first = [item];
    first['y'] = item.y;
    row[0] = first;
    return;
  }

  const r = findYinRow(item, row, col, view);
  if (r === 'NotFound') {
    // Compare against undefined explicitly: index 0 is a valid row to inspect.
    if (col?.inspect !== undefined) inspectCol(row[col['inspect']], col);
    const fresh = [item];
    fresh['y'] = item.y;
    row.push(fresh);
    return;
  }

  // The direction flag lives on the row collection (and on every item), not
  // on the individual rows. Ascending is the fallback when neither is set.
  const ascX = row['ascX'] ?? item.ascX ?? true;
  const dirX = ascX ? 1 : -1;
  row[r].push(item);
  // Reading order: ascending x when ascX, descending x otherwise.
  row[r].sort((a, b) => (a.x - b.x) * dirX);
  row[r] = joinItem(row[r], item, col, view);
  if (row[r].length > 1) col['inspect'] = r;
}
Insert cell
// Join tolerance used by joinItem: when (previous cell's r - next cell's x),
// taken in reading direction, is not positive but still greater than this
// value, the two cells are joined anyway.
diffXTol = -2.5
Insert cell
/**
 * Concatenates neighbouring cells of a sorted row that touch or overlap.
 *
 * Walking the row in order, the current cell is folded into the previous one
 * when the previous cell's right edge `r` reaches past the current cell's
 * `x` (in reading direction), or falls short of it by less than the
 * `diffXTol` tolerance. Folded cells are modified in place (`str`, `r`,
 * `width` of the earlier cell).
 *
 * @param {Array<Object>} rowr  Cells sorted in reading order; carries `y`.
 * @param {Object} item  The item just added; its `ascX` gives the direction.
 * @param {Array} col  Not used here.
 * @param {Array<number>} view  Not used here.
 * @returns {Array<Object>} New row with the joined cells and the same `y`.
 */
function joinItem(rowr, item, col, view) {
  const dirX = item.ascX ? 1 : -1; // the direction of X
  const joinRow = [];
  joinRow['y'] = rowr['y'];

  let prevItem = rowr[0];
  for (let i = 1; i < rowr.length; i += 1) {
    const currItem = rowr[i];
    const difX = (prevItem.r - currItem.x) * dirX;
    if (difX > 0) {
      // Overlap: extend the previous right edge by the full width of the
      // current cell, which gives the farthest possible r.
      prevItem.str += currItem.str;
      prevItem.r += currItem.width * dirX;
      prevItem.width = (prevItem.r - prevItem.x) * dirX;
    } else if (difX > diffXTol) {
      // Small gap within tolerance: the current cell's own right edge is the
      // farthest position, so take it as is.
      prevItem.str += currItem.str;
      prevItem.r = currItem.r;
      prevItem.width = (currItem.r - prevItem.x) * dirX;
    } else {
      joinRow.push(prevItem);
      prevItem = currItem;
    }
  }
  joinRow.push(prevItem);
  return joinRow;
}
Insert cell
// Row-matching tolerance used by findYinRow: an item belongs to an existing
// row when the absolute difference between the row's y and the item's y is
// at most this value.
diffYTol = 1
Insert cell
/**
 * Finds the first row whose `y` is within `diffYTol` of the item's `y`.
 *
 * @param {Object} item  Item with a `y` value.
 * @param {Array<Array>} row  Rows, each carrying a `y` property.
 * @param {Array} col  Not used here.
 * @param {Array<number>} view  Not used here; may be omitted.
 * @returns {number|string} Index of the matching row, or the string
 *   'NotFound' when no row is close enough.
 */
function findYinRow(item, row, col, view) {
  const index = row.findIndex(
    (candidate) => Math.abs(candidate['y'] - item.y) <= diffYTol,
  );
  return index === -1 ? 'NotFound' : index;
}
Insert cell
/**
 * Derives or refines the column borders in `col` from the cells of one row.
 *
 * Each `col[i]` holds `l` (the right edge `r` of cell i) and `r` (the `x` of
 * cell i + 1); the last entry only has `l`.
 *
 * - Rows with fewer than two cells are ignored.
 * - When `col` is empty or the row has more cells than `col`, the borders
 *   are rebuilt from this row.
 * - When the row has exactly as many cells as `col`, existing borders are
 *   tightened; a cell that starts beyond its column's `l` causes an empty
 *   `{}` placeholder to be inserted into `items` at that index.
 * - Rows with fewer cells than `col` leave `col` untouched.
 *
 * @param {Array<Object>} items  Cells of one row (`x`, `r`, `ascX`); may be
 *   modified in place.
 * @param {Array<Object>} col  Column borders, modified in place.
 */
function inspectCol(items, col) {
  if (items.length < 2) return;

  if (!col.length || items.length > col.length) {
    for (let i = 0; i < items.length - 1; i += 1) {
      col[i] = { l: items[i].r, r: items[i + 1].x };
    }
    col.push({ l: items[items.length - 1].r });
    return;
  }

  if (items.length !== col.length) return;

  // items.length is re-read on every pass because placeholders may be
  // inserted while iterating.
  for (let i = 0; i < items.length - 1; i += 1) {
    const ascX = items[i].ascX;
    const dirX = ascX ? 1 : -1;
    const cr = col[i].r;
    const cl = col[i].l;
    const il = items[i].r;
    const ir = items[i].x;
    if (ir * dirX > cl * dirX) {
      // The cell starts beyond this column's `l`: its x can serve as the
      // column's r, and the cell itself belongs one index further on.
      col[i].r = ascX ? Math.min(ir, cr) : Math.max(ir, cr);
      items.splice(i, 0, {});
    } else if (il * dirX < cr * dirX) {
      // The cell ends before this column's r: tighten l with the cell's r.
      col[i].l = ascX ? Math.max(il, cl) : Math.min(il, cl);
      // NOTE(review): this compares against col[i].r, not col[i - 1].r —
      // confirm that is intended.
      if (i > 0) col[i - 1].r = ascX ? Math.min(ir, cr) : Math.max(ir, cr);
    }
  }
}
Insert cell
Insert cell
Insert cell
/**
 * Makes a JSON-based deep copy of a row.
 *
 * The own enumerable properties of `o` (array indices as well as extra keys
 * such as `y`) are copied through a JSON round trip and placed on a fresh
 * array, so the result is always an array even when extra keys are present.
 * Anything JSON cannot represent is lost in the round trip.
 *
 * @param {Object} o  Value to copy; undefined/null yields an empty array.
 * @returns {Array} New array holding the copied properties.
 */
function deepClone(o) {
  const snapshot = JSON.parse(JSON.stringify({ ...o }));
  // Assigning onto an array keeps index keys as elements and other keys as
  // plain properties.
  return Object.assign([], snapshot);
}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more