Published
Edited
Dec 3, 2019
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Example call: run wikiParse on the Wikipedia "List of radioactive isotopes by half-life" page.
wikiParse('https://en.wikipedia.org/wiki/List_of_radioactive_isotopes_by_half-life')
Insert cell
Insert cell
/**
 * Downloads a wiki page through a CORS proxy and converts every
 * `table.wikitable` found in it to CSV.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {string} [proxy] - Prefix prepended to `url` before fetching.
 *   Defaults to the cors-anywhere proxy; pass '' to fetch `url` directly.
 * @returns {Promise<string[]>} One CSV string per matching table, in the
 *   order returned by `querySelectorAll`.
 * @throws {Error} If the HTTP response is not OK or no table is found.
 */
wikiParse = async function (url, proxy = 'https://cors-anywhere.herokuapp.com/') {
  const response = await fetch(proxy + url);
  // Without this check an error page would be parsed and misreported
  // as "no tables found".
  if (!response.ok) {
    throw new Error('Could not load ' + url + ' (HTTP ' + response.status + ')');
  }
  const data = await response.text();

  // remove images to prevent 404 errors in console
  const markup = data.replace(/<img[^>]*>/g, '');

  const dom = parseHTML(markup);
  const tables = dom.querySelectorAll('table.wikitable');
  if (tables.length === 0) {
    throw new Error('Could not find any tables on the given Wiki page :(');
  }

  // Arrow wrapper keeps Array.from's index argument away from parseTable.
  return Array.from(tables, (tableEl) => parseTable(tableEl));
};
Insert cell
/**
 * Parses an HTML string into a fragment that can be queried with
 * `querySelectorAll`.
 *
 * @param {string} str - HTML markup.
 * @returns {DocumentFragment} Fragment containing all nodes parsed from `str`.
 */
parseHTML = function (str) {
  if (typeof document.createRange === 'function') {
    return document.createRange().createContextualFragment(str);
  }

  // Fallback without Range support: parse via innerHTML, then move EVERY
  // top-level node into a fragment. Returning only the first child element
  // would drop the rest of the markup.
  var container = document.createElement('div');
  container.innerHTML = str;
  var fragment = document.createDocumentFragment();
  while (container.firstChild) {
    fragment.appendChild(container.firstChild);
  }
  return fragment;
};
Insert cell
/**
 * Converts a table element to CSV text, one line per `tr`, each line
 * terminated by '\n'. A cell with a colspan is repeated across the columns
 * it covers; a cell with a rowspan is repeated in the following rows.
 *
 * @param {Element} element - Table element to convert.
 * @returns {string} CSV text built from the values returned by parseCell.
 */
parseTable = function (element) {
  // Reads a span attribute; missing, non-numeric or < 1 values count as 1
  // so that a malformed attribute never makes a cell vanish from the output.
  var readSpan = function (cell, name) {
    var value = parseInt(cell.getAttribute(name), 10);
    return value >= 1 ? value : 1;
  };

  var rows = element.querySelectorAll('tr');
  // get maximum number of cols
  var colsCount = getMaxColumns(rows);
  // column index -> [number of rows still to fill, cell text]
  var allSpans = {};
  var result = '';

  // loop tr
  for (var rowsIdx = 0, rowsLen = rows.length; rowsIdx < rowsLen; rowsIdx++) {
    var cells = rows[rowsIdx].querySelectorAll('th, td');
    var csvLine = [];
    var spanIdx = 0;

    // loop cells; indexes past the row's own cells still run so that
    // pending rowspan values in trailing columns get flushed
    for (var cellIdx = 0; cellIdx < colsCount; cellIdx++) {
      var cell = cells[cellIdx];
      var hasCell = typeof cell !== 'undefined';
      var rowSpan = hasCell ? readSpan(cell, 'rowspan') : 1;
      var colSpan = hasCell ? readSpan(cell, 'colspan') : 1;
      // parse once per cell rather than once per spanned column
      var cellText = hasCell ? parseCell(cell) : undefined;

      // loop colSpan, set rowSpan value
      for (var j = 0; j < colSpan; j++) {
        // emit values carried down by a rowspan from an earlier row
        while (Object.prototype.hasOwnProperty.call(allSpans, spanIdx)) {
          var pending = allSpans[spanIdx];
          csvLine.push(pending[1]);

          // decrease by 1 and remove if all rows are covered
          pending[0] -= 1;
          if (pending[0] === 0) {
            delete allSpans[spanIdx];
          }
          spanIdx += 1;
        }

        // don't append if cell is undefined at current index
        if (hasCell) {
          csvLine.push(cellText);
        }
        if (rowSpan > 1) {
          allSpans[spanIdx] = [rowSpan - 1, cellText];
        }
        spanIdx += 1;
      }
    }
    result += csvLine.join(',') + '\n';
  }
  return result;
};
Insert cell
/**
 * Extracts the visible text of a table cell as a single CSV field.
 * Descendants hidden with `display: none` are removed from the cell first
 * (this mutates `cellItem`), line breaks become spaces, and the text is
 * quoted when it contains a comma or a double quote.
 *
 * @param {Element} cellItem - The `td`/`th` element to read.
 * @returns {string} The CSV-escaped cell text ('' for an empty cell).
 */
parseCell = function (cellItem) {
  // first: remove invisible elements in cells
  var descendants = cellItem.querySelectorAll('*');
  for (var i = 0; i < descendants.length; i++) {
    var el = descendants[i];
    var hiddenInline = Boolean(el.style) && el.style.display === 'none';
    // The second argument of getComputedStyle is a pseudo-element, not a
    // property name; the property is read from the returned declaration.
    if ((hiddenInline || getComputedStyle(el).display === 'none') && el.parentNode) {
      el.parentNode.removeChild(el);
    }
  }

  // '' fallback keeps empty cells from crashing on .trim()
  var line = (cellItem.textContent || cellItem.innerText || '')
    .trim()
    .replace(/\r?\n|\r/g, ' ');

  // Line breaks are already gone here, so only commas and quotes force
  // quoting; embedded quotes are doubled.
  if (/[",]/.test(line)) {
    line = '"' + line.replace(/"/g, '""') + '"';
  }

  return line;
};
Insert cell
/**
 * Finds the largest number of child elements among the given rows.
 *
 * @param {ArrayLike<Element>} rows - Rows to inspect (e.g. a NodeList of `tr`).
 * @returns {number} The highest `children.length` seen, or 0 for no rows.
 */
getMaxColumns = function (rows) {
  // reduce is borrowed from Array.prototype so array-likes work too
  return Array.prototype.reduce.call(
    rows,
    (widest, row) => {
      const width = row.children.length;
      return width > widest ? width : widest;
    },
    0
  );
};
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more