Published
Edited
Feb 23, 2021
2 forks
Importers
1 star
Insert cell
Insert cell
// Category tree parsed from the attached JSON snapshot. Other cells in this notebook read
// `name`, an optional `slug` and an optional `children` array from each node of this tree.
categories = FileAttachment("categories_2020-10-17@3.json").json()
Insert cell
Insert cell
import {chart} with {categories as data, margin as margin} from "@d3/collapsible-tree"
Insert cell
Insert cell
Insert cell
slugifiedCategories = {
const cloneDeep = (category) => {
const categoryClone = {
name: category.name,
slug: getOrCreateSlug(category)
}
if (category.children) {
categoryClone.children = category.children.map(cloneDeep);
}
return categoryClone;
}
return cloneDeep(categories);
}
Insert cell
// Flat list of every category object in the tree (root included), obtained by
// unwrapping the `data` of each d3-hierarchy descendant node.
categoryNodes = d3
  .hierarchy(categories)
  .descendants()
  .map((node) => node.data)
Insert cell
// True when no two category nodes share a name: a Set of the names then has
// exactly as many entries as there are nodes.
areUnique = new Set(categoryNodes.map((node) => node.name)).size === categoryNodes.length
Insert cell
// Slug of every category node (existing or generated), in default string sort order.
categorySlugs = categoryNodes
  .map((node) => getOrCreateSlug(node))
  .sort()
Insert cell
// Returns the slug stored on the category, or derives one from its name when the
// stored slug is null/undefined.
// Generated slugs that turn out to be invalid are corrected by hand, by editing the
// JSON in the file attachment.
getOrCreateSlug = (category) => {
  const storedSlug = category.slug;
  return storedSlug ?? slugify(category.name);
}
Insert cell
// Result cell for the slug validation. As written it only returns a placeholder message;
// the validation itself is switched on by hand (see below).
invalidSlugs = {
// Manual toggle: while this early return is active, detectInvalid is never called.
return "Detection to be run manually"; // comment out to run
// Only reached once the line above is commented out: hands every category slug to detectInvalid.
return detectInvalid(categorySlugs);
}
Insert cell
// can't use arrow syntax for generator functions
/**
 * Checks the given slugs one at a time and collects those that `isValid` rejects.
 *
 * Yields, in order: a progress element at 0, a progress element after every checked
 * slug (counting the slugs checked so far), and finally the array of invalid slugs.
 *
 * @param {string[]} slugs - slugs to check
 * @yields progress elements, then the array of invalid slugs as the last value
 * @throws whatever `isValid` throws; the run stops at the first failing check
 */
async function* detectInvalid(slugs) {
  yield html`<progress value="0" max="${slugs.length}"></progress>`;
  const invalidSlugs = [];
  for (let i = 0; i < slugs.length; i++) {
    const slug = slugs[i];
    const isValidSlug = await isValid(slug);
    if (!isValidSlug) {
      invalidSlugs.push(slug);
    }
    // i is zero-based, so i + 1 slugs are done at this point; reporting i would leave
    // the bar one step short of `max` after the last slug.
    yield html`<progress value="${i + 1}" max="${slugs.length}"></progress>`;
    // Throttle to one request per second: the 75 request tokens per minute granted by the
    // proxy (4500 requests per hour) are more than stackshare.io is willing to handle
    // (IP blacklisting, permanent, for static IPs).
    await Promises.delay(1000);
  }
  yield invalidSlugs;
}
Insert cell
/**
 * Checks whether https://stackshare.io/<slug> exists by sending a HEAD request
 * through the proxy.
 *
 * @param {string} slug - slug appended to the StackShare base URL
 * @returns {Promise<boolean>} true for a 200 response, false for a 404 response
 * @throws {Error} for 429 (proxy rate limit), 403 (target blacklisting) and any
 *   other status; the message names the cause
 */
isValid = async (slug) => {
  const proxyUrl = "https://nikita-sharov.herokuapp.com/"; // won't work for you
  const url = `${proxyUrl}https://stackshare.io/${slug}`;
  const response = await fetch(url, {method: "HEAD"}); // response body not necessary
  switch (response.status) {
    case HttpStatus.OK:
      return true;
    case HttpStatus.NOT_FOUND:
      return false;
    case HttpStatus.TOO_MANY_REQUESTS:
      throw new Error("Request rate limiting by the proxy (Heroku)");
    case HttpStatus.FORBIDDEN:
      throw new Error("IP blacklisting by the target (StackShare)");
    default: {
      // statusText may be an empty string, so always include the numeric status
      // to keep the error message meaningful.
      const reason = response.statusText ? ` ${response.statusText}` : "";
      throw new Error(`Unexpected response: HTTP ${response.status}${reason}`);
    }
  }
}
Insert cell
// Named HTTP status codes that isValid distinguishes.
HttpStatus = ({
  OK: 200,                // page exists
  FORBIDDEN: 403,         // reported by isValid as blacklisting by the target
  NOT_FOUND: 404,         // page does not exist
  TOO_MANY_REQUESTS: 429  // reported by isValid as rate limiting by the proxy
})
Insert cell
d3 = require("d3-hierarchy@2")
Insert cell
// Derives a slug from a category name: lower-cases it, then applies a fixed list of
// literal substitutions.
slugify = (name) => {
  // Applied strictly in this order; e.g. " / " has to be handled before "/" and " ",
  // otherwise it would turn into three dashes instead of one.
  const substitutions = [
    ["&", "and"],
    ["c++", "c-plus-plus"],
    [" / ", "-"],
    ["/", "-"],
    [" ", "-"],
    ["(", "-"],
    [")", "-"],
    [".", "-"],
    [",", ""],
  ];
  return substitutions.reduce(
    (slug, [search, replacement]) => slug.replaceAll(search, replacement),
    name.toLowerCase()
  );
}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more