Public
Edited
Aug 16, 2023
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
parquetBuffer = {
console.time("download");
const ab = await fetch(buildingsUrl).then((response) =>
response.arrayBuffer()
);
console.timeEnd("download");
return ab;
}
Insert cell
arrowTable = {
console.log("started parsing");
console.time("parsing");
const parquetBytes = new Uint8Array(parquetBuffer);

console.time("parse to ffi table");
const wasmArrowTable = parquet.readParquetFFI(parquetBytes);
console.timeEnd("parse to ffi table");

const recordBatches = [];
for (let i = 0; i < wasmArrowTable.numBatches(); i++) {
// Note: Unless you know what you're doing, setting `true` below is recommended to _copy_
// table data from WebAssembly into JavaScript memory. This may become the default in the
// future.
console.time("parse record batch");
const recordBatch = arrowJsFFI.parseRecordBatch(
parquetMemory.buffer,
wasmArrowTable.arrayAddr(i),
wasmArrowTable.schemaAddr(),
true
);
console.timeEnd("parse record batch");

recordBatches.push(recordBatch);
}

const table = new arrow.Table(recordBatches);

console.timeEnd("parsing");
return table;
}
Insert cell
geometryColumn = arrowTable.getChildAt(0)
Insert cell
Insert cell
flatCoordinateArray = {
const flatCoordinateVector = geometryColumn
.getChildAt(0)
.getChildAt(0)
.getChildAt(0);
return flatCoordinateVector.data[0].values;
}
Insert cell
polygonOffsets = geometryColumn.getChildAt(0).data[0].valueOffsets
Insert cell
geomOffsets = geometryColumn.data[0].valueOffsets
Insert cell
Insert cell
areaArray = {
// Copy `flatCoordinateArray` into WebAssembly memory and construct a
// `geoarrow.CoordBuffer` object. In JavaScript, this object is just a
// pointer to the array in the Wasm memory space.
console.time("memory allocation");
const coordBuffer = geoarrow.CoordBuffer.from_interleaved_coords(
new geoarrow.InterleavedCoordBuffer(flatCoordinateArray)
);
console.timeEnd("memory allocation");

// Construct a `geoarrow.PolygonArray` from the coordinates and offsets
const polygonArray = new geoarrow.PolygonArray(
coordBuffer,
geomOffsets,
polygonOffsets
);

// Reproject the array of polygons from WGS84 to UTM
// ~3000ms on my machine for 1 million buildings.
// Check your browser console for the timing on your machine.
console.time("reproject");
const utmPolygonArray = polygonArray.reproject_rs(
"utm zone=12",
geoarrow.ReprojectDirection.Fwd
);
console.timeEnd("reproject");

// Compute the area
// ~290ms (on full 1M buildings)
console.time("area");
const areaWasmArray = utmPolygonArray.area();
console.timeEnd("area");

// "Export" the array from Wasm memory
const areaFFIArray = areaWasmArray.to_ffi();

// Use arrow-js-ffi to parse the field metadata from Wasm memory
const areaField = arrowJsFFI.parseField(
geoarrowMemory.buffer,
areaFFIArray.field_addr()
);

// Use arrow-js-ffi to copy the array back to JavaScript
// <2ms even for 1M buildings
console.time("parse vector");
const areaArray = arrowJsFFI.parseVector(
geoarrowMemory.buffer,
areaFFIArray.array_addr(),
areaField.type,
true
);
console.timeEnd("parse vector");

// Manually free the memory from WebAssembly to avoid memory leaks
//
// This sucks but WebAssembly does not have a garbage collector,
// so a certain level of manual memory management is required.
//
// Freeing the `arrowJsFFI` array is safe because we passed `copy=true`
// into `parseVector`, so we're no longer using the WebAssembly memory.
//
// `coordinateBuffer.free()` does not need to be run (and will in fact error!)
// because passing the coordinateBuffer into `geoarrow.PolygonArray` takes
// ownership of the coordinateBuffer, so `wasmPolygonArray.free()` also frees
// the coordinate buffer. This is indeed complex but hopefully will get easier
// to use over time.
console.time("memory freeing");
polygonArray.free();
utmPolygonArray.free();
areaWasmArray.free();
areaFFIArray.free();
console.timeEnd("memory freeing");

return areaArray;
}
Insert cell
Insert cell
// Anonymous display cell: shows the computed areas by calling `toArray()` on
// the Arrow vector.
areaArray.toArray()
Insert cell
Insert cell
Insert cell
colorScale = {
// Convert from value to RGB values
const lowerBound = 10;
const upperBound = 5000;
const linearScale = d3
.scaleLinear()
.domain([parseFloat(lowerBound), parseFloat(upperBound)])
.range([0, 1]);

// This is a fast vectorized approach: we store all colors in a single Float32Array instead of many small JS buffers
// Indices maps from geometry to vertex index
return (values, indices) => {
const vertexArrayLength = indices[indices.length - 1];
const outputArray = new Float32Array(vertexArrayLength * 3);
let lastIndex = 0;
for (let i = 0; i < values.length; ++i) {
let nextIndex = indices[i + 1];
const value = values[i];
const color = d3.color(d3.interpolateViridis(linearScale(Math.sqrt(value))));
const r = color.r / 255;
const g = color.g / 255;
const b = color.b / 255;
for (let j = lastIndex; j < nextIndex; ++j) {
outputArray[j * 3] = r;
outputArray[j * 3 + 1] = g;
outputArray[j * 3 + 2] = b;
}
lastIndex = nextIndex;
}

return outputArray;
};
}
Insert cell
Insert cell
colorAttribute = {
// On my computer, generating the color scale from the area array takes ~130ms.
// Check your browser console for your timing.
console.time("colorScale");
const result = colorScale(areaArray.toArray(), resolvedPolygonIndices);
console.timeEnd("colorScale");
return result;
}
Insert cell
Insert cell
resolvedPolygonIndices = {
console.time("prepare deckgl")
const resolvedIndices = new Int32Array(geomOffsets.length);
for (let i = 0; i < resolvedIndices.length; ++i) {
// Perform the lookup into the polygonOffsets array using the geomOffsets array
resolvedIndices[i] = polygonOffsets[geomOffsets[i]];
}
console.timeEnd("prepare deckgl")
return resolvedIndices;
}
Insert cell
Insert cell
deckglLayer = {
// Refer to https://deck.gl/docs/api-reference/layers/solid-polygon-layer#use-binary-attributes
const data = {
// Number of geometries
length: geometryColumn.length,
// Indices into coordinateArray where each polygon starts
startIndices: resolvedPolygonIndices,
attributes: {
// Flat coordinates array
getPolygon: { value: flatCoordinateArray, size: 2 },
// Pass in the color values per coordinate vertex
getFillColor: { value: colorAttribute, size: 3 }
}
};
const layer = new deck.SolidPolygonLayer({
// This is an Observable hack - changing the id will force the layer to refresh when the cell reevaluates
id: `layer-${Date.now()}`,
data,
// Skip normalization for binary data
_normalize: false,
// Counter-clockwise winding order
_windingOrder: "CCW"
});

deckglMap.setProps({ layers: [layer] });

return layer;
}
Insert cell
Insert cell
Insert cell
// Load the parquet-wasm library
geoarrowModule = {
const geoarrowModule = await import(
"https://unpkg.com/geoarrow-wasm@0.1.0/esm/index.js"
);
// Need to await the default export first to initialize the WebAssembly code
const { memory } = await geoarrowModule.default();
return [geoarrowModule, memory];
}
Insert cell
geoarrow = geoarrowModule[0]
Insert cell
geoarrowMemory = geoarrowModule[1]
Insert cell
// Load the parquet-wasm library
parquetModule = {
const parquetModule = await import(
"https://unpkg.com/parquet-wasm@0.4.0/esm/arrow2.js"
);
// Need to await the default export first to initialize the WebAssembly code
const { memory } = await parquetModule.default();
return [parquetModule, memory];
}
Insert cell
parquet = parquetModule[0]
Insert cell
parquetMemory = parquetModule[1]
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more