Public
Edited
Jul 30, 2023
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Handle to the notebook file attachment named "Utah.parquet".
// In the cells visible here it is referenced only by a commented-out size probe.
utah = FileAttachment("Utah.parquet")
Insert cell
// parquetContentLength = utah.blob().then(r => r.size)
Insert cell
// URL of the Parquet file that the arrowTable cell fetches and decodes.
// It points at localhost:8000, so it only resolves where such a server is running.
localUrl = 'http://localhost:8000/Utah-multi.parquet'
Insert cell
arrowTable = {
console.log('attempting to read meta from multiple-row group parquet')
const parquetContentLength = await fetch(localUrl, {method: 'HEAD'}).then(r => r.headers.get('Content-Length'));
console.log(parquetContentLength);
const parquetMeta = await parquet.readMetadataAsync(localUrl, parquetContentLength);
console.log(parquetMeta);
const arrowSchema = parquetMeta.arrowSchema();
const rowGroupCount = parquetMeta.numRowGroups();
console.log(rowGroupCount);
const chunks = [];
for(let i = 0; i < rowGroupCount; i++) {
const rowGroupMeta = parquetMeta.rowGroup(i);
console.log(`${i} of ${rowGroupCount}`);
chunks.push(await parquet.readRowGroupAsync(localUrl, rowGroupMeta, arrowSchema.copy()));
}
const recordBatchChunks = chunks.map(arrow.tableFromIPC);
console.log(recordBatchChunks[0]);
console.log('constructing table');
const constructedTable = new arrow.Table(recordBatchChunks.slice(0, 20))
return arrow.tableFromIPC(arrow.tableToIPC(constructedTable));
// return constructedTable.concat();
}
Insert cell
// arrowTable = {
// const parquetBytes = new Uint8Array(parquetBuffer);
// const decodedArrowBytes = parquet.readParquet(parquetBytes);
// const arrowTable = arrow.tableFromIPC(decodedArrowBytes);
// return arrowTable;
// }
Insert cell
Insert cell
// Inspection cell: shows the first field of arrowTable's schema.
arrowTable.schema.fields[0]
Insert cell
// Column 0 of arrowTable. Later cells index it as geometry -> ring -> coordinate
// (nested get(0) calls and for...of loops), i.e. they treat it as the geometry column.
geometryColumn = arrowTable.getChildAt(0)
Insert cell
Insert cell
{
const firstGeometry = geometryColumn.get(0);
const firstRing = firstGeometry.get(0);
const firstCoord = firstRing.get(0);
return firstCoord.toArray();
}
Insert cell
Insert cell
// Pretty print the first geometry
// One ring with five coordinates
{
const firstGeom = geometryColumn.get(0);
const printedGeom = [];
for (const ring of firstGeom) {
const printedRing = [];
for (const coord of ring) {
const printedCoord = [coord.get(0), coord.get(1)];
printedRing.push(printedCoord);
}
printedGeom.push(printedRing);
}
return printedGeom;
}
Insert cell
Insert cell
Insert cell
Insert cell
// Dynamically imported 'simple-statistics' module; other cells call its `sum`
// to total chunk lengths.
simpleStatistics = await import('simple-statistics');
Insert cell
// Inspection cell: last entry of the valueOffsets array of the first chunk
// (data[0]) of geometryColumn's first child.
geometryColumn.getChildAt(0).data[0].valueOffsets.at(-1)
Insert cell
// First chunk (data[0]) of geometryColumn's first child, kept for inspection;
// no other cell visible here references it.
intermediate = geometryColumn.getChildAt(0).data[0]
Insert cell
// Descends three child levels from geometryColumn (getChildAt(0) three times).
// The flatCoordinateArray cell concatenates the `values` of each of its chunks.
flatCoordinateVector = geometryColumn.getChildAt(0).getChildAt(0).getChildAt(0)
Insert cell
// `values` of the first chunk (data[0]) of flatCoordinateVector.
// In the cells visible here it is referenced only from commented-out code.
firstFlatCoordinateArray = flatCoordinateVector.data[0].values
Insert cell
flatCoordinateArray = {
// const spanningByteLength = flatCoordinateVector.data.at(-1).values.byteOffset + flatCoordinateVector.data.at(-1).byteLength - firstFlatCoordinateArray.byteOffset;
// const view = new Float64Array(firstFlatCoordinateArray.buffer, firstFlatCoordinateArray.byteOffset, spanningByteLength / 8);
// console.log(view.at(-1) === flatCoordinateVector.data.at(-1).values.at(-1));
// return view
const totalCoordinateCount = simpleStatistics.sum(flatCoordinateVector.data.map(it => it.values.length));
const coordinateArray = new Float64Array(totalCoordinateCount);
for(const [chunkIndex, chunk] of flatCoordinateVector.data.entries()) {
const precedingChunksLength = simpleStatistics.sum(flatCoordinateVector.data.slice(0, chunkIndex).map(it => it.values.length));
coordinateArray.set(chunk.values, precedingChunksLength);
}
return coordinateArray;
}
Insert cell
resolvedPolygonIndices = {
const expectedTotal = geometryColumn.length;
const resolvedPolygonIndices = new Int32Array(simpleStatistics.sum(geometryColumn.data.map(it => it.length)));
for(let chunkIndex = 0; chunkIndex < geometryColumn.data.length; chunkIndex++) {
const chunkOffset = simpleStatistics.sum(geometryColumn.data.slice(0, chunkIndex).map(precedingChunk => precedingChunk.length));
// instead of concatenating the underlying coordinate array into a contiguous view (which requires a copy), we can use
// a view that spans from the beginning of the first chunk's backing array to the end of the last chunk's backing array.
// the chunk ring offset is therefore equal to (currentChunkCoordsArray.byteOffset - spanningView.byteOffset) / 8
// const chunkRingOffset = (flatCoordinateVector.data[chunkIndex].values.byteOffset - flatCoordinateArray.byteOffset) / 8;
// warning: when a set of at least 2 record batches are part of the same contiguous arraybuffer (i.e. one that has taken a roundtrip via tableFromIPC(tableToIPC), the last valueOffsets value of each
const chunkRingOffset = simpleStatistics.sum(geometryColumn.getChildAt(0).data.slice(0, chunkIndex).map(precedingChunk => precedingChunk.valueOffsets.at(-2)));
const chunkPolygonIndices = geometryColumn.data[chunkIndex].valueOffsets;
const chunkRingIndices = geometryColumn.getChildAt(0).data[chunkIndex].valueOffsets;
// NB: the last entry in the valueOffsets array of a chunk is always equal to the number of coordinates in that chunk
console.log(chunkOffset, 'chunk ring offsets', chunkRingOffset, chunkRingIndices, chunkPolygonIndices);
for(let i = 0; i < geometryColumn.data[chunkIndex].length; i++) {
resolvedPolygonIndices[chunkOffset + i] = chunkRingIndices[chunkPolygonIndices[i]] + chunkRingOffset;
}
}
return resolvedPolygonIndices
}
Insert cell
// valueOffsets of the first chunk only (`data[0]`) of geometryColumn.
// NOTE(review): this cell was written for a Parquet file with a single row group,
// where `data` has one element. The arrowTable cell now reads a file it describes
// as "multiple-row group", so `data` may hold several chunks; the
// resolvedPolygonIndices cell walks every chunk, this one does not.
polygonIndices = geometryColumn.data[0].valueOffsets
Insert cell
Insert cell
// valueOffsets of the first chunk only (`data[0]`) of geometryColumn's first child.
// In the cells visible here it is referenced only from commented-out code.
ringIndices = geometryColumn.getChildAt(0).data[0].valueOffsets
Insert cell
Insert cell
// resolvedPolygonIndices = {
// const resolvedIndices = new Int32Array(polygonIndices.length);
// for (let i = 0; i < resolvedIndices.length; ++i) {
// // Perform the lookup into the ringIndices array using the polygonIndices array
// resolvedIndices[i] = ringIndices[polygonIndices[i]]
// }
// return resolvedIndices;
// }
Insert cell
Insert cell
// Two child levels below geometryColumn, i.e. one level above flatCoordinateVector.
// No other cell visible here references it.
coordinateVector = geometryColumn.getChildAt(0).getChildAt(0)
Insert cell
Insert cell
Insert cell
// Inspection cell: number of entries in resolvedPolygonIndices.
resolvedPolygonIndices.length
Insert cell
// Inspection cell: spot-checks the entry at index geometryColumn.length / 2.
// (`at` truncates a fractional index toward zero, so an odd length is fine.)
resolvedPolygonIndices.at(geometryColumn.length / 2)
Insert cell
deckglLayer = {
// Refer to https://deck.gl/docs/api-reference/layers/solid-polygon-layer#use-binary-attributes
const data = {
// Number of geometries
length: arrowTable.numRows,
// Indices into coordinateArray where each polygon starts
startIndices: resolvedPolygonIndices,
// Flat coordinates array
attributes: {
getPolygon: { value: flatCoordinateArray, size: 2 }
}
};
const layer = new deck.SolidPolygonLayer({
// This is an Observable hack - changing the id will force the layer to refresh when the cell reevaluates
id: `layer-${Date.now()}`,
data,
// Skip normalization for binary data
_normalize: false,
// Counter-clockwise winding order
_windingOrder: "CCW",
getFillColor: [0, 100, 60, 160],
});

deckglMap.setProps({ layers: [layer] });

return layer;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more