Public
Edited
Sep 11, 2024
Insert cell
Insert cell
// Byte count reported by getTableMemoryUsage for inputTable.
getTableMemoryUsage(inputTable)
Insert cell
// Byte count reported by getTableMemoryUsage for outputTable (the DuckDB query result).
getTableMemoryUsage(outputTable)
Insert cell
Insert cell
// Row count passed to repeatString when building inputTable; the db and
// outputTable cells also reference it.
tableSize = 10000
Insert cell
/**
 * Builds two same-length columns that repeat a single value.
 *
 * @param {*} val - Value repeated in every row of the dictionary column.
 * @param {number} numRows - Number of rows to generate.
 * @param {string} colname - Key of the dictionary column; the companion
 *   column is stored under `${colname}_codes`.
 * @returns {object} `{ [colname]: vector, [`${colname}_codes`]: Uint8Array }`
 *   where the vector is created with a Dictionary(Float32, Uint8) type and
 *   the Uint8Array holds `numRows` zeros.
 */
function repeatString(val, numRows, colname) {
  const repeated = Array.from({ length: numRows }, () => val);
  const dictionaryType = new arrow.Dictionary(
    new arrow.Float32(),
    new arrow.Uint8(),
  );
  const column = arrow.vectorFromArray(repeated, dictionaryType);
  const codes = Uint8Array.from({ length: numRows }, () => 0);

  return {
    [colname]: column,
    [`${colname}_codes`]: codes,
  };
}
Insert cell
// Arrow table with the "foo" and "foo_codes" columns returned by
// repeatString, each tableSize rows long and filled from the value 32.1.
inputTable = arrow.makeTable(repeatString(32.1, tableSize, "foo"))
Insert cell
// DuckDB client for this notebook. Referencing tableSize ties this cell to
// it; a falsy tableSize short-circuits to that value instead of a client.
db = tableSize && DuckDBClient.of({})
Insert cell
// Connection used by the insert and query cells.
// NOTE(review): `_db` looks like an internal field of the client — confirm it
// is safe to rely on.
conn = await db._db.connect()
Insert cell
insertionComplete = {
const buffer = arrow.tableToIPC(inputTable, 'stream');
conn.insertArrowFromIPCStream(buffer, {
name: "arrowTable",
schema: "main"
});
return true;
}
Insert cell
// Result of selecting foo_codes back out of arrowTable. The leading
// `tableSize && insertionComplete &&` makes this cell reference both values,
// and the query is only issued when both are truthy.
outputTable = tableSize && insertionComplete && conn.query("SELECT foo_codes FROM arrowTable")
Insert cell
// valueOffsets of the first child in the first batch of outputTable.
outputTable.batches[0].data.children[0].valueOffsets
Insert cell
Insert cell
// The "foo" child of inputTable.
inputTable.getChild("foo")
Insert cell
// Element 0 of the "foo" child of inputTable.
inputTable.getChild("foo").get(0)
Insert cell
// The "foo_codes" child of outputTable.
outputTable.getChild("foo_codes")
Insert cell
// Element 0 of the "foo_codes" child of outputTable.
outputTable.getChild("foo_codes").get(0)
Insert cell
// True when inputTable consists of exactly one batch.
inputTable.batches.length === 1
Insert cell
// True when outputTable consists of exactly one batch.
outputTable.batches.length === 1
Insert cell
// First child of the first batch of outputTable.
outputTable.batches[0].data.children[0]
Insert cell
// `values` of the first child in the first batch of inputTable.
inputTable.batches[0].data.children[0].values
Insert cell
// `values` of the first child in the first batch of outputTable.
outputTable.batches[0].data.children[0].values
Insert cell
// `values` of the first data chunk of the dictionary attached to the first
// child in the first batch of inputTable.
inputTable.batches[0].data.children[0].dictionary.data[0].values
Insert cell
// `values` of the first child in the first batch of outputTable.
outputTable.batches[0].data.children[0].values
Insert cell
// valueOffsets of the first child in the first batch of outputTable.
outputTable.batches[0].data.children[0].valueOffsets
Insert cell
/**
 * Estimates the number of bytes held by a table's column buffers.
 *
 * For every column of every batch this adds up the `values` buffer, the
 * `valueOffsets` buffer when present and, for dictionary-encoded columns,
 * the same two buffers of every dictionary data chunk. Each distinct typed
 * array is counted once, so a dictionary object shared by several batches
 * is not counted repeatedly. Other buffers a column may own are not
 * included.
 *
 * @param {object} t - Table-like object exposing
 *   `batches[].data.children[]`.
 * @returns {number} Total size in bytes; 0 for a table without batches.
 */
function getTableMemoryUsage(t) {
  const buffers = new Set();

  // Registers the measurable buffers of one data node, skipping absent ones.
  const collect = (node) => {
    for (const buffer of [node.values, node.valueOffsets]) {
      if (ArrayBuffer.isView(buffer)) {
        buffers.add(buffer);
      }
    }
  };

  for (const batch of t.batches) {
    // Walk every column, not only the first one.
    for (const column of batch.data.children) {
      collect(column);
      for (const chunk of column.dictionary?.data ?? []) {
        collect(chunk);
      }
    }
  }

  let totalBytes = 0;
  for (const buffer of buffers) {
    totalBytes += buffer.byteLength;
  }
  return totalBytes;
}
Insert cell
// Loads apache-arrow 11.0.0 as an ES module from Observable's CDN via dynamic
// import; the other cells use it as `arrow`.
// Reference: https://github.com/observablehq/feedback/issues/623#issuecomment-1999937586
arrow = await import('https://cdn.observableusercontent.com/npm/apache-arrow@11.0.0/+esm')
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more