Unlisted
Edited
Oct 25, 2023
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// 23,907,432: 24mb
Insert cell
// Build one histogram panel for a single quantized dimension.
//
// interval: [lo, hi] inclusive bin range of the current selection, drawn as a translucent band.
// level:    resolution handed to quantizedBinRanges to produce the bins.
// counts:   per-bin values for the filtered query, indexed like the bins.
// quantize: scale whose `invert` is used as the x-axis transform.
// domain:   accepted but not read by this function.
// ghost:    per-bin values for the full dataset, drawn first so it sits behind `counts`.
// caption:  forwarded to Plot.plot as `caption`.
// legend:   forwarded to Plot.plot as `color.legend`.
plot = (interval, level, counts, quantize, domain, ghost, caption, legend) => {
  const selectionColor = "#748FDE";

  // One layer of bars over the quantized bins, normalized against the layer's max.
  // `series` is emitted through a function so it is treated as a category of the
  // color scale rather than as a literal color.
  const bars = (values, series) =>
    Plot.rectY(
      quantizedBinRanges(level),
      Plot.normalizeY("max", {
        x1: (bin) => bin[0],
        // adjust for the 'inclusive' bin count.
        // todo: figure out how to not have the hanging extra guy at the end...
        x2: (bin) => bin[1] + 1,
        y: (bin, index) => values[index],
        fill: () => series,
        inset: 0.5
      })
    );

  return Plot.plot({
    marks: [
      // ghost
      bars(ghost, "Full dataset"),
      // counts
      bars(counts, "Filtered dataset"),
      // filter selection
      Plot.rectY([interval], {
        x1: (range) => range[0],
        x2: (range) => range[1] + 1,
        y0: null,
        y1: null,
        fill: selectionColor,
        fillOpacity: 0.15,
        stroke: selectionColor,
        strokeOpacity: 0.5,
        insetTop: -7
      }),
      Plot.ruleY([0])
    ],
    height,
    color: {
      legend,
      domain: ["Filtered dataset", "Full dataset"],
      range: ["#000", "#DDD8D4"]
    },
    x: { transform: quantize.invert },
    y: { percent: true, tickFormat: "s", label: "% of dataset" },
    caption
  });
}
Insert cell
// Count matches per hour bin while holding the given distance and delay ranges fixed.
// Each hour bin (at resolution `hourLevel`) is combined with `distance` and `delay`
// into one encoded symbol range; all ranges are sent to `counts` in a single call
// labelled "hour".
computeHourCounts = (distance, delay) => {
  const encodedRanges = [];
  for (const hourBin of quantizedBinRanges(hourLevel)) {
    encodedRanges.push(encodeRange(distance, delay, hourBin));
  }
  return counts(encodedRanges, "hour");
}
Insert cell
// Number formatter for the filtered indicator: d3 format ",.2s"
// (comma grouping, SI prefix, two significant digits).
comma = d3.format(",.2s")
Insert cell
// True when range `a` overlaps range `b`. Both are [lo, hi] pairs and the
// comparisons are strict, so ranges that merely touch at an endpoint do not overlap.
overlaps = (a, b) => {
  if (!(a[0] < b[1])) return false;
  return b[0] < a[1];
}
Insert cell
// - note: yao found many things confusing; i have notes from july 15 in a bike outline. ('full dataset' should always be visible, maybe side-by-side bars; the gray in particular is hard to see, particularly when covered up; distinction between 'input' and 'output' dimensions (causally), unclear how filtering a dimension interacts with the black bars in the same dimension)
// - investigate and debug v strange behavior when one or more of the ranges are zero.
// x show the 'ghost' of the full dataset behind the filtered query.
// x show the brushed region on the chart.
// x debug incorrect filtering behavior.
// x can do date using wm range filtering.
// x figure out our approach wrt. the maximum symbol and querying for it inclusively
// x consider partially highlighting bars corresponding to the selection range
// - note: to remove the raw data and work entirely in the quantized domain, we had to record the original scale domains (eg. `distanceDomain`).
// todo: 'local' checkbox that is checked if localhost server is accessible
// - question: why is there a count of 1 result when querying all zero ranges (which should be fine since they are inclusive), but the only chart that contains a (nonzero) bar is the one for arrival delay, in the final bin? if there is a result, shouldn't each chart contain a bar?
// - bug: on mobile, zora observed "RuntimeError: Out of bounds table access (evaluating 'wasm.waveletmatrix32_count_symbol_ranges (retptr, this.wbg_ptr, ptr0, len0, range_lo, range_hi, dims)')"
// - why do the distance queries take the longest? i think z=distance, y=delay, x=hour.
// - experiment with the alternate approach of defining each "marginal" query as a set of contiguous z-ranges, split from the original bounding box using bigmin/litmax or similar.
Insert cell
// Number formatter for the interval slider labels: d3 format ",.2r"
// (comma grouping, rounded to two significant digits).
fmt = d3.format(",.2r")
Insert cell
// Number of bits used to quantize each dimension.
quantizationLevel = 6
Insert cell
// Maximum quantized value in a single dimension: 2^quantizationLevel - 1.
maxSymbolPerDim = 2 ** quantizationLevel - 1
Insert cell
// [min, max] of the distance values, recorded from the original data.
// Used as the input domain of quantizeDistance.
distanceDomain = [0, 4962]
Insert cell
// [min, max] of the delay values, recorded from the original data.
// Used as the input domain of quantizeDelay.
delayDomain = [-60, 180]
Insert cell
// [min, max] of the hour values, recorded from the original data.
// Used as the input domain of quantizeHour.
hourDomain = [0, 24]
Insert cell
// Number of dimensions each symbol represents.
dims = 3
Insert cell
// The maximum allowable symbol is dictated by the quantization level and the number
// of dimensions: each symbol packs `quantizationLevel` bits for each of `dims`
// dimensions. E.g. in 3d with a quantization level of 8, symbols are 3 * 8 = 24 bits wide.
maxSymbol = 2 ** (quantizationLevel * dims) - 1
Insert cell
// Linear scale from raw distance (distanceDomain) to a quantized bin in
// [0, maxSymbolPerDim]. The rounding interpolator makes outputs integers,
// which is what `rangeRound` sets up.
quantizeDistance = d3
  .scaleLinear(distanceDomain, [0, maxSymbolPerDim])
  .interpolate(d3.interpolateRound)
Insert cell
// Linear scale from raw delay (delayDomain) to a quantized bin in
// [0, maxSymbolPerDim]. The rounding interpolator makes outputs integers,
// which is what `rangeRound` sets up.
quantizeDelay = d3
  .scaleLinear(delayDomain, [0, maxSymbolPerDim])
  .interpolate(d3.interpolateRound)
Insert cell
// Linear scale from raw hour (hourDomain) to a quantized bin in
// [0, maxSymbolPerDim]. The rounding interpolator makes outputs integers,
// which is what `rangeRound` sets up.
quantizeHour = d3
  .scaleLinear(hourDomain, [0, maxSymbolPerDim])
  .interpolate(d3.interpolateRound)
Insert cell
// Load the attached buffer and decode it into the wavelet matrix that holds the
// quantized 'data cube'. The cell's value is the decoded matrix once the promise resolves.
wm = FileAttachment("wm (1).buf")
  .arrayBuffer()
  .then((buffer) => {
    const bytes = new Uint8Array(buffer);
    return rs.WaveletMatrix32.decode(bytes);
  })
Insert cell
// Scratch check: read the symbol at index 0 of the decoded wavelet matrix.
wm.get(0)
Insert cell
// Scratch check: inspect wm.max_symbol() (the same value is passed to the `js` matrix constructor).
wm.max_symbol()
Insert cell
// lib = import(await FileAttachment("made-of-bits@3.js").url())
Insert cell
// import { lib } from "2aa3808823902569"
Insert cell
import { lib } from "2aa3808823902569"
Insert cell
{
  // Rough timing comparison: call counts() n times on the decoded matrix `wm`
  // and on the `js` matrix, logging each total under its own console timer.
  const n = 25;

  const timeRepeated = (label, run) => {
    console.time(label);
    for (let i = 0; i < n; i++) run();
    console.timeEnd(label);
  };

  timeRepeated("wm counts", () => wm.counts());
  timeRepeated("js counts", () => js.counts());

  console.log("");
}
Insert cell
// Second wavelet matrix, built with the imported `lib` from the symbols copied into
// `out` and the max symbol reported by `wm`. Used to compare against `wm`.
js = new lib.WaveletMatrix(out, wm.max_symbol())
Insert cell
out = {
console.time("get");
let len = wm.len();
const out = new Uint32Array(len);
for (let i = 0; i < len; i++) {
out[i] = wm.get(i);
}
console.timeEnd("get");
return out;
}
Insert cell
{
const a = wm.counts();
const b = js.counts();
for (let i = 0; i < b.length; i++) {
const cond =
a.symbol[i] === b[i].symbol &&
a.start[i] === b[i].start &&
a.end[i] === b[i].end;
if (!cond) throw new Error(`non-equal results at index ${i}`);
}
console.log("");
return { a, b };
}
Insert cell
// Scratch check: inspect the number of levels reported by the wavelet matrix.
wm.num_levels()
Insert cell
// Return the bin ranges for a particular dimension, quantized to `level` bits.
// Each range is an inclusive [lo, hi] pair of quantized values. If `level` equals
// the quantization level of the wavelet matrix, every bin covers a single step of
// the data cube along this dimension, e.g. [[0, 0], [1, 1], ..., [63, 63]];
// level 1 gives [[0, 31], [32, 63]].
quantizedBinRanges = (level) => {
  const binWidth = 2 ** (quantizationLevel - level);
  // The small epsilon makes the final edge (2 ** quantizationLevel) part of the range.
  const edges = d3.range(0, 2 ** quantizationLevel + 1e-6, binWidth);
  // Adjacent edges form half-open bins; subtracting 1 makes the upper bound inclusive.
  return d3.pairs(edges).map(([lo, hi]) => [lo, hi - 1]);
}
Insert cell
// Turn three inclusive per-dimension [lo, hi] ranges into one [start, end) symbol range.
// `start` encodes the three lower bounds and `end` encodes the three upper bounds plus
// one, so the symbol range is exclusive at its end while each dimension stays inclusive.
encodeRange = (distance, delay, hour) => {
  const lowestSymbol = encode3(distance[0], delay[0], hour[0]);
  const highestSymbol = encode3(distance[1], delay[1], hour[1]);
  return [lowestSymbol, highestSymbol + 1];
}
Insert cell
// Encoded [start, end) symbol range for the current selection. `distance`, `delay`
// and `hour` are cells defined outside this excerpt (presumably the slider values).
selectedRange = encodeRange(distance, delay, hour)
Insert cell
// Scratch check: inspect the bin ranges produced at level 1.
quantizedBinRanges(1)
Insert cell
// Bin resolution for the delay histogram, passed as `level` to quantizedBinRanges.
// `res` is defined outside this excerpt.
delayLevel = res
Insert cell
// Bin resolution for the distance histogram, passed as `level` to quantizedBinRanges.
// `res` is defined outside this excerpt.
distanceLevel = res
Insert cell
// Bin resolution for the hour histogram, passed as `level` to quantizedBinRanges.
// `res` is defined outside this excerpt.
hourLevel = res
Insert cell
// Per-bin delay counts restricted to the current `distance` and `hour` ranges
// (both cells are defined outside this excerpt).
delayCounts = computeDelayCounts(distance, hour)
Insert cell
// Per-bin delay counts with distance and hour both spanning the full default interval.
fullDelayCounts = computeDelayCounts(defaultInterval, defaultInterval)
Insert cell
// Count matches per delay bin while holding the given distance and hour ranges fixed.
// Each delay bin (at resolution `delayLevel`) is combined with `distance` and `hour`
// into one encoded symbol range; all ranges are sent to `counts` in a single call
// labelled "delay".
computeDelayCounts = (distance, hour) => {
  const encodedRanges = [];
  for (const delayBin of quantizedBinRanges(delayLevel)) {
    encodedRanges.push(encodeRange(distance, delayBin, hour));
  }
  return counts(encodedRanges, "delay");
}
Insert cell
// Per-bin distance counts restricted to the current `delay` and `hour` ranges
// (both cells are defined outside this excerpt).
distanceCounts = computeDistanceCounts(delay, hour)
Insert cell
// Per-bin distance counts with delay and hour both spanning the full default interval.
fullDistanceCounts = computeDistanceCounts(defaultInterval, defaultInterval)
Insert cell
// Count matches per distance bin while holding the given delay and hour ranges fixed.
// Each distance bin (at resolution `distanceLevel`) is combined with `delay` and `hour`
// into one encoded symbol range; all ranges are sent to `counts` in a single call
// labelled "distance".
computeDistanceCounts = (delay, hour) => {
  const encodedRanges = [];
  for (const distanceBin of quantizedBinRanges(distanceLevel)) {
    encodedRanges.push(encodeRange(distanceBin, delay, hour));
  }
  return counts(encodedRanges, "distance");
}
Insert cell
// Per-bin hour counts restricted to the current `distance` and `delay` ranges
// (both cells are defined outside this excerpt).
hourCounts = computeHourCounts(distance, delay)
Insert cell
// Per-bin hour counts with distance and delay both spanning the full default interval.
fullHourCounts = computeHourCounts(defaultInterval, defaultInterval)
Insert cell
// Count query for the currently selected symbol range, timed under the "selected" label.
// NOTE(review): `selectedRange` is a single [start, end) pair, whereas `counts` spreads
// its first argument into d3.zip as a list of pairs (as the compute*Counts cells pass it).
// The trailing "[0]" suggests this was once `counts([selectedRange], ...)[0]` — confirm intent.
selectedCount = counts(selectedRange, "selected") //[0]
Insert cell
// Display the `reload` view that is imported from another notebook further down.
viewof reload
Insert cell
// Run one count query against the wavelet matrix for a batch of symbol ranges.
//
// ranges: a list of [startSymbol, endSymbol) pairs (as produced by encodeRange),
//         or a single bare pair, which is treated as a one-element list.
// name:   label used for the console timer ("<name>: counts").
// Returns whatever `wm.count` returns for the batch, restricted to the index
// window `indexRange` (whose upper bound is inclusive, hence the + 1).
counts = (ranges, name) => {
  // d3.zip transposes a list of pairs into [allStarts, allEnds]. Spreading a bare
  // [start, end] pair would hand it two numbers instead of arrays and leave both
  // symbol lists undefined, so wrap that case into a one-element list first.
  const rangeList = typeof ranges[0] === "number" ? [ranges] : ranges;
  // An empty list transposes to [], so fall back to empty symbol lists.
  const [startSymbol = [], endSymbol = []] = d3.zip(...rangeList);

  const timerLabel = `${name}: counts`;
  console.time(timerLabel);
  try {
    return wm.count({
      startSymbol,
      endSymbol,
      start: indexRange[0],
      end: indexRange[1] + 1,
      masks
    });
  } finally {
    // Close the timer even if the query throws, so the label can be reused.
    console.timeEnd(timerLabel);
  }
}
Insert cell
// Masks passed with every `wm.count` query. They are derived from `dims` so the
// dimension count is declared in one place instead of being repeated as a literal.
masks = wm.morton_masks_for_dims(dims)
Insert cell
// Scratch check: zipping twice is a round trip — this evaluates to [[1, 2, 3], [4, 5, 6]].
d3.zip(...d3.zip([1, 2, 3], [4, 5, 6]))
Insert cell
// Full inclusive range of one quantized dimension; used to compute the "full" (unfiltered) counts.
defaultInterval = [0, maxSymbolPerDim]
Insert cell
// Chart height handed to Plot.plot by `plot`.
height = 175
Insert cell
import { interval } from "@mootari/range-slider"
Insert cell
import { encode3, decode3x, decode3y, decode3z } from "393eae81fa26b317"
Insert cell
import { rs, viewof reload } from "d4d70888d178542b"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more