Published
Edited
Dec 24, 2020
Importers
15 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
aq.fromArrow(country_feather.t).view()
Insert cell
Type JavaScript, then Shift-Enter. Ctrl-space for more options. Arrow ↑/↓ to switch modes.

Insert cell
import {aq, op} from '@uwdata/arquero'
Insert cell
mb = function(string) {
  // Format a byte count for display. Despite its name, `string` is a number
  // of bytes: below one mebibyte it is reported in "KB", otherwise in "MB",
  // using d3's ",.3" format (thousands separator, three significant digits).
  const KIB = 1024
  const use_megabytes = !(string < KIB * KIB)
  const scaled = use_megabytes ? string / KIB / KIB : string / KIB
  const unit = use_megabytes ? "MB" : "KB"
  return d3.format(",.3")(scaled) + unit
}
Insert cell
Insert cell
Insert cell
Insert cell
// Animation cell (generator). Loops forever: picks a country name at random,
// paints that country's triangulation onto the element with id "canvas1" one
// triangle at a time, then idles before moving on to the next country.
// Every `yield` hands the current country name back to the notebook.
// Reads `cycle_countries`, `width`, `height` and `country_feather` from other cells
// (`cycle_countries` and `width` are not defined in the visible source).
country = {
// This code draws the triangulated earcut versions.
while (true) {
// d3.shuffle reorders `cycle_countries` in place; the first element afterwards is the pick.
const country = d3.shuffle(cycle_countries)[0]
const ctx = d3.select("#canvas1").node().getContext("2d")
ctx.moveTo(0, 0)

ctx.clearRect(0, 0, width, height)
// Scan every row of the Arrow table and draw only the rows whose NAME matches the pick.
for (let row of country_feather.t) {
if ((row.NAME) == (country)) {
// Rows without triangulation data have a null coord_resolution; nothing to draw.
if (row.coord_resolution === null) {
continue
}
// NOTE(review): `manhattan` is never read afterwards.
const manhattan = row
// Reinterpret the row's binary `bounds` as float32 values: xmin, ymin, xmax, ymax.
const bounds_buffer = new Float32Array(new Uint8Array(row.bounds).buffer)
const [xmin, ymin, xmax, ymax] = d3.range(4).map(i => bounds_buffer[i])
// Start at 10x the projected extent, then shrink by 25% per step until the
// larger side fits within `width`.
let w = (xmax-xmin)*10
let h = (ymax-ymin)*10
while (d3.max([w, h]) > width) {
w *= .75
h *= .75
}
// NOTE(review): the canvas height is set to width*1.25, not to `h`; resizing a
// canvas presumably also resets what was drawn/cleared above — confirm intent.
d3.select("#canvas1").attr("width", w).attr("height",width*1.25)
// Linear scales from projected coordinates to canvas pixels.
const x = d3.scaleLinear().domain([xmin, xmax]).range([0, w])
const y = d3.scaleLinear().domain([ymin, ymax]).range([0, h])
// Number of vertex indices: length of the binary blob * 8 bits / bits per index.
const n_vertices = row.vertices.length * 8 / row.coord_resolution;

const vertices = new DataView(row.vertices.buffer, row.vertices.byteOffset, row.vertices.byteLength)
// const verts = new B(new Uint8Array(row.vertices).buffer)
ctx.fillStyle = "blue";
// Bytes per stored index.
const stride = row.coord_resolution/8
// Read the i-th index (little-endian, at the row's bit width) and add the row's
// offset to obtain the index passed to country_feather.coord().
let getter = i => vertices[`getUint${row.coord_resolution}`](i*stride, true) + row.coord_buffer_offset
// NOTE(review): `offset` is never read afterwards.
const offset = row.coord_buffer_offset
// Map a projected [x, y] pair to canvas pixel coordinates.
const project = ([x_, y_]) => {
return [x(x_), y(y_)]
}
// Every three consecutive indices form one triangle.
for (let i=0; i < n_vertices; i+=3) {
// Color each triangle by how far through the index list it is.
ctx.fillStyle = d3.interpolateRainbow(i/n_vertices)
// Draw triangles one at a time.
ctx.beginPath();
const [a, b, c] = [0, 1, 2].map(j => country_feather.coord(getter(i+j)))

ctx.moveTo(...project(a))
ctx.lineTo(...project(b))
ctx.lineTo(...project(c))
ctx.lineTo(...project(a))

ctx.stroke();
ctx.fill();
// Yield after each triangle — presumably so the notebook repaints between triangles.
yield country

}
//
//return vertices
}
}
// Yield the same value 150 (30 * 5) more times before picking the next country.
for (let i of d3.range(30 * 5)) {
// stop at the end.
yield country
}
}
}
Insert cell
// Height used when clearing the canvas in the `country` cell: 0.66 x `width`.
// `width` is not defined in the visible source — presumably the notebook's built-in page width.
height = width * .66
Insert cell
Insert cell
Insert cell
import { radio, checkbox } from '@jashkenas/inputs'
Insert cell
md`# Data Converter

You can convert any geojson file into this format and download it using the button below; just change the source.

You can also live-transform by importing the `TriFeather` class below and calling `TriFeather.from_feature_collection`.
`
Insert cell
// Base name for the download: the last "/"-separated segment of `url`, with the
// first occurrence of ".json" and then of ".geojson" removed.
filename = url
  .split("/")
  .pop()
  .replace(".json", "")
  .replace(".geojson", "")
Insert cell
// Source GeoJSON to convert. The trailing comments on this line are alternative
// sources that are currently disabled; replace the active URL to convert a different file.
url = 'https://raw.githubusercontent.com/martynafford/natural-earth-geojson/master/50m/cultural/ne_50m_admin_0_countries_lakes.json'//'https://cors-anywhere.herokuapp.com/eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_050_00_500k.json'//'https://raw.githubusercontent.com/martynafford/natural-earth-geojson/master/110m/cultural/ne_110m_admin_0_countries_lakes.json'
Insert cell
raw_geojson = d3.text(url)
Insert cell
geojson = {
const geojson = JSON.parse(raw_geojson)
// For the mapping I'm doing of the world with distortion,
// it is preferable to remove exclaves like Alaska, the portions of Russia past -180 longitude,
// and French Guinea
if (sort_features == "Yes") {
geojson.features.sort((a, b) => (a.properties.REGION_WB > b.properties.REGION_WB ? -1 : 1))
}
return geojson
geojson.features = geojson.features.filter(d => d.properties.SOVEREIGNT !== "Antarctica")
for (let feature of geojson.features) {
if (false) {//feature.properties.GEOUNIT.match(/United States of America|United Kingdom|France|Russia/)) {
console.log(feature.properties.GEOUNIT)
reduce_to_largest_elements(feature)
}
}
return geojson
}
Insert cell
/**
 * Trim a MultiPolygon feature, in place, to its `n` largest member polygons.
 * Size is measured by the number of points in each polygon's first (exterior) ring.
 *
 * Features whose geometry is missing or is not a MultiPolygon are left untouched:
 * a Polygon's `coordinates` are rings rather than polygons, so trimming them would
 * discard holes instead of exclaves.
 *
 * @param {object} feature - GeoJSON feature; `feature.geometry.coordinates` is replaced.
 * @param {number} [n=1] - How many polygons to keep.
 * @returns {undefined}
 */
function reduce_to_largest_elements(feature, n = 1) {
  const geometry = feature.geometry
  if (!geometry || geometry.type !== "MultiPolygon") {
    return
  }
  // Sort a copy so the caller's original array is not reordered.
  geometry.coordinates = [...geometry.coordinates]
    .sort((a, b) => b[0].length - a[0].length)
    .slice(0, n)
}
Insert cell
//projection = d3.geoGingery().scale(300).lobes(6).rotate([98, -40, 0])//d3.geoInterruptedMollweide().scale(5000)//d3.geoMercator().scale(2).translate([-0, 0])//d3.geoInterruptedHomolosine().translate([0, 0]).scale(.2)
Insert cell
// Triangulate the parsed GeoJSON under `projection` into a TriFeather.
// NOTE(review): `clip_to_sphere` is compared against lowercase "yes" while the
// `geojson` cell compares `sort_features` against "Yes" — confirm the input's actual values.
country_feather = TriFeather.from_feature_collection(geojson, projection, {clip_to_sphere: clip_to_sphere == "yes"})

Insert cell
arrow = require('apache-arrow@2.0.0')
Insert cell
download_button(country_feather)
Insert cell
country_feather.coord(100)
Insert cell
country_feather.bytes.byteLength
Insert cell
/**
 * Render a heading with the file name and size, plus a download button for the
 * serialized feather bytes.
 *
 * @param feather - Object exposing `bytes` (typed array of the serialized table)
 *   and `t` (the table, whose `length` is shown as the row count).
 * @returns The rendered HTML fragment.
 */
function download_button(feather) {
  // Hand the typed array itself to Blob: using `bytes.buffer` would also include
  // any bytes of the underlying ArrayBuffer that lie outside this view.
  const blob = new Blob([feather.bytes], {type: "application/octet-stream"})
  return html`
<h3>${filename} (${mb(feather.bytes.byteLength)})</h3>
${DOM.download(blob, `${filename}.gleofeather`, `Download ${feather.t.length} row file`)}`
}
Insert cell
country_feather.t.serialize()
Insert cell
md`# Functions to turn geoJSON into feather frames.`
Insert cell
clip = require('polygon-clipping')
Insert cell
// Projection applied to the GeoJSON before triangulation; it is passed to
// TriFeather.from_feature_collection by the `country_feather` cell.
projection = d3.geoMercator().scale(1000)
Insert cell
/**
 * Pairs one row of a feather table with the object that resolves coordinate
 * indices (anything exposing `coord(ix)`), so the row's triangles can be iterated.
 */
class TriRow {
  /**
   * @param row - Table row exposing `vertices` (binary index data),
   *   `coord_resolution` (bits per index) and `coord_buffer_offset`.
   * @param parent - Object exposing `coord(ix)`; stored as `this.t`.
   */
  constructor(row, parent) {
    this.row = row
    this.t = parent
  }

  /**
   * Yield the row's triangles, three vertices at a time.
   *
   * @param {boolean} [indices=false] - When true, yield the three coordinate
   *   indices; otherwise yield the three coordinates returned by `t.coord`.
   * Rows with no triangulation (null resolution or null vertices) yield nothing.
   */
  *iter_triangles(indices = false) {
    const {row, t} = this
    const resolution = row.coord_resolution
    // Check for missing data before touching row.vertices, which is null for
    // rows that have no geometry.
    if (resolution == null || row.vertices == null) {
      return
    }
    const view = new DataView(row.vertices.buffer, row.vertices.byteOffset, row.vertices.byteLength)
    const stride = resolution / 8 // bytes per stored index
    const n_vertices = row.vertices.byteLength / stride
    const read = `getUint${resolution}`
    // Stored indices are little-endian and relative to the row's offset.
    const index_at = i => view[read](i * stride, true) + row.coord_buffer_offset
    for (let i = 0; i < n_vertices; i += 3) {
      const triangle = [index_at(i), index_at(i + 1), index_at(i + 2)]
      yield indices ? triangle : triangle.map(ix => t.coord(ix))
    }
  }
}
Insert cell
class TriFeather {

constructor(bytes) {
this.bytes = bytes
this.t = arrow.Table.from(bytes)
}

get n_coords() {
this.coord_buffer;
return this._n_coords;
}

get coord_buffer() {
if (this._coord_buffer) {
return this._coord_buffer
}
const d = this.t.get(0).vertices;
this._coord_bytes = d.byteOffset
this._n_coords = (d.byteLength/4/2)
this._coord_buffer = new DataView(d.buffer, d.byteOffset, d.byteLength)
return this._coord_buffer
}
static polygon_to_triangles(polygon) {
// Actually perform the earcut work on a polygon.
const el_pos = []
const coords = polygon.flat(2)
const vertices = earcut(...Object.values(earcut.flatten(polygon)))
return { coords, vertices }
}

static from_feature_collection(feature_collection,
projection,
options = {dictionary_threshold: .75, clip_to_sphere: false}) {

if (projection === undefined) {throw "Must define a projection"}
// feature_collections: a (parsed) geoJSON object.
// projection: a d3.geoProjection instance;
// eg, d3.geoMollweide().transform([10, 20])
// options:

const properties = new Map()
// Stores the number of bytes used for the coordinates.
const coord_resolutions = [null]
const coord_buffer_offset = [null]
// centroids let you have fun with shapes. Store x and y separately.
const centroids = [[null], [null]]
const bounds = [null]
// Storing areas makes it possible to weight centroids.
const areas = [null]
let i = -1;

const path = d3.geoPath()
let clip_shape;

let projected = d3.geoProject(feature_collection, projection)
if (options.clip_to_sphere) {
clip_shape = d3.geoProject({"type": "Sphere"}, projection)
for (let feature of projected.features) {
const new_coords = clip.intersection(feature.coordinates, clip_shape.coordinates)
if (projected.type == "Polygon" && typeof(new_coords[0][0][0] != "numeric")) {
projected.type = "MultiPolygon"
}
feature.coordinates = new_coords
}
}
const {indices, points} = this.lookup_map_and_coord_buffer(projected)
const coord_indices = indices;
const coord_codes = points;

// Stash the vertices in the first item of the array.
const vertices = [new Uint8Array(coord_codes.buffer)]
properties.set("id", ["Dummy feather row"])

i = 0;
for (let feature of projected.features) {
// start at one; the first slot is reserved for caching the full
// feature list
i++;
properties.get("id")[i] = feature.id || `Feature_no_${i}`

for (let [k, v] of Object.entries(feature.properties)) {
if (!properties.get(k)) {properties.set(k, [])}
properties.get(k)[i] = v
}

const projected = feature.geometry
const [x, y] = path.centroid(projected)
const bbox = new Float32Array(path.bounds(projected).flat())

centroids[0][i] = x; centroids[1][i] = y
areas[i] = path.area(projected)
bounds[i] = bbox
let loc_coordinates;
if (projected === null) {
console.warn("Error on", projected)
coord_resolutions[i] = null
vertices[i] = null
continue
} else if (projected.type == "Polygon") {
loc_coordinates = [projected.coordinates]
} else if (projected.type == "MultiPolygon") {
loc_coordinates = projected.coordinates
} else {
throw "All elements must be polygons or multipolgyons."
}
let all_coords = []
let all_vertices = []
for (let polygon of loc_coordinates) {
const { coords, vertices } = TriFeather.polygon_to_triangles(polygon);
// Allow coordinate lookups by treating them as a single 64-bit int.
const bigint_coords = new BigInt64Array(new Float32Array(coords.flat(3)).buffer);
// Reduce to the indices of the master lookup table.
const lookup_points = vertices.map(vx => coord_indices.get(bigint_coords[vx]))
all_vertices.push(...lookup_points)
}
const [start, end] = d3.extent(all_vertices)
const diff = end - start

coord_buffer_offset[i] = (start)

// Normalize the vertices around the lowest element.
// Allows some vertices to be stored at a lower resolution.
for (let j=0; j<all_vertices.length; j++) {
all_vertices[j] = all_vertices[j]-start
}

// Determine the type based on the offset.
let MyArray
if (diff < 2**8) {
coord_resolutions[i] = 8
MyArray = Uint8Array
} else if (diff < 2**16) {
coord_resolutions[i] = 16
MyArray = Uint16Array
} else {
// Will not allow more than 4 billion points on a single feature,
// should be fine.
coord_resolutions[i] = 32
MyArray = Uint32Array
}
vertices[i] = MyArray.from(all_vertices)
}

const cols = {
"vertices": this.pack_binary(vertices),
"bounds": this.pack_binary(bounds),
"coord_resolution": arrow.Uint8Vector.from(coord_resolutions),
"coord_buffer_offset": arrow.Uint32Vector.from(coord_buffer_offset),
"pixel_area": arrow.Float64Vector.from(areas),
"centroid_x": arrow.Float32Vector.from(centroids[0]),
"centroid_y": arrow.Float32Vector.from(centroids[1])
}

for (const [k, v] of properties.entries()) {
if (k in cols) {
// silently ignore.
//throw `Duplicate column names--rename ${k} `;
}
cols[k] = arrow.Vector.from({nullable: true, values: v, type: this.infer_type(v, options.dictionary_threshold)})
}

const named_columns = []
for (const [k, v] of Object.entries(cols)) {
// console.log(k, v)
named_columns.push(arrow.Column.new(k, v))
}
const tab = arrow.Table.new(...named_columns)

const afresh = tab.serialize()
return new TriFeather(afresh)

}


static infer_type(array, dictionary_threshold = .75) {
// Certainly reinventing the wheel here--
// determine the most likely type of something based on a number of examples.

// Dictionary threshold: a number between 0 and one. Character strings will be cast
// as a dictionary if the unique values of the array are less than dictionary_threshold
// times as long as the length of all (not null) values.
const seen = new Set()
let strings = 0
let floats = 0
let max_int = 0

for (let el of array) {

if (Math.random() > 200/array.length) {continue} // Only check a subsample for speed. Try
// to get about 200 instances for each row.
if (el === undefined || el === null) {
continue
}
if (typeof(el) === "string") {
strings += 1
seen.add(el)
} else if (typeof(el) === "number") {
if (el % 1 > 0) {
floats += 1
} else if (isFinite(el)) {
max_int = Math.max(Math.abs(el), max_int)
} else {

}
} else {
throw `No behavior defined for type ${typeof(el)}`
}
}
if ( strings > 0 ) {
// moderate overlap
if (seen.length < strings.length * .75) {
return new arrow.Dictionary(new arrow.Utf8(), new arrow.Int32())
} else {
return new arrow.Utf8()
}
}
if (floats > 0) {
return new arrow.Float32()
}
if (Math.abs(max_int) < 2**8) {
return new arrow.Int8()
}
if (Math.abs(max_int) < 2**16) {
return new arrow.Int16()
}
if (Math.abs(max_int) < 2**32) {
return new arrow.Int32()
} else {
return new arrow.Int64()
}

}


coord(ix) {
// NB this manually specifies little-endian, although
// Arrow can potentially support big-endian frames under
// certain (future?) circumstances.
return [
this.coord_buffer.getFloat32(ix*4*2, true),
this.coord_buffer.getFloat32(ix*2*4 + 4, true)
]
}
static pack_binary(els) {
const { Builder, Binary } = arrow;
const binaryBuilder = Builder.new({
type: new Binary(),
nullValues: [null, undefined],
highWaterMark: 2**16
});
for (let el of els) { binaryBuilder.append(el) }
return binaryBuilder.finish().toVector()
}


bind_to_regl(regl) {
this.regl = regl
this.element_handler = new Map();
// Elements can't share buffers (?) so just use a map.
this.regl_coord_buffer = regl.buffer(
{data: this.t.get(0).vertices, type: "float", usage: "static"})
this.prepare_features_for_regl()
}
prepare_features_for_regl() {
this.features = []
const {t, features, regl, element_handler, regl_coord_buffer} = this;
// Start at 1, not zero, to avoid the dummy.
for (let ix = 1; ix<this.t.length; ix++) {
const feature = this.t.get(ix)
if (feature.vertices.length === null) {
continue
}
element_handler.set(ix, this.regl.elements({
primitive: 'points',
usage: 'static',
data: feature.vertices,
type: "uint" + feature.coord_resolution,
length: feature.vertices.length, // in bytes
count: feature.vertices.length / feature.coord_resolution * 8
}))
const f = {
ix,
vertices: element_handler.get(ix),
coords: {buffer: this.regl_coord_buffer, stride: 8, offset: feature.coord_buffer_offset * 8},
properties: feature
}; // Other data can be bound to this object if desired, which makes programming easier than
// working off the static feather frame.
features.push(f)
}
}
get bbox() {
if (this._bbox) {return this._bbox}
this._bbox = {
x: d3.extent(d3.range(this.n_coords).map(i => this.coord(i)[0])),
y: d3.extent(d3.range(this.n_coords).map(i => this.coord(i)[1])),
}
return this._bbox
}
*[Symbol.iterator]() {
for (let feature of this.features) {
yield feature
}
}
static lookup_map_and_coord_buffer (geojson) {
const all_coordinates = new Float32Array(geojson.features.filter(d => d.geometry).map(d => d.geometry.coordinates).flat(4))
const feature_collection = geojson
const codes = new BigInt64Array(all_coordinates.buffer)
const indices = new Map()
for (let code of codes) {
if (!indices.has(code)) {
indices.set(code, indices.size)
}
}
const points = new BigInt64Array(indices.size)
for (let [k, v] of indices.entries()) {
points[v] = k
}
return {indices, points}
}
}
Insert cell
// Scratch cell: same x/y extent computation as the TriFeather `bbox` getter.
// NOTE(review): `this` here is whatever the notebook binds for the cell, not a
// TriFeather instance, so `n_coords` / `coord` are presumably undefined — confirm
// whether this cell is still needed.
{
return {
x: d3.extent(d3.range(this.n_coords).map(i => this.coord(i)[0])),
y: d3.extent(d3.range(this.n_coords).map(i => this.coord(i)[1])),
}
}
Insert cell
d3.extent([[1,2], [3,4], [5, 6, 7]])
Insert cell
country_feather.n_coords
Insert cell
earcut = require("earcut")
Insert cell
d3 = require("d3-geo-projection", "d3-fetch", "d3-geo", "d3-array", "d3")
Insert cell
Type JavaScript, then Shift-Enter. Ctrl-space for more options. Arrow ↑/↓ to switch modes.

Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more