Public
Edited
Mar 9, 2023
Insert cell
Insert cell
Insert cell
Insert cell
viewof results = Inputs.search(species1, { label: "Search by species" })
Insert cell
viewof species = Inputs.table(results)
Insert cell
Insert cell
viewof species1 = db.table(
`
FROM trees2
SELECT common,
COUNT(*) AS count
GROUP BY common
ORDER by count DESC
`
)
Insert cell
(scatterplot.tooltip_html = function (datum) {
console.log({ ...datum });
let html = `<div class="species">${datum.common}</div><div class="address">${datum.latin}, ${datum.latin}</div>`;
if (!["common"].includes(selected)) {
html += `<div class="etc"><span>${selected}</span>: <span>${datum[selected]}</span></div>`;
}
return html;
})
Insert cell
Insert cell
{
results;
d3.select("#label")
.style("font-size", "48px")
.style("font-family", "sans-serif")
.style("background", "none")
.text(viewof results[0].value);
}
Insert cell
scatterplot.plotAPI({
background_options: {
opacity: 0.05,
size: 0.1
},
encoding: {
foreground: {
field: "common",
lambda
}
}
// tooltip_html: `return "${selected}: " + datum['${selected}']`
})
Insert cell
lambda = `d => new Set(${JSON.stringify(selected_species)}).has(d)`
Insert cell
lambda_size = `d => (new Set(${JSON.stringify(
selected_species
)}).has(d) ? 2 : 0.5)`
Insert cell
selected_species = species.map((d) => d.common)
Insert cell
img = fetch("https://maps1.nyc.gov/tms/1.0.0/photo/2018/21/617539/1308820.png8")
Insert cell
<img src="https://maps1.nyc.gov/tms/1.0.0/photo/2018/21/617539/1308820.png8" />

Insert cell
Insert cell
html`
<style type="text/css">

.tooltip {
background-color: black;
width: 400px;
font-family: sans-serif;
}

.rect {
fill: white;
}

</style>
`
Insert cell
deepscatter = import("https://benschmidt.org/deepscatter@2.8.0")
Insert cell
Insert cell
db = {
const db = await DuckDBClient.of({});
await db.query(
"CREATE TABLE trees2 AS SELECT * FROM 'https://benschmidt.org/treesbig.parquet'"
);
return db;
}
Insert cell
trees2_arrow = {
const categorical = ["TPStructure", "TPCondition", "latin", "common"];
const sels = categorical.map((d) => ` ${d}::${d} AS ${d}`).join(",");

const enums_made = await Promise.all(
categorical.map((k) => {
db.query("DROP TYPE " + k);
return db.query(
`CREATE TYPE ${k} AS ENUM (SELECT "${k}" FROM trees2 WHERE "${k}" IS NOT NULL)`
);
})
);
return db.query(
`SELECT x::FLOAT x, (y * -1.5)::FLOAT y, ${sels}, row_number() OVER () AS ix FROM trees2`
);
}
Insert cell
Insert cell
db.table(q)
Insert cell
q = `WITH
word_counts AS
(SELECT boroname || ': ' || nta_name category, spc_common as word, COUNT(*) count FROM trees
GROUP BY category, word
),
-- I created the counts as separate subqueries because I was unable to make PARTITION BY work correctly.
word_totals AS (
SELECT word, sum("count")::FLOAT AS word_total FROM word_counts GROUP BY word
),
category_totals AS (
SELECT category, sum("count")::FLOAT AS category_size FROM word_counts GROUP BY category
),
pmi_part1 AS (
SELECT
SUM("count") OVER ()::FLOAT as corpus_size,
*
FROM word_counts NATURAL JOIN word_totals NATURAL JOIN category_totals
),
pmi_part2 AS (
SELECT *,
greatest(word_total - "count", 0.2) AS count_excluding, -- 0.2 is a fudge here--you could call it a 'prior,', I guess. If a word doesn't appear, we pretend that it appeared *just a little* to make the logarithms work.
corpus_size - category_size AS size_excluding_category
FROM pmi_part1
),
pmi_part3 AS (
SELECT *,
category_size * (word_total / corpus_size) AS expected_here,
size_excluding_category * (word_total / corpus_size) AS expected_elsewhere,
FROM pmi_part2
),
pmi_part4 AS (
SELECT *,
2 * (
"count" * log("count" / expected_here) +
"count_excluding" * log(count_excluding / expected_elsewhere)
) AS llr,
log("count" / expected_here) AS pmi,
FROM pmi_part3),
ranked_ordered AS (
SELECT category,
*,
word || '(' || count::INT || ')' AS label,
count,
word_total,
PMI,
llr,
CASE WHEN PMI < 0 THEN 'under-represented' ELSE 'over-represented' END AS direction,
RANK() OVER (PARTITION BY category, PMI < 0 ORDER BY llr DESC) ordering,
FROM pmi_part4
)
SELECT
category, direction, list(label) terms FROM ranked_ordered
WHERE ordering < 6 GROUP BY direction, category ORDER BY direction, category`
Insert cell
trees.url()
Insert cell
import {
DuckDBClient,
arrow
} from "@bmschmidt/duckdb-client-1-20-0-arrow-10-0-1"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more