Public
Edited
Mar 9, 2023
Insert cell
Insert cell
Insert cell
Insert cell
viewof results = Inputs.search(species1, { label: "Search by species" })
Insert cell
viewof species = Inputs.table(results)
Insert cell
Insert cell
viewof species1 = db.table(
`
FROM trees2
SELECT common,
COUNT(*) AS count
GROUP BY common
ORDER by count DESC
`
)
Insert cell
(scatterplot.tooltip_html = function (datum) {
console.log({ ...datum });
let html = `<div class="species">${datum.common}</div><div class="address">${datum.latin}, ${datum.latin}</div>`;
if (!["common"].includes(selected)) {
html += `<div class="etc"><span>${selected}</span>: <span>${datum[selected]}</span></div>`;
}
return html;
})
Insert cell
Insert cell
{
results;
d3.select("#label")
.style("font-size", "48px")
.style("font-family", "sans-serif")
.style("background", "none")
.text(viewof results[0].value);
}
Insert cell
scatterplot.plotAPI({
background_options: {
opacity: 0.05,
size: 0.1
},
encoding: {
foreground: {
field: "common",
lambda
}
}
// tooltip_html: `return "${selected}: " + datum['${selected}']`
})
Insert cell
lambda = `d => new Set(${JSON.stringify(selected_species)}).has(d)`
Insert cell
lambda_size = `d => (new Set(${JSON.stringify(
selected_species
)}).has(d) ? 2 : 0.5)`
Insert cell
selected_species = species.map((d) => d.common)
Insert cell
img = fetch("https://maps1.nyc.gov/tms/1.0.0/photo/2018/21/617539/1308820.png8")
Insert cell
<img src="https://maps1.nyc.gov/tms/1.0.0/photo/2018/21/617539/1308820.png8" />

Insert cell
Insert cell
html`
<style type="text/css">

.tooltip {
background-color: black;
width: 400px;
font-family: sans-serif;
}

.rect {
fill: white;
}

</style>
`
Insert cell
deepscatter = import("https://benschmidt.org/deepscatter@2.8.0")
Insert cell
Insert cell
db = {
const db = await DuckDBClient.of({});
await db.query(
"CREATE TABLE trees2 AS SELECT * FROM 'https://benschmidt.org/treesbig.parquet'"
);
return db;
}
Insert cell
trees2_arrow = {
const categorical = ["TPStructure", "TPCondition", "latin", "common"];
const sels = categorical.map((d) => ` ${d}::${d} AS ${d}`).join(",");

const enums_made = await Promise.all(
categorical.map((k) => {
db.query("DROP TYPE " + k);
return db.query(
`CREATE TYPE ${k} AS ENUM (SELECT "${k}" FROM trees2 WHERE "${k}" IS NOT NULL)`
);
})
);
return db.query(
`SELECT x::FLOAT x, (y * -1.5)::FLOAT y, ${sels}, row_number() OVER () AS ix FROM trees2`
);
}
Insert cell
Insert cell
db.table(q)
Insert cell
q = `WITH
word_counts AS
(SELECT boroname || ': ' || nta_name category, spc_common as word, COUNT(*) count FROM trees
GROUP BY category, word
),
-- I created the counts as separate subqueries because I was unable to make PARTITION BY work correctly.
word_totals AS (
SELECT word, sum("count")::FLOAT AS word_total FROM word_counts GROUP BY word
),
category_totals AS (
SELECT category, sum("count")::FLOAT AS category_size FROM word_counts GROUP BY category
),
pmi_part1 AS (
SELECT
SUM("count") OVER ()::FLOAT as corpus_size,
*
FROM word_counts NATURAL JOIN word_totals NATURAL JOIN category_totals
),
pmi_part2 AS (
SELECT *,
greatest(word_total - "count", 0.2) AS count_excluding, -- 0.2 is a fudge here--you could call it a 'prior,', I guess. If a word doesn't appear, we pretend that it appeared *just a little* to make the logarithms work.
corpus_size - category_size AS size_excluding_category
FROM pmi_part1
),
pmi_part3 AS (
SELECT *,
category_size * (word_total / corpus_size) AS expected_here,
size_excluding_category * (word_total / corpus_size) AS expected_elsewhere,
FROM pmi_part2
),
pmi_part4 AS (
SELECT *,
2 * (
"count" * log("count" / expected_here) +
"count_excluding" * log(count_excluding / expected_elsewhere)
) AS llr,
log("count" / expected_here) AS pmi,
FROM pmi_part3),
ranked_ordered AS (
SELECT category,
*,
word || '(' || count::INT || ')' AS label,
count,
word_total,
PMI,
llr,
CASE WHEN PMI < 0 THEN 'under-represented' ELSE 'over-represented' END AS direction,
RANK() OVER (PARTITION BY category, PMI < 0 ORDER BY llr DESC) ordering,
FROM pmi_part4
)
SELECT
category, direction, list(label) terms FROM ranked_ordered
WHERE ordering < 6 GROUP BY direction, category ORDER BY direction, category`
Insert cell
trees.url()
Insert cell
import {
DuckDBClient,
arrow
} from "@bmschmidt/duckdb-client-1-20-0-arrow-10-0-1"
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more