Public
Edited
Dec 1
1 fork
1 star
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
embed({
mark: "line",
title: `${
filters.includes("brand = 'Tenure Track'") ? "Tenure track" : ""
} history jobs in North America, by time of year, on H-Net. Ben Schmidt.`,
width: (width * 4) / 5,
height: (width * 1) / 2,
layer: [
{
data: {
values: windowedcounts.filter(
(d) => d.normalized_date < new Date(max_date),
(d) => d.comparable,
(d) => year.includes(d["Academic Year"])
)
},

mark: {
type: "bar",
// "extent": "maxmin",
borders: true
// fill: "opaque"
},
encoding: {
x: {
field: "Academic Year"
},
y: {
title: "Jobs Listed",
aggregate: "max",
field: "cumulative"
},
color: { field: "era", type: "nominal" },
y2: {
aggregate: "min",
field: "cumulative"
},
color: { field: "era", type: "nominal" }
}
}
]
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
regioncounts.filter((d) => d["academic year"] == 2023)
Insert cell
Insert cell
Insert cell
viewof sample = await client.table(
`SELECT string_split("nc:text", ' ')[8][1:750] AS incipit FROM filtered WHERE Region='${filter}' AND "academic year" == 2023 ORDER BY RANDOM()`
)
Insert cell
await client.table(
`SELECT Region, string_split("nc:text", ' ')[8][1:750] AS incipit FROM filtered WHERE Region='${filter}' AND "academic year" == 2023 AND Region != 'Pre-1789' ORDER BY RANDOM() LIMIT 10`
)
Insert cell
html`${sample.map((d) => d.incipit).join("<br>")}`
Insert cell
Insert cell
Insert cell
viewof windowedcounts = {
filtered;
year_groups;
return client.table(`WITH
-- cross join to fill in empty dates
ay_tb AS (SELECT * FROM (SELECT DISTINCT(normalized_date) FROM tb) t1 CROSS JOIN (SELECT DISTINCT "academic year" FROM tb)),
-- get counts for each individual day.
annual AS (SELECT "Academic Year", "normalized_date", (COUNT(*) FILTER (WHERE Position IS NOT NULL))::FLOAT count FROM filtered FULL OUTER JOIN ay_tb USING (normalized_date, "academic year") WHERE "Academic Year" > 2004 GROUP BY ALL)

SELECT "Academic Year", "normalized_date",
SUM(count) OVER (
PARTITION BY "Academic Year"
ORDER BY "normalized_date" ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cumulative,
era
FROM annual LEFT JOIN year_groups USING ("Academic Year") WHERE ("Academic Year" < 2024) OR (normalized_date < '${max_date}')`);
}
Insert cell
Insert cell
Insert cell
Plot.plot({
stroke: {
legend: true
},
marks: [
Plot.ruleY([0]),
Plot.lineY(comparisoncounts, {
x: "academic year",
y: "match_1",
stroke: "red"
}),
Plot.lineY(comparisoncounts, {
x: "academic year",
y: "match_2",
stroke: "blue"
})
]
})
Insert cell
{
const div = DOM.element("div");

const sel = d3
.select(div)
.attr(
"style",
"display:grid;grid-template-columns: 1fr 1fr 1fr; padding:5px; grid-gap: 20px;"
)
.style("max-width", width)
.selectAll("div")
.data(top_vals)
.join("div");
sel
.attr("style", "outline: 2px dotted gray; padding: 5px; font-size:12px")
.append("h4")
.text((d) => d.category);

const words = sel
.selectAll("span")
.data((d) => d.terms.toArray())
.join("span")
.style("margin-left", "3px")
.on("mouseover", (event, d) => {
console.log(d);
return sel
.selectAll("span")
.style("text-decoration", (e) => (e === d ? "underline" : "none"));
});

words.text((d) => d + " ");
return div;
}
Insert cell
where_terms.join(" AND ")
Insert cell
html`${v[0].text.split(" ").join("<>")}`
Insert cell
client
Type SQL, then Shift-Enter. Ctrl-space for more options.

Insert cell
client
WITH
titles AS
(SELECT LOWER(job) AS text, 2 * FLOOR("academic year" / 2) AS category FROM tb WHERE brand = 'Tenure Track' AND History = TRUE AND "North America" = TRUE AND "academic year" >= 2010),
word_counts AS
(SELECT category, word, COUNT(*) count FROM
-- This should be more unicode safe
(SELECT category, UNLIST(string_split_regex("text", '[^A-Za-zé]+')) word FROM titles)
GROUP BY category, word
),
-- I created the counts as separate subqueries because I was unable to make PARTITION BY work correctly.
word_totals AS (
SELECT word, sum("count")::FLOAT AS word_total FROM word_counts GROUP BY word
),
category_totals AS (
SELECT category, sum("count")::FLOAT AS category_size FROM word_counts GROUP BY category
),
pmi_part1 AS
( SELECT
SUM("count") OVER ()::FLOAT as corpus_size,
*
FROM word_counts NATURAL JOIN word_totals NATURAL JOIN category_totals
),
-- A few steps to define all the variables we need.
pmi_part2 AS (
SELECT *,
greatest(word_total - "count", 0.2) AS count_excluding,
corpus_size - category_size AS size_excluding_category
FROM pmi_part1
),
pmi_part3 AS (
SELECT *,
category_size * (word_total / corpus_size) AS expected_here,
size_excluding_category * (word_total / corpus_size) AS expected_elsewhere,
FROM pmi_part2
),
pmi_part4 AS (
SELECT *,
2 * (
"count" * log("count" / expected_here) +
"count_excluding" * log(count_excluding / expected_elsewhere)
) AS llr,
log("count" / expected_here) AS pmi,
FROM pmi_part3),
ranked_ordered AS (
SELECT category,
*,
word,
count,
word_total,
PMI,
llr,
RANK() OVER (PARTITION BY category ORDER BY llr DESC) ordering,
FROM pmi_part4 WHERE PMI > 0
)
SELECT
category, list(word) terms FROM ranked_ordered
WHERE ordering < 25 GROUP BY category ORDER BY category
Insert cell
viewof comparisoncounts = {
filtered
const query = `

SELECT "Academic Year",
COUNT(*) FILTER (WHERE "nc:text" SIMILAR TO '.*(${form.word_1}).*')::FLOAT match_1,
COUNT(*) FILTER (WHERE "nc:text" SIMILAR TO '.*(${form.word_2}).*')::FLOAT match_2
FROM filtered GROUP BY ALL`
return client.table(query)
}
Insert cell
years = {
const years = [];
for (let i = 2001; i <= 2024; i++) {
years.push(i);
}
return years;
}
Insert cell
viewof regioncounts = {
filtered;
return client.table(`
SELECT "${regionfield}", "Academic Year",
COUNT(*)::FLOAT count, comparable FROM filtered
GROUP BY ALL`);
}
Insert cell
client
Type SQL, then Shift-Enter. Ctrl-space for more options.

Insert cell
client.table(`SELECT "nc:text" FROM filtered WHERE "Academic Year" = 2016`)
Insert cell
filtered = {
client.table(`CREATE OR REPLACE TABLE filtered AS SELECT * FROM tb WHERE TRUE AND ${where_terms.join(" AND ")}`)
return client.table("SELECT * FROM filtered")
}
Insert cell
viewof counts = {
await client.query(`


CREATE OR REPLACE TABLE typecounts AS
WITH totalcounts AS (SELECT position, COUNT(*) c FROM tb GROUP BY position HAVING c > 1000)
SELECT Position, count(*)::FLOAT count, "Academic Year" FROM tb NATURAL JOIN totalcounts WHERE TRUE AND ${where_terms.filter(d => !d.match(/tt/)).join(" AND ")} GROUP BY ALL ORDER BY Position, "Academic Year"`)
return client.table("SELECT * FROM typecounts")
}
Insert cell
{
const rows = [
...(await client.query(
`SELECT * FROM filtered WHERE job LIKE '%Digital Humanities%' AND "academic year" <= 2026 ORDER BY "academic year" ASC LIMIT 300`
))
]
.map((d) => `${d.job} (${d.inst}) ${d['academic year']}`)
.map((d) => `* ${d}\n`);
return md`${rows}`;
}
Insert cell
counts
Insert cell
Plot.plot({
marks: [
Plot.ruleY([0]),
Plot.lineY(
counts.filter(
(d) =>
d["academic year"] <= 2023 &&
d["academic year"] > 2003 &&
d.brand !== "Tenure Track"
),
{ x: "academic year", y: "count", stroke: "position" }
)
],
y: {
domain: [0, 400]
},
color: {
reverse: true,
legend: true
}
})
Insert cell
where_terms = [...filters, "normalized_date > '2003-01-01'"]
Insert cell
client
SELECT * FROM (SELECT DISTINCT(normalized_date) FROM tb) t1 CROSS JOIN (SELECT DISTINCT "academic year" FROM tb)
Insert cell
client
SELECT COUNT(*), "academic year", COUNT(*) FILTER(WHERE "nc:text" SIMILAR TO '.*([Dd]igital [Hh]istory|[Dd]igital [Hh]umanities).*')::FLOAT DH, COUNT(*) FILTER (WHERE "nc:text" SIMILAR TO '.*(German|France|French|Italy|Italian|Japan).*')::FLOAT H FROM tb WHERE "North America" AND "academic year" > 2010 AND "history" AND "academic year" <= 2023 AND "Institution Type" = 'College / University' GROUP BY ALL ORDER BY "academic year"
Insert cell
Plot.plot({
color: {
legend: true
},
y: {
tickFormat: "s"
},
marks: [
Plot.areaY(data, { x: "academic year", y: "count", fill: "category" }),
Plot.ruleY([0])
]
})
Insert cell
client
SELECT
"academic year",
CASE WHEN category = '' THEN 'UNKNOWN' ELSE category END AS category,
COUNT(*)::FLOAT count
FROM (SELECT
"academic year",
-- "Primary Category" category,
((string_split("Secondary Categories", '--') ))[1] category,
FROM tb WHERE ("Region" = 'US/Canada' OR "Region" = 'Black/Af-Am') AND "North America" = TRUE AND "academic year" > 2005) t1
GROUP BY "academic year", "category"
HAVING count > 5
ORDER BY category, "academic year" ASC;
Insert cell
client
SELECT brand, COUNT(*) FROM tb GROUP BY ALL
Insert cell
client
SELECT Position, "Primary Category", "Secondary Categories", "job", "inst" FROM filtered WHERE "Posting Date" > '2020-03-14' AND "Secondary Categories" LIKE '%Early%' AND NOT tt
Insert cell
client.table(`SELECT * FROM tb USINg Sample 5`)
Insert cell
client = {
const client = await DuckDBClient.of({
tb_raw: FileAttachment("nc_hnet@16.parquet")
});
await client.query(`CREATE TABLE tb AS SELECT * FROM tb_raw`);

await client.query(
`UPDATE tb SET history = false WHERE "Primary Category" LIKE '%Art%'`
);
return client;
}
Insert cell
embed = require("vega-embed@6")
Insert cell
import {DuckDBClient} from '@cmudig/duckdb'

Insert cell
import { serialize } from '@palewire/saving-csv'
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more