H-Net Jobs Data / Benjamin Schmidt

Benjamin Schmidt

Digital Humanist. Manhattan-based.

Workspace

Public

Higher Education Datasets

Edited

Dec 1

1 fork

1 star

Higher Education Datasets

H-Net Jobs Data

Rankings of US PhD Programs implicit from hiring networks. Interactive exploration with DuckDB: 5.2 million rows of college degrees in the browser. Exploring changing US College majors with Apache Arrow.

// Vega-Lite spec rendered through vega-embed: one bar per academic year
// spanning the min and max of the `cumulative` field, coloured by `era`.
// The title gains a "Tenure track" prefix when that filter is active.
embed({
  mark: "line",
  title: `${
    filters.includes("brand = 'Tenure Track'") ? "Tenure track" : ""
  } history jobs in North America, by time of year, on H-Net. Ben Schmidt.`,
  width: (width * 4) / 5,
  height: (width * 1) / 2,
  layer: [
    {
      data: {
        // Array.prototype.filter takes a single predicate (its second argument
        // is `thisArg`), so the date cutoff is the only condition in effect.
        // Further conditions (e.g. on `comparable` or the selected years) would
        // have to be &&-ed into this callback; they are deliberately not added
        // here because that would change which rows reach the chart.
        values: windowedcounts.filter(
          (d) => d.normalized_date < new Date(max_date)
        )
      },
      mark: {
        type: "bar",
        // "extent": "maxmin",
        borders: true
        // fill: "opaque"
      },
      encoding: {
        x: {
          field: "Academic Year"
        },
        // Top of the bar: the largest cumulative count seen in the year.
        y: {
          title: "Jobs Listed",
          aggregate: "max",
          field: "cumulative"
        },
        // Bottom of the bar: the smallest cumulative count seen in the year.
        y2: {
          aggregate: "min",
          field: "cumulative"
        },
        color: { field: "era", type: "nominal" }
      }
    }
  ]
})

// Rows of regioncounts whose "academic year" loosely equals 2023.
regioncounts.filter((d) => d["academic year"] == 2023)

// Table view of text excerpts ("incipit") from listings in the region named by
// `filter` for academic year 2023, in random order. The excerpt is element [8]
// of "nc:text" split on a space, sliced [1:750].
// NOTE(review): `filter` is interpolated into the SQL unescaped — confirm it
// only ever holds trusted region names.
viewof sample = await client.table(

`SELECT string_split("nc:text", ' ')[8][1:750] AS incipit FROM filtered WHERE Region='${filter}' AND "academic year" == 2023 ORDER BY RANDOM()`

)

// Same excerpt query, additionally returning Region, excluding 'Pre-1789'
// and capped at 10 random rows.
await client.table(

`SELECT Region, string_split("nc:text", ' ')[8][1:750] AS incipit FROM filtered WHERE Region='${filter}' AND "academic year" == 2023 AND Region != 'Pre-1789' ORDER BY RANDOM() LIMIT 10`

)

// Renders every sampled excerpt as HTML, one per line (joined with <br>).
html`${sample.map((d) => d.incipit).join("<br>")}`

// Running (cumulative) count of listings per academic year and normalized date.
// The query:
//   - ay_tb: crosses every distinct normalized_date in tb with every distinct
//     "academic year" in tb, so dates without listings still get a row;
//   - annual: counts rows with a non-null Position per (year, date) from
//     `filtered` FULL OUTER JOINed to ay_tb, for academic years after 2004;
//   - final SELECT: sums those counts from the start of each academic year up
//     to the current date, attaches `era` from year_groups, and keeps years
//     before 2024 plus any rows dated before `max_date`.
viewof windowedcounts = {

// Bare references to other cells; their values are not used directly here.
// Presumably they exist so Observable re-runs this cell when they change.
filtered;

year_groups;

return client.table(`WITH

-- cross join to fill in empty dates

ay_tb AS (SELECT * FROM (SELECT DISTINCT(normalized_date) FROM tb) t1 CROSS JOIN (SELECT DISTINCT "academic year" FROM tb)),

-- get counts for each individual day.

annual AS (SELECT "Academic Year", "normalized_date", (COUNT(*) FILTER (WHERE Position IS NOT NULL))::FLOAT count FROM filtered FULL OUTER JOIN ay_tb USING (normalized_date, "academic year") WHERE "Academic Year" > 2004 GROUP BY ALL)

SELECT "Academic Year", "normalized_date",

SUM(count) OVER (

PARTITION BY "Academic Year"

ORDER BY "normalized_date" ASC

ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cumulative,

era

FROM annual LEFT JOIN year_groups USING ("Academic Year") WHERE ("Academic Year" < 2024) OR (normalized_date < '${max_date}')`);

}

// Line chart comparing the two per-year match counts produced by the
// comparisoncounts cell: match_1 in red, match_2 in blue, with a baseline
// rule at y = 0.
Plot.plot({
  // NOTE(review): both lines use constant stroke colours, so this legend
  // option may have no visible effect — confirm against the Plot scale docs.
  stroke: {
    legend: true
  },
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(comparisoncounts, {
      x: "academic year",
      y: "match_1",
      stroke: "red"
    }),
    Plot.lineY(comparisoncounts, {
      x: "academic year",
      y: "match_2",
      stroke: "blue"
    })
  ]
})

{

const div = DOM.element("div");

const sel = d3

.select(div)

.attr(

"style",

"display:grid;grid-template-columns: 1fr 1fr 1fr; padding:5px; grid-gap: 20px;"

)

.style("max-width", width)

.selectAll("div")

.data(top_vals)

.join("div");

sel

.attr("style", "outline: 2px dotted gray; padding: 5px; font-size:12px")

.append("h4")

.text((d) => d.category);

const words = sel

.selectAll("span")

.data((d) => d.terms.toArray())

.join("span")

.style("margin-left", "3px")

.on("mouseover", (event, d) => {

console.log(d);

return sel

.selectAll("span")

.style("text-decoration", (e) => (e === d ? "underline" : "none"));

});

words.text((d) => d + " ");

return div;

}

// Shows the WHERE conditions as a single string joined with " AND ".
where_terms.join(" AND ")

// Splits the `text` of the first element of `v` on single spaces and rejoins
// the pieces with the literal string "<>", rendered as HTML.
// NOTE(review): "<>" is not an HTML tag; it may be a mangled separator such as
// "<br>" — confirm the intended markup.
html`${v[0].text.split(" ").join("<>")}`

viewof comparisoncounts = {

filtered

const query = `

SELECT "Academic Year",

COUNT(*) FILTER (WHERE "nc:text" SIMILAR TO '.*(${form.word_1}).*')::FLOAT match_1,

COUNT(*) FILTER (WHERE "nc:text" SIMILAR TO '.*(${form.word_2}).*')::FLOAT match_2

FROM filtered GROUP BY ALL`

return client.table(query)

}

// Every year from 2001 through 2024 inclusive, in ascending order.
// Built with Array.from rather than a push loop, which also avoids an inner
// local that shadows the cell's own name.
years = Array.from({ length: 2024 - 2001 + 1 }, (_, offset) => 2001 + offset)

// Listing counts from `filtered`, grouped by the column named in `regionfield`,
// by "Academic Year" and by `comparable`, returned as a client.table view.
// NOTE(review): `regionfield` is interpolated as a quoted identifier without
// escaping — confirm it only ever holds a known column name.
viewof regioncounts = {

// Bare reference to the `filtered` cell; its value is not used directly.
// Presumably it exists so Observable re-runs this cell when it changes.
filtered;

return client.table(`

SELECT "${regionfield}", "Academic Year",

COUNT(*)::FLOAT count, comparable FROM filtered

GROUP BY ALL`);

}

// Ad-hoc inspection: the "nc:text" of every filtered listing from academic year 2016.
client.table(`SELECT "nc:text" FROM filtered WHERE "Academic Year" = 2016`)

filtered = {

client.table(`CREATE OR REPLACE TABLE filtered AS SELECT * FROM tb WHERE TRUE AND ${where_terms.join(" AND ")}`)

return client.table("SELECT * FROM filtered")

}

viewof counts = {

await client.query(`

CREATE OR REPLACE TABLE typecounts AS

WITH totalcounts AS (SELECT position, COUNT(*) c FROM tb GROUP BY position HAVING c > 1000)

SELECT Position, count(*)::FLOAT count, "Academic Year" FROM tb NATURAL JOIN totalcounts WHERE TRUE AND ${where_terms.filter(d => !d.match(/tt/)).join(" AND ")} GROUP BY ALL ORDER BY Position, "Academic Year"`)

return client.table("SELECT * FROM typecounts")

}

{

const rows = [

...(await client.query(

`SELECT * FROM filtered WHERE job LIKE '%Digital Humanities%' AND "academic year" <= 2026 ORDER BY "academic year" ASC LIMIT 300`

))

]

.map((d) => `${d.job} (${d.inst}) ${d['academic year']}`)

.map((d) => `* ${d}\n`);

return md`${rows}`;

}

// Displays the current value of the `counts` cell.
counts

// Line chart of listing counts per academic year, one line per position,
// for academic years 2004 through 2023. The y axis is fixed to 0–400 and the
// colour scale is reversed and shown as a legend.
Plot.plot({
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(
      counts.filter(
        (d) =>
          d["academic year"] <= 2023 &&
          d["academic year"] > 2003 &&
          d.brand !== "Tenure Track"
      ),
      { x: "academic year", y: "count", stroke: "position" }
    )
  ],
  y: {
    domain: [0, 400]
  },
  color: {
    reverse: true,
    legend: true
  }
})

// SQL conditions applied when building `filtered`: every entry of `filters`
// plus a fixed cutoff keeping rows with normalized_date after 2003-01-01.
where_terms = [...filters, "normalized_date > '2003-01-01'"]

// Stacked area chart of `count` per academic year from `data`, filled by
// `category`, with a colour legend, SI-prefixed y ticks and a baseline rule
// at y = 0.
Plot.plot({
  color: {
    legend: true
  },
  y: {
    tickFormat: "s"
  },
  marks: [
    Plot.areaY(data, { x: "academic year", y: "count", fill: "category" }),
    Plot.ruleY([0])
  ]
})

// Ad-hoc inspection: a 5-row sample of the base table `tb`.
client.table(`SELECT * FROM tb USINg Sample 5`)

client = {

const client = await DuckDBClient.of({

tb_raw: FileAttachment("nc_hnet@16.parquet")

});

await client.query(`CREATE TABLE tb AS SELECT * FROM tb_raw`);

await client.query(

`UPDATE tb SET history = false WHERE "Primary Category" LIKE '%Art%'`

);

return client;

}

embed = require("vega-embed@6")

import {DuckDBClient} from '@cmudig/duckdb'

import { serialize } from '@palewire/saving-csv'

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more