Published
Edited
Aug 23, 2022
12 forks
5 stars
Insert cell
Insert cell
db = DuckDBClient.of({
survey2017: FileAttachment("survey_results_public_2017.parquet"), // 6MB
survey2018: FileAttachment("survey_results_public_2018.parquet"), // 11MB
survey2019: FileAttachment("survey_results_public_2019.parquet"), // 9MB
survey2020: FileAttachment("survey_results_public_2020.parquet"), // 5MB
survey2021: FileAttachment("survey_results_public_2021.parquet"), // 5MB
survey2022: FileAttachment("survey_results_public_2022.parquet"), // 7MB
})
Insert cell
import {DuckDBClient} from '@cmudig/duckdb'
Insert cell
Insert cell
db
Type SQL, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
db
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
db
select LanguageHaveWorkedWith from survey2022
Insert cell
Insert cell
db
WITH languages AS (
select unnest(string_to_array(LanguageHaveWorkedWith, ';')) as language
from survey2022
-- adding conditions on the respondents goes here:
-- where Employment LIKE '%Employed, full-time%'
)
SELECT language, COUNT(*)::INT as count
FROM languages
GROUP BY language ORDER BY count DESC
Insert cell
Insert cell
Plot.plot({
caption: `Top 10 programming languages used in 2022`,
marks: [
Plot.barX(languages2022, {x: "count", y: "language", fill: "language", sort: {y: "x", reverse: true, limit: 10}}),
Plot.ruleX([0])
],
marginLeft: 100,
color: { domain: languagesDomain }
})
Insert cell
// a little trick we'll use to have consistent colors across our visualizations for each language
languagesDomain = languages2022.map(d => d.language).concat(["?"])
Insert cell
Insert cell
db
WITH languages AS (
select unnest(string_to_array(LanguageWorkedWith, ';')) as language
from survey2020
-- in 2020 the options for Employment were also different:
-- where Employment = 'Employed full-time'
)
SELECT language, COUNT(*)::INT as count
FROM languages
GROUP BY language ORDER BY count DESC
Insert cell
Insert cell
surveys = ["survey2022", "survey2021", "survey2020", "survey2019", "survey2018", "survey2017"]
Insert cell
viewof survey = Inputs.select(surveys, {label: "Select a survey"})
Insert cell
Insert cell
languageField = {
switch (survey) {
case "survey2017":
return "HaveWorkedLanguage";
case "survey2018":
return "LanguageWorkedWith";
case "survey2019":
return "LanguageWorkedWith";
case "survey2020":
return "LanguageWorkedWith";
case "survey2021":
return "LanguageHaveWorkedWith"
case "survey2022":
return "LanguageHaveWorkedWith"
}
}
Insert cell
We can use the `query` API to create a custom SQL query using our chosen field and table names:
Insert cell
languages = db.query(`WITH languages AS (
select unnest(string_to_array(${languageField}, ';')) as language
from ${survey}
)
SELECT language, COUNT(*)::INT as count
FROM languages
GROUP BY language ORDER BY count DESC`)
Insert cell
Insert cell
Inputs.table(languages)
Insert cell
Insert cell
Insert cell
Plot.plot({
caption: `Top 20 programming languages used in ${survey}`,
marks: [
Plot.barX(languages, {
x: "count",
y: d => d.language.trim(),
fill: (d) => languagesDomain.indexOf(d.language.trim()) < 0 ? "?" : d.language.trim(),
// stroke: "gray",
sort: { y: "x", reverse: true, limit: 20 }
}),
Plot.ruleX([0])
],
marginLeft: 100,
color: { domain: languagesDomain }
})
Insert cell
Insert cell
languages.toArray().map(Object.fromEntries);
Insert cell
Insert cell
questionsDB = DuckDBClient.of({
questions2017: { header: true, file: FileAttachment("survey_results_schema_2017.csv")},
questions2018: { header: true, file: FileAttachment("survey_results_schema_2018.csv")},
questions2019: { header: true, file: FileAttachment("survey_results_schema_2019.csv")},
questions2020: { header: true, file: FileAttachment("survey_results_schema_2020.csv")},
questions2021: d3.csvParse(txt2022fixed),
questions2022: d3.csvParse(txt2022fixed), // 7MB
})
Insert cell
questionsDB
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
txt2021 = FileAttachment("survey_results_schema_2021.csv").text()
Insert cell
txt2021fixed = txt2021.replace(/\"\"/g, '\'')
Insert cell
txt2022 = FileAttachment("survey_results_schema_2022.csv").text()
Insert cell
txt2022fixed = txt2022.replace(/\"\"/g, '\'')
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more