Public
Edited
Dec 1, 2023
Insert cell
Insert cell
db = DuckDBClient.of({
survey2017: FileAttachment("survey_results_public_2017.parquet"), // 6MB
survey2018: FileAttachment("survey_results_public_2018.parquet"), // 11MB
survey2019: FileAttachment("survey_results_public_2019.parquet"), // 9MB
survey2020: FileAttachment("survey_results_public_2020.parquet"), // 5MB
survey2021: FileAttachment("survey_results_public_2021.parquet"), // 5MB
survey2022: FileAttachment("survey_results_public_2022.parquet"), // 7MB
})
Insert cell
import {DuckDBClient} from '@cmudig/duckdb'
Insert cell
Insert cell
db
Type SQL, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
db
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
db
select LanguageHaveWorkedWith from survey2022
Insert cell
Insert cell
db
WITH languages AS (
select unnest(string_to_array(LanguageHaveWorkedWith, ';')) as language
from survey2022
-- adding conditions on the respondents goes here:
-- where Employment LIKE '%Employed, full-time%'
)
SELECT language, COUNT(*)::INT as count
FROM languages
GROUP BY language ORDER BY count DESC
Insert cell
Insert cell
Plot.plot({
caption: `Top 10 programming languages used in 2022`,
marks: [
Plot.barX(languages2022, {x: "count", y: "language", fill: "language", sort: {y: "x", reverse: true, limit: 10}}),
Plot.ruleX([0])
],
marginLeft: 100,
color: { domain: languagesDomain }
})
Insert cell
// a little trick we'll use to have consistent colors across our visualizations for each language
languagesDomain = languages2022.map(d => d.language).concat(["?"])
Insert cell
Insert cell
db
WITH languages AS (
select unnest(string_to_array(LanguageWorkedWith, ';')) as language
from survey2020
-- in 2020 the options for Employment were also different:
-- where Employment = 'Employed full-time'
)
SELECT language, COUNT(*)::INT as count
FROM languages
GROUP BY language ORDER BY count DESC
Insert cell
Insert cell
surveys = ["survey2022", "survey2021", "survey2020", "survey2019", "survey2018", "survey2017"]
Insert cell
viewof survey = Inputs.select(surveys, {label: "Select a survey"})
Insert cell
Insert cell
languageField = {
switch (survey) {
case "survey2017":
return "HaveWorkedLanguage";
case "survey2018":
return "LanguageWorkedWith";
case "survey2019":
return "LanguageWorkedWith";
case "survey2020":
return "LanguageWorkedWith";
case "survey2021":
return "LanguageHaveWorkedWith"
case "survey2022":
return "LanguageHaveWorkedWith"
}
}
Insert cell
We can use the `query` API to create a custom SQL query using our chosen field and table names:
Insert cell
languages = db.query(`WITH languages AS (
select unnest(string_to_array(${languageField}, ';')) as language
from ${survey}
)
SELECT language, COUNT(*)::INT as count
FROM languages
GROUP BY language ORDER BY count DESC`)
Insert cell
Insert cell
Inputs.table(languages)
Insert cell
Insert cell
Insert cell
Plot.plot({
caption: `Top 20 programming languages used in ${survey}`,
marks: [
Plot.barX(languages, {
x: "count",
y: d => d.language.trim(),
fill: (d) => languagesDomain.indexOf(d.language.trim()) < 0 ? "?" : d.language.trim(),
// stroke: "gray",
sort: { y: "x", reverse: true, limit: 20 }
}),
Plot.ruleX([0])
],
marginLeft: 100,
color: { domain: languagesDomain }
})
Insert cell
Insert cell
languages.toArray().map(Object.fromEntries);
Insert cell
Insert cell
questionsDB = DuckDBClient.of({
questions2017: { header: true, file: FileAttachment("survey_results_schema_2017.csv")},
questions2018: { header: true, file: FileAttachment("survey_results_schema_2018.csv")},
questions2019: { header: true, file: FileAttachment("survey_results_schema_2019.csv")},
questions2020: { header: true, file: FileAttachment("survey_results_schema_2020.csv")},
questions2021: d3.csvParse(txt2022fixed),
questions2022: d3.csvParse(txt2022fixed), // 7MB
})
Insert cell
questionsDB
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
txt2021 = FileAttachment("survey_results_schema_2021.csv").text()
Insert cell
txt2021fixed = txt2021.replace(/\"\"/g, '\'')
Insert cell
txt2022 = FileAttachment("survey_results_schema_2022.csv").text()
Insert cell
txt2022fixed = txt2022.replace(/\"\"/g, '\'')
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more