Public
Edited
Fork of SQL + Chart
1 fork
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
data = await call_api_js(GitHubOrganization)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
database2 = DuckDBClient.of({ dataFlat })
Insert cell
dataFlat
SELECT * FROM dataFlat LIMIT 10
Insert cell
Insert cell
Insert cell
Insert cell
function addYearToRepos(repos) {
return repos.map((repo) => {
const createdAt = new Date(repo.created_at);
const year = createdAt.getUTCFullYear();
const updatedRepo = { ...repo, created_at_year: year };
return updatedRepo;
});
}
Insert cell
function addAgeInDaysCol(repos) {
return repos.map((repo) => {
const createdAt = new Date(repo.created_at);
const currentDate = new Date();
const timeDiff = Math.abs(currentDate - createdAt);
const daysDiff = Math.ceil(timeDiff / (1000 * 60 * 60 * 24));
const updatedRepo = { ...repo, age_in_days: daysDiff };
return updatedRepo;
});
}
Insert cell
function addDaysSinceCols(repos,colName,newColName) {
return repos.map((repo) => {
const At = new Date(repo[colName]);
const currentDate = new Date();
const timeDiff = Math.abs(currentDate - At);
const daysDiff = Math.ceil(timeDiff / (1000 * 60 * 60 * 24));
const updatedRepo = { ...repo, [newColName]: daysDiff };
return updatedRepo;
});
}
Insert cell

function createRatioColumn(repos, col1, col2, newColName) {
return repos.map((repo) => {
const ratio = repo[col1] / repo[col2];
const updatedRepo = { ...repo, [newColName]: ratio };
return updatedRepo;
});
}
Insert cell
function parseColumnsIntoIntegersFromStrings(repos, arrayOfKeys) {
for (const repo of repos) {
for (const column of arrayOfKeys) {
if (repo.hasOwnProperty(column)) {
repo[column] = parseInt(repo[column]);
}
}
}
return repos;
}
Insert cell
function createCohortTestForNullEmpty(data, columnArray, cohortColName){
return data.map((item) => {
for (const column in columnArray){
if (item[column] == null || item[column] == "") { ///// typeof item[column] === "undefined" ||
console.log("true triggered for value: ",item[column])
return { ...item, [cohortColName]: true };
}
}
return { ...item, [cohortColName]: false };
});
}
Insert cell
function createCohortNumericalCol(data, column, cohortColName, baseThreshold, topThreshold) {
return data.map((item) => {
if (item[column] > baseThreshold && item[column] <= topThreshold) {
return { ...item, [cohortColName]: true };
} else {
return { ...item, [cohortColName]: false };
}
});
}
Insert cell

function createCohortNumericalColTwoTests(data, column, cohortColName, baseThreshold, topThreshold, columnB, baseThresholdB, topThresholdB) {
return data.map((item) => {
if (item[column] > baseThreshold && item[column] <= topThreshold && item[columnB] > baseThresholdB && item[columnB] <= topThresholdB) {
return { ...item, [cohortColName]: true };
} else {
return { ...item, [cohortColName]: false };
}
});
}
Insert cell
function createCohortStringListPossibleValues(data, column, cohortColName, valueList) {
return data.map((item) => {
const columnValue = item[column];
if (columnValue == null){
return { ...item, [cohortColName]: false };
}
const isSubstring = valueList.some((value) => columnValue.includes(value));
return { ...item, [cohortColName]: isSubstring };
});
}
Insert cell
function createCohortIfEitherColumnIsTrue(data, column1, column2, cohortColName) {
return data.map((item) => {
if (item[column1] || item[column2]) {
return { ...item, [cohortColName]: true };
} else {
return { ...item, [cohortColName]: false };
}
});
}
Insert cell
function createCohortColumns(repos, functionList) {
let modifiedRepos = [...repos];
for (const funcObj of functionList) {
const funcName = Object.keys(funcObj)[0];
const args = funcObj[funcName];
switch (funcName) {
case "addYearToRepos":
modifiedRepos = addYearToRepos(modifiedRepos);
break;
case "addAgeInDaysCol":
modifiedRepos = addAgeInDaysCol(modifiedRepos);
break;
case "addDaysSinceCols":
modifiedRepos = addDaysSinceCols(modifiedRepos, ...args);
break;
case "parseColumnsIntoIntegersFromStrings":
modifiedRepos = parseColumnsIntoIntegersFromStrings(modifiedRepos, args);
break;
case "createCohortTestForNullEmpty":
modifiedRepos = createCohortTestForNullEmpty(modifiedRepos, ...args);
break;
case "createCohortNumericalCol":
modifiedRepos = createCohortNumericalCol(modifiedRepos, ...args);
break;
case "createCohortNumericalColTwoTests":
modifiedRepos = createCohortNumericalColTwoTests(modifiedRepos, ...args);
break;
case "createCohortStringListPossibleValues":
modifiedRepos = createCohortStringListPossibleValues(modifiedRepos, ...args);
break;
case "createRatioColumn":
modifiedRepos = createRatioColumn(modifiedRepos, ...args);
break;
case "createCohortIfEitherColumnIsTrue":
modifiedRepos = createCohortIfEitherColumnIsTrue(modifiedRepos, ...args);
break;
default:
break;
}
}
return modifiedRepos;
}
Insert cell
// **
// * This constant holds a JSON type data structure that describes the cohorts to be created.
// * Each object in the array has a single key-value pair where the key is the name of the function to be called and the value is an array
// * of arguments to be passed to the function. By calling functions in a specific order we can create key: value pairs in the data structure
// * (aka colummns in the resulting table) that can be reused to create more complicated fields and finally cohorts. For example, "addAgeInDaysCol"
// * is calculated and then used to calculate the "cohort_age_baby30d" cohort key/column.
// */
// export const
jsonThatDescribesCohortsToCreate = [
//// created calculated columns used in cohorts ////
{"addYearToRepos":[]},
{"addAgeInDaysCol":[]},
{"addDaysSinceCols":["updated_at","daysSinceUpdated"]},
{"parseColumnsIntoIntegersFromStrings":["commit_stats_total_commits", "commit_stats_total_committers","commit_stats_mean_commits", "commit_stats_dds"]},
{"createRatioColumn":["stargazers_count","commit_stats_total_committers","ratioStargazersVsCommitters"]},
{"createRatioColumn":["stargazers_count","forks_count","ratioStargazersVsForks"]},
{"createRatioColumn":["subscribers_count","commit_stats_total_committers","ratioWatchersVsCommitters"]},
//// sample or demo or example cohorts ////
{"createCohortStringListPossibleValues":["full_name", "cohort_sample_fullName", ["sample","demo","example","tutorial"]]},
{"createCohortStringListPossibleValues":["description", "cohort_sample_Description", ["sample","demo","example","tutorial"]]},
{"createCohortIfEitherColumnIsTrue":["cohort_sample_fullName", "cohort_sample_Description", "cohort_sample"]},
//// committer community size cohorts ////
{"createCohortTestForNullEmpty":[["commit_stats_total_committers"], "cohort_committers_missingData"]},
{"createCohortNumericalCol":["commit_stats_total_committers", "cohort_committers_1-20", 0.2,20.5]},
{"createCohortNumericalCol":["commit_stats_total_committers", "cohort_committers_20-100", 20.5,100.5]},
{"createCohortNumericalCol":["commit_stats_total_committers", "cohort_committers_100plus", 100.5,10000000]},
//// age cohorts ////
{"createCohortTestForNullEmpty":[["age_in_days"], "cohort_age_missingData"]},
{"createCohortNumericalCol":["age_in_days", "cohort_age_baby30d", 0,30]},
{"createCohortNumericalCol":["age_in_days", "cohort_age_toddler30to90d", 30,90]},
{"createCohortNumericalCol":["age_in_days", "cohort_age_teen90to365d", 90,365]},
{"createCohortNumericalCol":["age_in_days", "cohort_age_adult365to1095d", 365,1095]},
{"createCohortNumericalCol":["age_in_days", "cohort_age_seniorMore1095d", 1095,100000000000000]},
//// Nadia cohorts ////
{"createCohortTestForNullEmpty":[["commit_stats_total_committers"], "cohort_Nadia_missingData"]},
{"createCohortNumericalCol":["commit_stats_total_committers", "cohort_Nadia_mid", 6,60]},
{"createCohortNumericalColTwoTests":["commit_stats_total_committers", "cohort_Nadia_club", 60,1000000, "ratioStargazersVsCommitters", 0,2]},
{"createCohortNumericalColTwoTests":["commit_stats_total_committers", "cohort_Nadia_federation", 60,1000000, "ratioStargazersVsCommitters", 2,1000000000]},
{"createCohortNumericalColTwoTests":["commit_stats_total_committers", "cohort_Nadia_stadium", 0.2, 6, "stargazers_count", 100,100000000]},
{"createCohortNumericalColTwoTests":["commit_stats_total_committers", "cohort_Nadia_toy", 0.2,6, "stargazers_count", 0,100]}
]

Insert cell
function repos_cohort_processed_BaseCohorts(repos, jsonThatDescribesCohortsToCreate){
return createCohortColumns(repos, jsonThatDescribesCohortsToCreate
)
}
Insert cell
Insert cell
Insert cell
Insert cell
function countsCohortGroup(data, cohortGroupArray){
const numberOfCohortGroups = cohortGroupArray.length
const modifiedData = data.map((item) => {
const trueValues = {};
for (const key in item) {
var numberOfCohortGroupsCovered = 0;
for (const substring of cohortGroupArray) {
if (key.includes(substring) && item[key] === true) {
trueValues[`${substring}_trueValueInGroup`] = key;
numberOfCohortGroupsCovered += 1;
if( numberOfCohortGroupsCovered === numberOfCohortGroups){
break;
}
}
}
}
return Object.assign({}, item, trueValues);
});
return modifiedData;
}
Insert cell
dataCohortsWithTrueInGroup = countsCohortGroup(dataCohorts,[ "cohort_sample","cohort_age","cohort_committers","cohort_Nadia"])
Insert cell
Insert cell
Insert cell
dataCohorts
SELECT * FROM dataCohorts LIMIT 3
Insert cell
Insert cell
dataCohorts
SELECT * FROM dataCohorts WHERE "cohort_sample" == true
Insert cell
dataCohorts
Type SQL, then Shift-Enter. Ctrl-space for more options.

Insert cell
dataCohorts
SELECT full_name, owner, description, archived, stargazers_count, forks_count, open_issues_count, subscribers_count FROM dataCohorts WHERE "forks_count" >10 LIMIT 10
Insert cell
Insert cell
dataCohorts
SELECT * FROM dataCohorts WHERE cohort_age_seniorMore1095d == true AND daysSinceUpdated < 30 LIMIT 20
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Plot.plot({
title: "Nadia community cohorts vs. days since last update",
marginTop: 20,
marginRight: 20,
marginBottom: 30,
marginLeft: 40,
grid: true,
width: 1000,
color: { legend: true } ,
marks: [
Plot.barY(
dataCohortsWithTrueInGroup,
Plot.groupX({ y: "count" }, { x: "cohort_Nadia_trueValueInGroup", fill: "cohort_age_trueValueInGroup", sort: "cohort_age_trueValueInGroup", lineWidth: 74, marginBottom: 40})
)
]
})
Insert cell
Insert cell
Insert cell
Insert cell
dataCohortsWithTrueInGroup
SELECT * FROM dataCohortsWithTrueInGroup WHERE cohort_Nadia_stadium == true LIMIT 50
Insert cell
Insert cell
dataCohortsWithTrueInGroup
SELECT * FROM dataCohortsWithTrueInGroup WHERE cohort_Nadia_federation == true LIMIT 50
Insert cell
Insert cell
dataCohortsWithTrueInGroup
SELECT * FROM dataCohortsWithTrueInGroup WHERE cohort_Nadia_toy == true LIMIT 50
Insert cell
Insert cell
dataCohortsWithTrueInGroup
SELECT * FROM dataCohortsWithTrueInGroup WHERE "cohort_committers_20-100" == True or "cohort_committers_100plus" == True LIMIT 50
Insert cell
Insert cell
dataCohortsWithTrueInGroup
SELECT * FROM dataCohortsWithTrueInGroup WHERE cohort_sample == true LIMIT 100
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more