Public
Edited
May 4, 2022
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Inputs.table(graph.nodes)
Insert cell
// viewof sel_group = Inputs.select(_.uniqBy(chosen_version.map(d => d.group)), {label: "Select Team"})
Insert cell
// {
// const dat_team = chosen_version.filter(d => d.group == sel_group)
// return Plot.plot({
// marginLeft: 330,
// width:1200,
// padding: 0,
// x: {
// round: true,
// grid: true,
// },
// color: {
// scheme: "Reds", legend: true, reverse: true
// },
// facet: {
// data: dat_team,
// marginLeft: 300,
// y: "Author"
// },
// marks: [
// Plot.barX(dat_team, Plot.binX({fill: "sum"}, {x: "date", fill: "diff_prev_row_tag_auth_h", inset: 0.5, thresholds: 300}))
// ]
// })
// }
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
gv_core_dvs
Insert cell
Insert cell
Insert cell
Insert cell
// Original cpython dataset, parsed from the attached CSV file.
// NOTE(review): downstream cells coerce its fields with unary `+`, so values are
// presumably still strings at this point — confirm.
cpython = FileAttachment("cpython_to_plot_all_versions_trim.csv").csv()
Insert cell
// Collaboration graph, parsed from the attached JSON file.
// chosen_version left-joins graph.nodes on "Author" and reads a `group` field from the result.
graph = FileAttachment("author_file_cluster_2_6@7.json").json()
Insert cell
time_release = d3.timeDay.count(d3.min(chosen_version.map(d => d.date)), d3.max(chosen_version.map(d => d.date)))
Insert cell
// Specific version of cpython + some time wrangling
chosen_version = {
return tidy(
cpython,
filter(d => d.tag === sel_v),
mutate({
is_toxic_new_score: d => +d.is_toxic_new_score,
// is_toxic: d => d.is_toxic === "True" ? "Toxic" : "Not Toxic",
is_toxic: d => d.is_toxic_new_score >= tog_D ? "Toxic" : "Not Toxic",
cumlines_tag: d => +d.cumlines_tag,
abs_lines_changed: d => +d.abs_lines_changed,
insertions: d => +d.insertions,
deletions: d => +d.deletions,
tot_lines_tag: d => +d.tot_ines_tag,
pct_completion_tag: d=> +d.pct_completion_tag,
diff_by_release_s: d=>+d.diff_by_release_s,
diff_prev_row_tag_auth_sec: d=>+d.diff_prev_row_tag_auth_sec,
Day: d => parseTime(d.Day)
}),
leftJoin(graph.nodes, { by: ['Author', 'Author'] })
).map((d) => {
const tzdate = Temporal.Instant.from(d.Date).toZonedDateTimeISO(
"America/Los_Angeles"
); // date in localized timezone
const date = new Date(tzdate.epochMilliseconds); // Javascript Date object, stores date in browser's local timezone
const sha = d.sha;
const group = d.group; // Augment cpython with team
const hour = tzdate.hour;
const dayOfMonth = tzdate.day;
const week = tzdate.weekOfYear;
const month = months[tzdate.month - 1];
const year = tzdate.year;
const abs_lines_changed = d.abs_lines_changed;
const diff_prev_row_tag_auth_sec = d.diff_prev_row_tag_auth_sec;
const diff_prev_row_tag_auth_m = d.diff_prev_row_tag_auth_sec/60;
const diff_prev_row_tag_auth_h = d.diff_prev_row_tag_auth_sec/60/60;
const Author = d.Author;
const toxicity_score = d.toxicity_score;
const is_toxic = d.is_toxic;
const cumlines_tag = d.cumlines_tag;
return {
sha,
date,
hour,
dayOfMonth,
week,
month,
year,
abs_lines_changed,
cumlines_tag,
diff_prev_row_tag_auth_sec,
diff_prev_row_tag_auth_m,
diff_prev_row_tag_auth_h,
Author,
toxicity_score,
is_toxic,
group
};
})
}
Insert cell
// Parser for "%Y-%m-%d" date strings built with d3.utcParse; chosen_version applies it to the Day column.
parseTime = d3.utcParse("%Y-%m-%d")
Insert cell
// Rows of chosen_version written by the selected author (sel_a); when a brush
// range is set in `limits`, only rows whose date falls inside [limits[0], limits[1]].
filtered_data = chosen_version.filter((row) => {
  if (row.Author !== sel_a) return false;
  if (!limits) return true; // no brush selection: keep every commit of the author
  return row.date >= limits[0] && row.date <= limits[1];
})
Insert cell
gv = {
const k = Object.values(_.countBy(filtered_data.map(d => Math.round(d.diff_prev_row_tag_auth_h)))).sort(d3.descending)
const rank = d3.rank(k, d=>-d).map(d => d+1)
var dict = [];
for (let i = 0; i < k.length; ++i) {
dict.push({
k: k[i], rank: rank[i], y: Math.log10(k[i]), x: Math.log10(rank[i])
});
}
return dict
}
Insert cell
// Author names of the top `top_auth` contributors: rows are sorted by cumlines_tag
// descending, de-duplicated by Author, and truncated to the first top_auth entries.
// NOTE(review): presumably distinct() keeps each author's first (largest) row — confirm.
core_devs = tidy(chosen_version, arrange(desc("cumlines_tag")), distinct("Author"), sliceHead(top_auth)).map(d => d.Author)
Insert cell
// Rank/frequency points of hour-rounded inter-commit gaps, computed separately for
// every author in core_devs and concatenated; each point carries its `author`.
gv_core_dvs = core_devs.flatMap((author) => {
  const commits = chosen_version.filter((row) => row.Author === author);
  const counts_by_hour = _.countBy(commits, (row) => Math.round(row.diff_prev_row_tag_auth_h));
  const freqs = Object.values(counts_by_hour).sort(d3.descending);
  const zero_based_ranks = d3.rank(freqs, (f) => -f);
  return freqs.map((freq, i) => {
    const position = zero_based_ranks[i] + 1;
    return { k: freq, rank: position, y: Math.log10(freq), x: Math.log10(position), author: author };
  });
})
Insert cell
gv_same_dev_over_time = {
const tags = tidy(cpython, distinct("tag")).map(d => d.tag)
var out = [];
for (const t of tags) {
const auth_tag_df = tidy(
cpython,
mutate({diff_prev_row_tag_auth_h: d=>d.diff_prev_row_tag_auth_sec/60/60}),
filter(d => d.Author === sel_a && d.tag === t)
)
const k = Object.values(_.countBy(auth_tag_df.map(d => Math.round(d.diff_prev_row_tag_auth_h)))).sort(d3.descending)
const rank = d3.rank(k, d => -d).map(d => d+1)
for (let j = 0; j < k.length; ++j) {
out.push({
k: k[j], rank: rank[j], y: Math.log10(k[j]), x: Math.log10(rank[j]), author: sel_a, tag: t
});
}
}
return out
}
Insert cell
// Linear-regression generator from d3-regression (`reg`): reads each point's
// `x` and `y` fields and is configured with the domain [0, 10].
linearRegression = reg.regressionLinear()
.x(d => d.x)
.y(d => d.y)
.domain([0, 10]);
Insert cell
// delta2sec = { return { "hour": 3600, "half-day": 43200, "day": 86400} }
Insert cell
// Time-window thresholds that event_sequence compares against diff_prev_row_tag_auth_sec.
// Presumably seconds: 3600 = hour, 43200 = half-day, 86400 = day.
delta_list = [3600, 43200, 86400]
Insert cell
event_sequence = {
var dict = [];
for (let j = 0; j < delta_list.length; ++j) {
var E_lab = [];
var E = [];
let lab = [];
let size_counter = 1;
let lab_counter = 1;
for (var i = 0; i <= filtered_data.length; i++) {
const time_diff = filtered_data.map(d => d.diff_prev_row_tag_auth_sec)[i]
{
if(time_diff <= delta_list[j]){
lab.push(lab_counter);
size_counter = size_counter + 1;
} else { // time_diff bigger than delta_t, then new event sequence
E.push(size_counter);
lab_counter = lab_counter + 1; // add 1 to lab counter
lab.push(lab_counter);
size_counter = 1; // restart size counter
}
} // end if statement
} // end for loop i
dict.push({size: E, lab: lab})
} // end for loop j
return dict
}
Insert cell
/**
 * Builds log-log points for the distribution of event-sequence sizes.
 *
 * For every time window in `event_sequence`, counts how often each sequence
 * size occurs and emits one point per distinct size.
 *
 * Both axes use base-10 logarithms so that a slope fitted to the points is a
 * proper log-log exponent (previously y used the natural log while x used log10).
 * The number of windows follows `event_sequence` instead of being fixed at 3.
 *
 * @returns {Array<{x: number, y: number, time_window: string}>}
 *   x = log10(sequence size), y = log10(number of sequences of that size),
 *   time_window = the matching delta_list entry suffixed with "s".
 */
function prep_event() {
  const points = [];
  event_sequence.forEach((sequence, j) => {
    const size_counts = _.countBy(sequence.size);
    for (const [size, occurrences] of Object.entries(size_counts)) {
      points.push({
        x: Math.log10(+size), // object keys are strings; coerce back to a number
        y: Math.log10(occurrences),
        time_window: `${delta_list[j]}s`
      });
    }
  });
  return points;
}
Insert cell
// English month names, January first; chosen_version indexes this with (tzdate.month - 1).
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
Insert cell
// English weekday names, Monday first. Not referenced by the other cells visible here.
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
Insert cell
Insert cell
import {tidy, mutate, groupBy, filter, count, arrange, cumsum, mutateWithSummary,
sum, summarize, leftJoin, desc, sliceHead,distinct, pivotLonger, select,
n, max, mean, deviation} from "@pbeshai/tidyjs"
Insert cell
// Plot namespace used by this notebook: the imported Plot_ passed through addRegression.
// NOTE(review): addRegression is not defined or imported in the visible cells — confirm its source.
Plot = addRegression(Plot_)
Insert cell
// d3-regression (major version 1) loaded via require; linearRegression uses its regressionLinear().
reg = require("d3-regression@1")
Insert cell
Insert cell
import { Plot as Plot_ } from "@fil/plot-splom" // for Plot.transform & Plot.channel
Insert cell
import {brushFilterX} from "@observablehq/brush-filter-x"
Insert cell
import {serialize} from "@palewire/saving-csv"
Insert cell
// Load the Temporal API using a Polyfill
Temporal = {
const TemporalLib = await require("@js-temporal/polyfill@0.3.0")
return TemporalLib.Temporal
}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more