Public
Edited
Feb 8, 2024
Insert cell
Insert cell
// Rows backing the histogram cell below, loaded from the attached CSV file.
// The histogram reads the `qdel` and `status` columns of each row.
// `typed: true` asks the CSV parser to infer column types; the chart's
// expressions compare `status` against both the number 200 and strings such
// as '3xx', so they rely on numeric-looking values being parsed as numbers.
data = FileAttachment("histogramsigir2@2.csv").csv({typed: true})
Insert cell
/*Plot.plot({
y: {grid: true},
color: {legend: true},
marks: [
Plot.rectY(data, Plot.binX({y: "count"}, {x: "qdel", fill: "status", thresholds: [ 0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0, 1.1]})),
Plot.ruleY([0])
]
})*/
Insert cell
// Bar histogram: number of pages per `qdel` bin, with each bar split and
// colored by a human-readable content status derived from `status`.
vl.markBar()
.data(data)
.transform(
// statusorder: numeric sort key — 0 for status 200, 1 for '3xx', 2 for anything else.
vl.calculate("if(datum.status === 200, 0, if(datum.status === '3xx', 1, 2))").as('statusorder'),
// Content Status: display label for the legend; any status other than
// 200 / '3xx' / '4xx' maps to a single-space label.
vl.calculate("if(datum.status === 200, 'Never Moved', if(datum.status === '3xx', 'Moved and Found', if(datum.status === '4xx', 'Not Found', ' ')))").as('Content Status')
)
.encode(
// x: `qdel` binned over [0, 1.1] with at most 11 bins, then re-typed as ordinal.
// labelExpr rewrites each bin label into a percent range by inspecting fixed
// character positions of the label text:
//   first char is '1'      -> '[100]'
//   char at index 2 is '9' -> '[90 - 100)'
//   char at index 6 is '1' -> '[0 - 10)'
//   otherwise              -> '[' + char 2 + '0 - ' + char 8 + '0)'
// NOTE(review): this depends on the exact text format of the bin labels;
// re-check the labels if the bin extent or maxbins ever change.
vl.x().fieldQ('qdel').bin({ extent: [0, 1.1], maxbins: 11 }).type("ordinal").axis({labelExpr: "substring(datum.value, 0, 1) == '1' ? '[100]' : substring(datum.value,2,3) == '9' ? '[90 - 100)' : substring(datum.value,6,7) == '1' ? '[0 - 10)' : '[' + substring(datum.value,2,3) + '0 - ' + substring(datum.value,8,9) + '0)'", labelAngle: -45}).title("Percent of queries per page with deleted terms (stop words removed)"),
// y: count of rows (pages) falling in each bin.
vl.y().count().title("Page Count"),
// color: explicit domain/range so each status label gets a fixed color, in
// order: 'Never Moved' -> #B2D8B2, 'Moved and Found' -> #FFE4B2,
// 'Not Found' -> #F59385, ' ' -> #FFFFFF.
vl.color().fieldN('Content Status').scale({
//"domain": [200, "3xx", "4xx"],
"domain": ["Never Moved", "Moved and Found", "Not Found", " "],
//"range": ["#BBE7F9", "#B2D8B2", "#FFE4B2"]
"range": ["#B2D8B2", "#FFE4B2", "#F59385", "#FFFFFF"]
//#F5BA85
}) .legend(
// Horizontal legend in a white box with a black outline, anchored bottom-right.
{orient: 'bottom-right', offset: 50, direction: 'horizontal', fillColor: '#ffffff', strokeColor: '#000000', padding: 6} ),
// order: arranges the colored segments by the derived `statusorder` key, ascending.
vl.order().fieldQ('statusorder').sort('ascending')
)
.width(400)
.height(200)
//.title("Over half of pages had deleted terms in at least half of their queries")
// view.stroke = null: no outline is drawn around the plot area.
.config({ view: { stroke: null }})
.render()
Insert cell
// Rows backing the scatter plot below, loaded from the attached CSV file.
// Cells in this notebook read the `qdel_avg`, `qtr_avg`, `count` and `tld`
// columns, and other cells add `text_x` / `text_y` label positions to each row.
// `typed: true` asks the CSV parser to infer column types (the chart treats
// `qdel_avg`, `qtr_avg` and `count` as numbers).
data2 = FileAttachment("scatter-tld2.csv").csv({typed: true})
Insert cell
{
  // Give every row a label anchor (text_x, text_y) that starts out at the
  // point's own coordinates (qdel_avg, qtr_avg). The scatter plot's text layer
  // positions its labels with these two fields.
  for (const row of data2) {
    row.text_x = row.qdel_avg;
    row.text_y = row.qtr_avg;
  }
}
Insert cell
{
  // Hand-tuned label anchors for individual rows of data2, overriding the
  // text_x / text_y fields that the scatter plot's text layer reads.
  // Each entry is [row index in data2, fields to overwrite]; entries are
  // applied top to bottom.
  // NOTE(review): indices are positional, so they presumably need updating
  // if the rows of the CSV are reordered — confirm against the data file.
  const labelNudges = [
    [12, { text_x: .72 }],
    [9, { text_y: .015 }],
    [1, { text_x: .88, text_y: .015 }],
    [7, { text_x: .81, text_y: .023 }],
    // with size
    [4, { text_y: .0005 }],
    [0, { text_y: -.024 }],
    [2, { text_y: .009, text_x: .74 }],
  ];

  for (const [rowIndex, fields] of labelNudges) {
    Object.assign(data2[rowIndex], fields);
  }
}
Insert cell
// Inspection cell: displays row 7 of data2, one of the rows whose label
// anchor (text_x / text_y) is overridden by hand in this notebook.
data2[7]
Insert cell
// Layered scatter plot of qtr_avg against qdel_avg for each row of data2.
// Layers, in the order given: a gray area, a rule at qtr_avg = 0, one circle
// per row, and a text label per row.
vl.layer(
// Layer 1: semi-transparent gray area defined by two points at qtr_avg = -.14,
// spanning qdel_avg 0 to .5.
// NOTE(review): presumably shades the region with qdel_avg below 0.5 and
// negative qtr_avg — confirm the intended meaning.
vl.markArea({color: '#e5e5e5', opacity: .5})
.data([{'qtr_avg': -.14, 'qdel_avg': 0},
{'qtr_avg': -.14, 'qdel_avg': .5}
])
.encode(
vl.x().fieldQ('qdel_avg'),
vl.y().fieldQ('qtr_avg')
),
// Layer 2: rule with only a y encoding, placed at qtr_avg = 0.
vl.markRule()
.data([{'qtr_avg': 0}])
.encode(
vl.y().fieldQ('qtr_avg')
),
// Layer 3: one circle per row of data2.
vl.markCircle() //{color: '#000000'}
.data(data2)
.transform(
// Page Count: two-level category derived from `count` (> 10 or not).
vl.calculate("if(datum.count > 10, 'More than 10', '10 or fewer')").as('Page Count')
)
.encode(
vl.x().fieldQ('qdel_avg'),
// The y scale domain is fixed to [-.13, .13] here.
vl.y().fieldQ('qtr_avg').scale({domain: [-.13,.13]}),
// Size and color both encode the derived 'Page Count' category.
// NOTE(review): the negative legend offset presumably pulls the legend box
// into the plot area — confirm visually.
vl.size().fieldN('Page Count').scale({range: [20, 55]}).legend({offset:-86, fillColor: '#ffffff', strokeColor: '#000000', padding: 6}),
vl.color().fieldN('Page Count').scale({
"range": ["#231151", "#b6377a"]}
)
),
// Layer 4: `tld` text labels, positioned at text_x / text_y rather than at the
// point coordinates; other cells in this notebook set those fields so labels
// can be moved individually. The axis titles and tick-label format are
// declared on this layer.
vl.markText({ align: "left", dx: 5, dy: 8 })
.data(data2)
.encode(
// labelExpr turns a fractional tick into a percent string: a value whose
// first character is 1 becomes '100', otherwise the character at index 2
// (the first decimal digit) followed by '0', e.g. 0.5 -> '50'.
// NOTE(review): assumes ticks have at most one decimal digit.
vl.x().fieldQ('text_x').axis({title: "Percent of queries with deleted terms (average)",
labelExpr: "substring(datum.value,0,1) == 1 ? '100' : substring(datum.value,2,3) + '0'"}),
vl.y().fieldQ('text_y').axis({title: "Query terms ratio (average)"}),
vl.text().fieldN("tld")
)
)
.width(400)
.height(400)
.render()
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more