Public
Edited
Jul 9, 2024
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Plot.plot({
title: "average of height and weight for each sport",
subtitle: "the red dot represents you.",
width: width,
height: 500,
r: {range: [0, 12]},
marks: [
Plot.dot(centroids, {x: "weight", y: "height", fill: 'steelblue', r: 'athletes'}),
Plot.text(centroids, {x: "weight", y: d => d.height + 1, text: 'sport'}),

Plot.dot([{weight: userWeight, height: userHeight}], {x: "weight", y: "height", fill: 'red', opacity: .5, r: 5}),

Plot.text([{weight: userWeight, height: userHeight, text: "you"}], {x: "weight", y: d => d.height + 1, text: 'text'}),

]
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
distances = centroids.map(center => {
const deltaX = userWeight - center.weight
const deltaY = userHeight - center.height

return {sport: center.sport, distance: Math.hypot(deltaX, deltaY) }
}).sort((a,b) => a.distance - b.distance)
Insert cell
centroids.map(d => d.sport)
Insert cell
centroids = d3.rollups(dataBySex, d => {
return {height: d3.mean(d, v => v.height), weight: d3.mean(d, v => v.weight), athletes: d3.count(d, v => v.weight) }
}, d => d.sport ).map(d => {
return {sport: d[0], ...d[1]}
}).sort((a,b) => a.height - b.height)
Insert cell
dataBySex = athletes.filter(d => d.sex === gender)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
olympicsDB
SELECT sub.*
FROM (
SELECT
Name,
min(Age) as age,
min(Height) as height,
min(Weight) as weight,
min(Sex) as sex,
CASE
WHEN Sport = 'Athletics'
THEN TRIM(REPLACE(REPLACE(MIN(Event), 'Athletics Men''s', '' ), 'Athletics Women''s' , '' ))
ELSE TRIM(Sport)
END AS Sport,
FROM athletes
WHERE Season = 'Summer' AND Sport NOT IN ('Lacrosse',
'Ice Hockey', 'Figure Skating',
'Tug-Of-War', 'Art Competitions', 'Baseball')
GROUP BY Name, Sport
) sub
WHERE sub.height != 'NaN' AND sub.weight != 'NaN'
Insert cell
athletes1
SELECT height, weight, sex,age,
CASE
WHEN TRIM(Sport) IN ('Marathon', '100 metres', '800 metres', '1,500 metres', '5,000 metres', '3,000 metres Steeplechase') THEN 'Distance running'
WHEN TRIM(Sport) IN ('4 x 100 metres Relay', '4 x 400 metres Relay', '200 metres', '400 metres', '110 metres Hurdles', '10,000 metres', '400 metres Hurdles') THEN 'Sprinting and Hurdling'
WHEN TRIM(Sport) IN ('High Jump', 'Triple Jump', 'Long Jump') THEN 'Jumping events'
WHEN TRIM(Sport) IN ('20 kilometres Walk', '50 kilometres Walk') THEN 'Race Walking'
WHEN TRIM(Sport) IN ('Discus Throw', 'Hammer Throw', 'Shot Put') THEN 'Discus, Hammer & Shot Put'

WHEN TRIM(Sport) IN ('Heptathalon', 'Decathlon', 'Pentathlon') THEN 'Decathlon & Pentathlon'
ELSE Sport
END AS sport
from athletes1

WHERE sport NOT IN ('10 mile Walk',
'10 kilometres Walk',
'2,590 metres Steeplechase',
'3 mile, Team',
'4 mile, Team'
'2,500 metres Steeplechase',
'3,000 metres, Team',
'5,000 metres, Team',
'All-Around Championship',
'Motorboating',
'56-pound Weight Throw',
'Pentathlon (Ancient)',
'1,600 metres Medley Relay',
'2,500 metres Steeplechase',
'200 metres Hurdles',
'3,200 metres Steeplechase',
'3,000 metres Walk',
'Standing Long Jump',
'Cross-Country, Individual',
'5 mile',
'4 mile, Team',
'3,500 metres Walk',
'Standing High Jump'
)
Insert cell
olympicsDB
WITH AthleteData AS (
SELECT
Name as name,
MIN(Team) as country,
MIN(Height) as height,
MIN(Weight) as weight,
MIN(Age) as age,
MIN(Sex) as sex,
MIN(Sport) as sport,
MAX(Year) as last_competed
FROM athletes
WHERE Season = 'Summer'
AND Sport NOT IN ('Lacrosse', 'Ice Hockey', 'Figure Skating', 'Tug-Of-War', 'Art Competitions')
GROUP BY Name
HAVING MIN(Height) != 'NaN' AND MIN(Weight) != 'NaN' AND MAX(Year) >= 2016
),
SportAverages AS (
SELECT
sport,
AVG(height::numeric) AS avg_height,
AVG(weight::numeric) AS avg_weight
FROM AthleteData
GROUP BY sport
),
DistanceCalculation AS (
SELECT
a.name,
a.country,
a.height,
a.weight,
a.age,
a.sex,
a.sport,
a.last_competed,
sa.avg_height,
sa.avg_weight,
SQRT(POWER(a.height::numeric - sa.avg_height, 2) + POWER(a.weight::numeric - sa.avg_weight, 2)) AS distance
FROM AthleteData a
JOIN SportAverages sa ON a.sport = sa.sport
),
ClosestAthlete AS (
SELECT
sport,
name,
country,
height,
weight,
age,
sex,
last_competed,
distance,
ROW_NUMBER() OVER (PARTITION BY sport ORDER BY distance) AS rank
FROM DistanceCalculation
)
SELECT
sport,
name,
country,
height,
weight,
age,
sex,
last_competed
FROM ClosestAthlete
WHERE rank = 1
ORDER BY sport;

Insert cell
s = sports.map(d => d.sport)
Insert cell
Insert cell
athletes
SELECT sport, COUNT(*) as athletes
FROM athletes
GROUP BY sport
ORDER BY athletes DESC
;
Insert cell
olympicsDB
SELECT sub.Event, sub.participants
FROM
(SELECT event, count(name) as participants, max(Year) as latest_year
FROM athletes
WHERE sport = 'Athletics' AND sex = 'M'
GROUP BY event
ORDER BY latest_year DESC, participants DESC
) sub
WHERE sub.latest_year = 2016
Insert cell
Insert cell
schemes = [
"Blues", "BuPu", "Purples", "Greens", "Oranges", "OrRd", "Reds",
]
Insert cell
colorScheme = schemes[Math.floor(centroids.findIndex(d => d.sport === distances[0].sport) / Math.ceil(centroids.length / schemes.length) )]
Insert cell
function rollupData(data, sex) {
return d3.rollups(data, d => {
return {height: d3.mean(d, v => v.height), weight: d3.mean(d, v => v.weight), athletes: d3.count(d, v => v.weight) }
}, d => d.sport ).map(d => {
return {sport: d[0], ...d[1], sex: sex}
}).sort((a,b) => a.height - b.height)

}
Insert cell
finalAppData = [...rollupData(athletes.filter(d => d.sex === 'M'), 'M'), ...rollupData(athletes.filter(d => d.sex === 'F'), 'F')]
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more