Public
Edited
Aug 25, 2023
1 fork
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
dbInitial = DuckDBClient.of({})
Insert cell
view_query = `
CREATE OR REPLACE VIEW nyc_taxi_tiny AS
SELECT *
FROM parquet_scan(${(urls)});
`
Insert cell
Insert cell
urls
Insert cell
Insert cell
Insert cell
dbView = dbInitial.query(view_query)
Insert cell
con = dbView, dbInitial
Insert cell
con
SELECT *
FROM nyc_taxi_tiny
LIMIT 5;
Insert cell
con
SELECT date_part('year', pickup_datetime) as pickup_year, date_part('month', pickup_datetime) as pickup_month, COUNT(*)
FROM nyc_taxi_tiny
GROUP BY pickup_year, pickup_month
ORDER BY pickup_year, pickup_month;
Insert cell
con
SELECT pickup_longitude, pickup_latitude
FROM nyc_taxi_tiny
WHERE pickup_longitude IS NOT NULL
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
con
CREATE OR REPLACE TABLE pickupRaw AS
SELECT x, y, COUNT(*) AS pickup
FROM (
SELECT
*,
CAST(ROUND(${pixels} * unit_scaled_x, CAST(ROUND(0.0, 0) AS INTEGER)) AS INTEGER) AS x,
CAST(ROUND(${pixels} * unit_scaled_y, CAST(ROUND(0.0, 0) AS INTEGER)) AS INTEGER) AS y
FROM (
SELECT
*,
(pickup_longitude - -74.05) / 0.3 AS unit_scaled_x,
(pickup_latitude - 40.6) / 0.3 AS unit_scaled_y
FROM nyc_taxi_tiny
WHERE
(NOT((pickup_longitude IS NULL))) AND
(NOT((pickup_latitude IS NULL))) AND
(pickup_longitude > -74.05) AND
(pickup_longitude < (-74.05 + 0.3)) AND
(pickup_latitude > 40.6) AND
(pickup_latitude < (40.6 + 0.3))
) q01
) q02
GROUP BY x, y
Insert cell
Insert cell
con_pickupRaw = pickupRawQuery, con
Insert cell
con_pickupRaw
SELECT *
FROM pickupRaw
Insert cell
Insert cell
con
CREATE OR REPLACE TABLE xycombs AS
SELECT series.generate_series as x, series2.generate_series as y
FROM generate_series(0, CAST(${pixels} AS INTEGER)-1 ) as series,
generate_series(0, CAST(${pixels} AS INTEGER)-1 ) as series2
Insert cell
con_xycombs = xycombsQuery, con
Insert cell
con_xycombs
SELECT *
FROM xycombs
Insert cell
Insert cell
con_pickupQuery = pickupRaw, xycombsQuery, con
Insert cell
con_pickupQuery
CREATE OR REPLACE TABLE pickup AS
SELECT xycombs.x, xycombs.y, COALESCE(pickup, 0) AS pickup
FROM xycombs
LEFT JOIN pickupRaw
ON xycombs.x = pickupRaw.x
AND xycombs.y = pickupRaw.y
Insert cell
con_pickup = pickupQuery, con
Insert cell
con_pickup
SELECT *
FROM pickup
Insert cell
Insert cell
con_pickup
SELECT *
FROM pickup
WHERE pickup > 0
Insert cell
{
console.log(JSON.stringify(pickupAny))
return pickupAny;
}
Insert cell
Insert cell
Insert cell
Insert cell
// viewof years = Inputs.checkbox([...Array(14).keys()].map(i => (2009+i).toString()), {label: "Year(s)", value: ['2009']})
Insert cell
years = [year_drop]
Insert cell
yearstrs = years.map(s => 'year='+s)
Insert cell
fileurls_filtered = fileurls.filter(u => yearstrs.some(ys => u.includes(ys)))
Insert cell
fileurls
Insert cell
fileurls = await fetch("https://raw.githubusercontent.com/jzavala-gonzalez/nyc-taxi-tiny/main/fileurls.txt").then(response => {
if (!response.ok) throw new Error(response.status);
return response.text().then(t => t.split('\n'));
}).catch(error => {return error;})
Insert cell
urls = ('[' + fileurls_filtered.map(s => "'"+s+"'").join(',') + ']')
Insert cell
`
SELECT date_part('year', pickup_datetime) as pickup_year, date_part('month', pickup_datetime) as pickup_month, COUNT(*)
FROM parquet_scan(${urls})
GROUP BY pickup_year, pickup_month
ORDER BY pickup_year, pickup_month;
`
Insert cell
// Esto va a crashear si hay demasiados pixeles
// Plot.plot({
// width: width,
// y: {reverse: true},
// padding: 0,
// color: {
// scheme: "bupu",
// },
// marks: [
// Plot.cell(pickupAny, {
// x: d => Number(d.x),
// y: d => Number(d.y),
// fill: d => Math.log10(Number(d.pickup)) ,
// inset: 0.0,
// })
// ]
// })
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more