OOM question / Aldo Viramontes

Aldo Viramontes

Workspace

Public

Edited

Jun 21, 2023

Fork of Mark's Indeed Project

// Faceted horizontal bar chart: one facet column per `date`, one bar row per
// `applied city`, bar length taken from `percent`, bars colored by `market`.
// NOTE(review): `merged_file` as defined in this notebook ends with
// groupby('date').count(), so it only carries `date` and `count` — confirm this
// chart is pointed at a table that still has `percent`, `applied city` and `market`.
// NOTE(review): the x axis is labelled 'Count of applies' while x is bound to
// 'percent' — confirm which of the two is intended.
Plot.plot({
  color: {
    legend: true
  },
  marginTop: 75,
  marginLeft: 200,
  height: 10000,
  width: 1000,
  fx: {
    padding: 0.2
  },
  marks: [
    Plot.frame(),
    Plot.axisX({anchor: 'top', label: 'Count of applies'}),
    Plot.axisFx({label: null, textAnchor: 'middle', tickSize: 35, stroke: 'white'}),
    Plot.barX(merged_file, {
      x: 'percent',
      y: 'applied city',
      fill: 'market',
      fx: 'date',
      sort: {y: 'x'} // order the y (city) domain by the bars' x values
    })
  ]
})

viewof houston = file_1_date

.concat(file_2_date)

.concat(file_3_date)

.derive({

apply_distance: d => op.parse_int(d['distance from user to applied city']),

'created at': aq.escape(d => d3.utcDay(op.parse_date(d['created at'])))

})

.filter(d => d.apply_distance > 40)

.spread({

user_city: d => op.split(d['user location'], ',')}, {as: ['city', 'state'] // split the location column into 2 columns

})

.filter(

d => d.city != '' // exclude those that have a city = 'blank'

)

.derive({

market: d => d['distance from user to applied city'] > 40 ? 'oom' : 'im' // tag distances as out of market (oom) and in market (im)

})

.filter(d => d['applied city'] == 'Houston, TX')

.groupby('date','applied city', 'job board', 'user location')

.count()

.orderby('date', aq.desc('count'))

// .filter(d => d['user location'] == 'Brooklyn, NY')

// .filter(d => d['job board'] == 'Apptness CPA UL')

.view()

// Small bar chart of merged_file: `count` on y, `date` on x binned with
// d3.utcWeek, with the x axis ticked by month.
Plot.plot({
  width: 300,
  height: 300,
  marginTop: 50,
  marks: [
    Plot.axisX({interval: 'month'}),
    Plot.axisY({label: 'Number of cities with higher % of OOM'}),
    Plot.rectY(merged_file, {
      x: 'date',
      y: 'count',
      interval: d3.utcWeek,
      insetRight: 3, // leave a gap on the right edge of each bar
      fill: appcast_color_palette[0]
    })
  ]
})

viewof merged_file = file_1_date

.concat(file_2_date)

.concat(file_3_date)

.concat(file_4_date)

.derive({

apply_distance: d => op.parse_int(d['distance from user to applied city'])

})

//.filter(d => d.apply_distance > 40)

.spread({

user_city: d => op.split(d['user location'], ',')}, {as: ['city', 'state'] // split the location column into 2 columns

})

.spread({

applied_city: d => op.split(d['applied city'], ',')}, {as: ['applied_city', 'applied_state'] // split the location column into 2 columns

})

.filter(

d => d.city != '' // exclude those that have a city = 'blank'

)

// extract texas

.derive({applied_state: d => op.trim(d.applied_state)})

//.filter(d => d.applied_state != 'TX')

.derive({

market: d => d['distance from user to applied city'] > 40 ? 'oom' : 'im' // tag distances as out of market (oom) and in market (im)

})

//.filter(d => d.market == 'oom')

.select('date', 'user location', 'clicked job location', 'applied city', 'apply_distance', 'market')

.orderby('date', 'market', aq.desc('apply_distance'))

.groupby('date', 'applied city', 'market')

.count()

.groupby('date', 'applied city')

.derive({

percent: d => d.count / op.sum(d.count)

})

.orderby('date', 'applied city', 'market', 'percent')

.filter(d => d.market == 'oom' && d.percent > 0.6)

.groupby('date')

.count()

.orderby('date')

.derive({date: d => op.parse_date(d.date)})

.view()

viewof file_1_date = aq.from(file_1)

.derive({date: d => '2023-05-26'})

.view()

viewof file_2_date = aq.from(file_2)

.derive({date: d => '2023-06-02'})

.view()

viewof file_3_date = aq.from(file_3)

.derive({date: d => '2023-06-09'})

.view()

viewof file_4_date = aq.from(file_4)

.derive({date: d => '2023-05-19'})

.view()

// CSV exports attached to the notebook; each file name carries the 7-day date
// range it covers. None of these calls passes {typed: true} (unlike raw_data).
file_1 = FileAttachment("Lyft_distance_2023-05-26_2023-06-01 (1).csv").csv()

file_2 = FileAttachment("Lyft_distance_2023-06-02_2023-06-08.csv").csv()

file_3 = FileAttachment("Lyft_distance_2023-06-09_2023-06-15.csv").csv()

// Note: file_4 holds the earliest range (2023-05-19 to 2023-05-25) even though it is numbered last.
file_4 = FileAttachment("Lyft_distance_2023-05-19_2023-05-25.csv").csv()

viewof clean = aq.from(raw_data)

.filter(aq.escape(d => d.date >= start_date && d.date <= end_date))

.view()

// Load the CSV behind the `file` cell, passing {typed: true} to the parser.
// NOTE(review): `file` is not defined in the visible part of this notebook — confirm where it comes from.
raw_data = file.csv({typed: true})

// formatTable is a really nice way of displaying tables: much more aesthetically pleasing than the defaults of Arquero and Inputs.table

import { formatTable } from "@saneef/pretty-tables"

import {toc} from "@nebrius/indented-toc"

// Parser for month/day/four-digit-year strings (e.g. "06/21/2023"), built with d3.utcParse.
// NOTE(review): no cell visible here references date_parser — confirm it is still needed.
date_parser = d3.utcParse('%m/%d/%Y')

// sample of how to place html blocks in a grid

// html `<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; column-gap: 0px; row-gap: 5px;">

// ${viewof spend}

// ${viewof cpl}

// ${viewof cpc}

// </div>`

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more