viewof merged_file = file_1_date
.concat(file_2_date)
.concat(file_3_date)
.concat(file_4_date)
.derive({
apply_distance: d => op.parse_int(d['distance from user to applied city'])
})
.spread({
user_city: d => op.split(d['user location'], ',')}, {as: ['city', 'state']
})
.spread({
applied_city: d => op.split(d['applied city'], ',')}, {as: ['applied_city', 'applied_state']
})
.filter(
d => d.city != ''
)
.derive({applied_state: d => op.trim(d.applied_state)})
.derive({
market: d => d['distance from user to applied city'] > 40 ? 'oom' : 'im'
})
.select('date', 'user location', 'clicked job location', 'applied city', 'apply_distance', 'market')
.orderby('date', 'market', aq.desc('apply_distance'))
.groupby('date', 'applied city', 'market')
.count()
.groupby('date', 'applied city')
.derive({
percent: d => d.count / op.sum(d.count)
})
.orderby('date', 'applied city', 'market', 'percent')
.filter(d => d.market == 'oom' && d.percent > 0.6)
.groupby('date')
.count()
.orderby('date')
.derive({date: d => op.parse_date(d.date)})
.view()