Public
Edited
Mar 6
Insert cell
Insert cell
client
from ingreso_ganado
Insert cell
client
show tables
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
with data_2023 as (
select year_contributivo, estado_civil,
credito_trabajo, credito_trabajo_planillas, credito_trabajo_promedio
from estado_civil
),

data_before as (
select year_contributivo, estado_civil,
sum(credito_trabajo_casados) + sum(credito_trabajo_individual) as credito_trabajo,
sum(credito_trabajo_casados_planillas) + sum(credito_trabajo_individual_planillas) as credito_trabajo_planillas,
credito_trabajo / credito_trabajo_planillas as credito_trabajo_promedio
from eitc_t6_estado_sexo_2019_2022
group by all
)

from data_2023
union all by name
from data_before


Insert cell
Insert cell
Insert cell
Insert cell
client
with data_2023 as (
select year_contributivo, sexo,
sum(credito_trabajo) as credito_trabajo, sum(credito_trabajo_planillas) as credito_trabajo_planillas,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio
from estado_civil_sexo
where sexo != 'Otros'
group by all
),

data_before as (
select year_contributivo, sexo,
sum(credito_trabajo_casados) + sum(credito_trabajo_individual) as credito_trabajo,
sum(credito_trabajo_casados_planillas) + sum(credito_trabajo_individual_planillas) as credito_trabajo_planillas,
credito_trabajo / credito_trabajo_planillas as credito_trabajo_promedio
from eitc_t6_estado_sexo_2019_2022
where sexo != 'No marcó o otro'
group by all
)

from data_2023
union all by name
from data_before
Insert cell
Insert cell
Insert cell
client
with data_2023 as (
select year_contributivo, sexo, estado_civil,
credito_trabajo, credito_trabajo_planillas, credito_trabajo_promedio,
ingreso_ganado, ingreso_ganado_planillas, ingreso_ganado_promedio,
from estado_civil_sexo
where sexo != 'Otros'
),
data_before as (
select year_contributivo, sexo, estado_civil,
credito_trabajo_casados + credito_trabajo_individual as credito_trabajo,
credito_trabajo_casados_planillas + credito_trabajo_individual_planillas as credito_trabajo_planillas,
credito_trabajo / credito_trabajo_planillas as credito_trabajo_promedio,

ingreso_ganado_casados + ingreso_ganado_individual as ingreso_ganado,
ingreso_ganado_casados_planillas + ingreso_ganado_individual_planillas as ingreso_ganado_planillas,
ingreso_ganado / ingreso_ganado_planillas as ingreso_ganado_promedio,
from eitc_t6_estado_sexo_2019_2022
where sexo != 'No marcó o otro'
-- group by all
)

from data_2023
union all by name
from data_before
Insert cell
client
select
year_contributivo, sexo, estado_civil,
ingreso_ganado_casados + ingreso_ganado_individual as ingreso_ganado,
ingreso_ganado_casados_planillas + ingreso_ganado_individual_planillas as ingreso_ganado_planillas,
ingreso_ganado / ingreso_ganado_planillas as ingreso_ganado_promedio,
from eitc_t6_estado_sexo_2019_2022
Insert cell
Insert cell
Insert cell
Insert cell
client
from estado_civil_sexo
select sexo, estado_civil, sum(credito_trabajo_planillas) as credito_trabajo_planillas
group by all
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
with initial as (
select * replace (regexp_extract(edad_table.edad, '\d+')::INT as edad)
from edad as edad_table
),

is_joven_aggregates as (
select edad <= 26 as is_joven,
sum(credito_trabajo_planillas) as credito_trabajo_planillas,
sum(credito_trabajo) as credito_trabajo,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(ingreso_ganado) / sum(ingreso_ganado_planillas) as ingreso_ganado_promedio,
from initial
group by all
)

select *,
credito_trabajo_planillas / sum(credito_trabajo_planillas) over () as credito_trabajo_planillas_pct,
credito_trabajo / sum(credito_trabajo) over () as credito_trabajo_pct,
from is_joven_aggregates
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
select regexp_extract(edad_table.edad, '\d+')::INT as edad, credito_trabajo_promedio, credito_trabajo,
credito_trabajo_planillas, ingreso_ganado_planillas, ingreso_ajustado_planillas,
ingreso_ajustado, ingreso_ajustado_promedio,
ingreso_ganado, ingreso_ganado_promedio,
ct_cantidad_dependientes,
credito_trabajo / ingreso_ganado as credito_trabajo_pct_by_ingreso_ganado,
credito_trabajo / ingreso_ajustado as credito_trabajo_pct_by_ingreso_ajustado,
ct_cantidad_dependientes / credito_trabajo_planillas as ct_cantidad_dependientes_promedio,
credito_trabajo_planillas - ct_cantidad_dependientes_planillas as credito_trabajo_planillas_sin_dependientes,
credito_trabajo_planillas_sin_dependientes / credito_trabajo_planillas as credito_trabajo_planillas_sin_dependientes_pct,
1 - credito_trabajo_planillas_sin_dependientes_pct as credito_trabajo_planillas_con_dependientes_pct
from edad as edad_table
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
with initial as (
select
regexp_extract(ct_categoria_dependientes, '\d+').regexp_replace('^$', '0')::INT as ct_categoria_dependientes_num,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(credito_trabajo_planillas) as credito_trabajo_planillas,
sum(credito_trabajo) as credito_trabajo,
from dependientes
group by ct_categoria_dependientes_num
)

select *,
credito_trabajo_planillas / sum(credito_trabajo_planillas) over () as credito_trabajo_planillas_pct_by_num_deps
from initial
Insert cell
client
with initial as (
select
estado_civil,
regexp_extract(ct_categoria_dependientes, '\d+').regexp_replace('^$', '0')::INT as ct_categoria_dependientes_num,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(credito_trabajo_planillas) as credito_trabajo_planillas,
from dependientes
group by estado_civil, ct_categoria_dependientes_num
)

select *,
credito_trabajo_planillas / sum(credito_trabajo_planillas) over () as credito_trabajo_planillas_pct_by_num_deps
from initial
order by estado_civil desc, ct_categoria_dependientes_num
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
with initial as (
select
sexo,
regexp_extract(ct_categoria_dependientes, '\d+').regexp_replace('^$', '0')::INT as ct_categoria_dependientes_num,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(credito_trabajo_planillas) as credito_trabajo_planillas
from dependientes_sexo
where sexo != 'Otros'
group by sexo, ct_categoria_dependientes_num
order by sexo, ct_categoria_dependientes_num
)

select *,
credito_trabajo_planillas / sum(credito_trabajo_planillas) over (partition by sexo) as credito_trabajo_planillas_pct_by_sexo
from initial
Insert cell
client
with initial as (
select
sexo,
regexp_extract(ct_categoria_dependientes, '\d+').regexp_replace('^$', '0')::INT as ct_categoria_dependientes_num,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(ct_cantidad_dependientes) as ct_cantidad_dependientes,
-- sum(ct_cantidad_dependientes_planillas) as ct_cantidad_dependientes_planillas, -- NO usar porque usa 0 planillas para 0 deps
sum(credito_trabajo) as credito_trabajo,
sum(credito_trabajo_planillas) as credito_trabajo_planillas
from dependientes_sexo
where sexo != 'Otros'
group by sexo, ct_categoria_dependientes_num
order by sexo, ct_categoria_dependientes_num
),

absolute_aggregates as (
select sexo, ct_categoria_dependientes_num > 0 as ct_has_dependientes,
sum(ct_cantidad_dependientes) as ct_cantidad_dependientes,
sum(credito_trabajo) as credito_trabajo,
sum(credito_trabajo_planillas) as credito_trabajo_planillas,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(ct_cantidad_dependientes) / sum(credito_trabajo_planillas) as ct_cantidad_dependientes_promedio
from initial
group by all
)

select *,
credito_trabajo_planillas / sum(credito_trabajo_planillas) over (partition by sexo) as credito_trabajo_planillas_pct_by_sexo,
from absolute_aggregates
order by sexo, ct_has_dependientes
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
select ocupacion, credito_trabajo_promedio, credito_trabajo, credito_trabajo_planillas, ingreso_ganado_promedio
from ocupacion
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
select categoria_ingreso, credito_trabajo_promedio, credito_trabajo, credito_trabajo_planillas, ingreso_ganado_promedio
from fuente_ingreso
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
data_by_municipio_with_geojson.features.filter(d => d.properties.eitc_municipal === undefined).map(d => Object({'en_geojson': d.properties.municipio, 'en_hacienda': d.properties.eitc_municipal?.municipio}))
Insert cell
Insert cell
municipios_label_hack = Object({
'Quebradillas': '\nQuebradillas',
'Camuy': '\nCamuy',
'Guaynabo': '\nGuaynabo',
'Canóvanas': '\nCanóvanas',
})
Insert cell
Insert cell
Insert cell
Insert cell
data_by_municipio_with_geojson = add_data_to_geojson(
municipios_geojson,
data_by_municipio,
['municipio', 'municipio'],
'eitc_municipal'
)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
with initial as (
select escala_ingreso_ganado,
regexp_extract(escala_ingreso_ganado, '(\d+) <= x < (\d+)', 1)::INT as escala_ingreso_ganado_limite_inferior,
regexp_extract(escala_ingreso_ganado, '(\d+) <= x < (\d+)', 2)::INT as escala_ingreso_ganado_limite_superior,
credito_trabajo, credito_trabajo_planillas, credito_trabajo_promedio, ingreso_ganado, ingreso_ganado_planillas,
from ingreso_ganado
)

select sum(credito_trabajo_planillas), sum(credito_trabajo), sum(credito_trabajo) / sum(credito_trabajo_planillas),
sum(ingreso_ganado) / sum(ingreso_ganado_planillas)
from initial
where escala_ingreso_ganado_limite_inferior >= 29_000
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
datos_by_estado_civil_ingreso_ganado_pivot_by_sexo
Insert cell
client
-- datos_by_estado_civil_ingreso_ganado_pivot_by_sexo
with edited_ingreso_ganado_sexo as (
select * replace (
escala_ingreso_ganado.replace('45500 ó más', '45000 <= x < Infinity') as escala_ingreso_ganado
)
from ingreso_ganado_sexo
where sexo != 'Otros' -- No hay escala de ingreso disponible para 'Otros'
),

escalas_processed as (
select sexo, escala_ingreso_ganado,
regexp_extract(escala_ingreso_ganado, '(\d+) <= x < (\d+)?', 1)::INT as escala_ingreso_ganado_limite_inferior,
regexp_extract(escala_ingreso_ganado, '(\d+) <= x < (\d+)?', 2).nullif('')::INT as escala_ingreso_ganado_limite_superior,
credito_trabajo_promedio_individual, credito_trabajo_individual_planillas,
credito_trabajo_promedio_casados, credito_trabajo_casados_planillas,
from edited_ingreso_ganado_sexo
),

estado_civil_unpivot as (
unpivot escalas_processed
ON (credito_trabajo_promedio_individual, credito_trabajo_individual_planillas) AS "Contribuyente Individual",
(credito_trabajo_promedio_casados, credito_trabajo_casados_planillas) AS "Casado"
INTO
NAME estado_civil
VALUE credito_trabajo_promedio, credito_trabajo_planillas
),

estado_civil_filtered as (
-- Filter out escalas que caen fuera del programa (e.g. estado individual es ingreso < 44,500)
from estado_civil_unpivot
where not ((estado_civil = 'Contribuyente Individual') and (escala_ingreso_ganado_limite_inferior >= 44500)) -- Limite Individual es 44,147
),

sexo_pivot as (
pivot (from estado_civil_filtered select * replace (sexo.lower() as sexo))
on sexo IN ('femenino', 'masculino') -- Need the IN clause con mi current version del Observable client.
-- See https://github.com/observablehq/framework/issues/1016 para arreglarlo
using sum(credito_trabajo_promedio) as credito_trabajo_promedio, sum(credito_trabajo_planillas) as credito_trabajo_planillas
group by escala_ingreso_ganado, escala_ingreso_ganado_limite_inferior, escala_ingreso_ganado_limite_superior, estado_civil
),
sexo_pct_windows as (
select *,
femenino_credito_trabajo_planillas / sum(femenino_credito_trabajo_planillas) over (partition by estado_civil) as femenino_credito_trabajo_planillas_pct_by_estado,
masculino_credito_trabajo_planillas / sum(masculino_credito_trabajo_planillas) over (partition by estado_civil) as masculino_credito_trabajo_planillas_pct_by_estado,
femenino_credito_trabajo_planillas_pct_by_estado - masculino_credito_trabajo_planillas_pct_by_estado as sexo_diferencia_credito_trabajo_planillas_pct_by_estado,
femenino_credito_trabajo_promedio - masculino_credito_trabajo_promedio as sexo_diferencia_credito_trabajo_promedio,
from sexo_pivot
)

select *,
sum(sexo_diferencia_credito_trabajo_planillas_pct_by_estado) over (partition by estado_civil order by escala_ingreso_ganado_limite_inferior ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as cumu_sexo_diferencia_credito_trabajo_planillas_pct_by_estado
from sexo_pct_windows
order by estado_civil, escala_ingreso_ganado_limite_inferior

-- NOTE: El tope de las escalas para Casados de hombre son mayores a las escalas disponibles para mujeres
-- Una solucion es merge them pero preferiby usando arquero luego cuando necesario
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
client
with data as (
select
regexp_extract(ct_categoria_dependientes, '\d+').regexp_replace('^$', '0')::INT as ct_categoria_dependientes_num,
sum(credito_trabajo) / sum(credito_trabajo_planillas) as credito_trabajo_promedio,
sum(ingreso_ganado) / sum(ingreso_ganado_planillas) as ingreso_ganado_promedio
from dependientes
group by ct_categoria_dependientes_num
)

unpivot data
on credito_trabajo_promedio, ingreso_ganado_promedio
into
name variable_name value cantidad
order by ct_categoria_dependientes_num, variable_name desc
Insert cell
client
describe dependientes
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
participacion_laboral = {
let data = await FileAttachment("tasa_participacion_laboral@1.csv").csv({typed: true})
data = aq.from(data).derive({
rate: d => d.rate /100
})
return data
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
{
// let thevariable = variables_metadata.filter(d => (d.dato === dato_input) & (d.promedio_o_total === 'promedio'))[0];
let thevariable = {variable: 'outflow_per_capita'}
let thedata = federal_outflows.filter(d => d.outflow_per_capita < 0)
thedata = aq.from(thedata)
.derive({
'outflow_per_capita': d => d.outflow_per_capita*-1
})
.derive({
'below_pr': d => d.outflow_per_capita > 10300,
})
// .orderby('outflow_per_capita')
let eitc_promedio = aq.from(thedata)
.rollup({
'outflow_per_capita': op.mean('outflow_per_capita')
})
.objects()[0]['outflow_per_capita']

let state_labels = {
'District of Columbia': 'District of\nColumbia',
}
let theplot = Plot.plot({
marginLeft: 160,
// marginRight: 70,
marginBottom: 50,
// marginTop: 30,
// width: 1000,
height: 700,
// caption: 'ACTUALIZAR MOVER XAXIS LABEL AL CENTRO',
// x: {percent: true,},
marks: [
Plot.axisX({
fontFamily: 'jaf-bernino-sans',
fontVariant: 'tabular-nums',
tickSize: 0,
fontSize: 16,
tickFormat: '$,.0s',
label: 'Deficits in Net Federal Expenditures per Capita',
}),
Plot.axisY({
reverse: true,
label: null,
fontFamily: 'jaf-bernino-sans',
tickSize: 0,
fontSize: 16,
// rotate: -90,
// reverse: true,
// tickSpacing: .3,
// tickPadding: 30,
textAnchor: 'end', lineAnchor: 'middle',
// tickFormat: d => state_labels[d] ?? d,
}),
Plot.ruleX([eitc_promedio]),
Plot.barX(thedata, {
x: thevariable.variable,
y: 'state',
// fill: EA_COLORES_MAP['INDIGO'],
fill: d => d.state === 'Puerto Rico' ? EA_COLORES_MAP['INDIGO'] : (
d.below_pr ? chroma(EA_COLORES_MAP['GREEN']).alpha(0.35).hex() : '#cdd1ec'
),
// opacity: d => d['País'] === 'Puerto Rico (2023'
sort: {y: "x", reverse: true},
}),
// Plot.ruleX([credito_promedio]),
// Plot.text(thedata, {
// filter: d => d[thevariable.variable] < 0,
// fontFamily: 'jaf-bernino-sans',
// fontVariant: 'tabular-nums',
// fontWeight: 'bold',
// fontSize: 14.5,
// y: thevariable.variable,
// x: 'state',
// fill: d => d.state === 'Puerto Rico' ? 'black' : 'black',
// text: d => d3.format('$,.0f')(d[thevariable.variable]),
// // dx: -5,
// // textAnchor: 'end',
// sort: {x: "y", reverse: true}}),
Plot.text(thedata, {
filter: d => d[thevariable.variable] >= 30000,
fontFamily: 'jaf-bernino-sans',
fontVariant: 'tabular-nums',
fontWeight: 'bold',
fontSize: 14.5,
x: thevariable.variable,
// rotate: -90,
y: 'state',
fill: d => d.state === 'Puerto Rico' ? 'black' : 'black',
text: d => d3.format('$,.0f')(d[thevariable.variable]),
dx: -5,
textAnchor: 'end',
// dy: 8,
// sort: {y: "x", reverse: false},
}),
Plot.text(thedata, {
filter: d => d[thevariable.variable] < 30000,
fontFamily: 'jaf-bernino-sans',
fontVariant: 'tabular-nums',
fontWeight: 'bold',
fontSize: 14.5,
x: thevariable.variable,
// rotate: -90,
y: 'state',
fill: d => d.state === 'Puerto Rico' ? 'black' : 'black',
text: d => d3.format('$,.0f')(d[thevariable.variable]),
dx: 5,
stroke: 'white', strokeWidth: 6,
textAnchor: 'start',
// dy: -8,
// sort: {y: "x", reverse: false},
}),
Plot.text([0], {
x: eitc_promedio,
dx: 5,
// dy: -8,
y: d => 'Idaho',
text: d => `Average\n${d3.format('$,.0f')(eitc_promedio)}`,
textAnchor: 'start',
lineAnchor: 'bottom',
fontFamily: 'jaf-bernino-sans',
fontWeight: 'bold',
fontSize: 13,
}),
]
})
// console.log('ugh', theplot)
add_estilos(theplot, 'somebar')

return theplot
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
data_by_year_sexo_estado_civil
Insert cell
Insert cell
Insert cell
credito_trabajo_promedio_by_fuente_ingreso
Insert cell
Insert cell
fuente_ingreso_newline_labels = Object({
'Empleado de empresa privada': 'Empleado de\nempresa privada',
'Empleado del gobierno federal': 'Empleado del\ngobierno federal',
'Empleado del gobierno': 'Empleado del\ngobierno local',
'Otro': 'Otra',
'Retirado / Pensionado': 'Retirado\no pensionado',
'Trabajo por cuenta propia': 'Trabajo por\ncuenta propia',
})
Insert cell
client
with data_2023 as (
select categoria_ingreso, year_contributivo,
credito_trabajo, credito_trabajo_planillas, credito_trabajo_promedio
from fuente_ingreso
),
data_before as (
select
ifnull((map_from_entries([
('Empleado del Gobierno, municipios o Corporaciones Públicas', 'Empleado del gobierno'),
('Empleado de Empresa Privada', 'Empleado de empresa privada'),
('Trabajo por Cuenta Propia', 'Trabajo por cuenta propia'),
('Empleado del Gobierno Federal', 'Empleado del gobierno federal')
]))["Fuente de Mayor Ingreso"][1], "Fuente de Mayor Ingreso") as categoria_ingreso, year_contributivo,
credito_trabajo_casados + credito_trabajo_individual as credito_trabajo,
credito_trabajo_casados_planillas + credito_trabajo_individual_planillas as credito_trabajo_planillas,
credito_trabajo / credito_trabajo_planillas as credito_trabajo_promedio,
from eitc_t4_fuente_ingreso_2019_2022
where categoria_ingreso is not null
)

from data_2023
union all by name
from data_before
Insert cell
Insert cell
eitc_participation_rates = {
let data = await eitc_participation_rates_file.csv({typed: true})
data = aq.from(data)
// .derive({'latest_rate': d => d['Tax Year 2020']})
.rename({'Participation Rate by State': 'state'})
.fold(aq.not('state'), {'as': ['year', 'rate']})
.derive({
// state: d => op.capital(d.state),
year: d => +op.replace(d.year, 'Tax Year ', '')
})
.objects()
data.forEach(d => {
d.state = convertToTitleCase(d.state)
d.state = state_replace[d.state] ?? d.state
d.rate = (+d.rate?.slice(0, -1))/100
})
data= [{
state: 'Puerto Rico',
year: 2021,
// rate: .872,
rate: 1.234,
}, ...data]
return data
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
dot_abbreviation_hack = Object({
'AK': 'AK ',
'CO': '\n\n\n CO',
'CT': ' CT',
'DC': '\n\n\n DC',
'IA': '\n\n\n IA',
'ID': '\n\n\n\n ID',
'IL': '\n IL',
'KS': '\nKS ',
// 'MA': ' MA',
'ME': ' ME',
'MD': ' MD',
'MO': 'MO ',
'MN': 'MN ',
'MT': '\n\n\n MT',
// 'NC': 'NC ',
'ND': '\n\n\n\n ND',
'NH': '\n\n\n\n NH',
'NJ': ' NJ',
// 'NM': 'NM ',
'OH': '\nOH ',
'RI': '\n\nRI ',
'SD': 'SD ',
'UT': '\n\n\n\n UT',
'VT': '\n\n\n\nVT',
'WY': '\n\n\nWY ',
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
eitc_returns_with_local
select state, total_eitc, claims, average_eitc, jurisdiction, amount, share
from eitc_returns_with_local
where scale = 'total' and jurisdiction = 'federal'

Insert cell
Insert cell
{
let aq_eitc_local = aq.from(eitc_local_governments)

return aq.from(eitc_returns)
.join_left(aq_eitc_local, 'state')
// .filter(d => d.percentage_of_federal_credit !== undefined)
.impute({ percentage_of_federal_credit: () => 0 })
.rename({
'total_eitc': 'total_eitc_federal',
'average_eitc': 'average_eitc_federal',
})
.derive({
'total_eitc_local': d => (d.total_eitc_federal * d.percentage_of_federal_credit),
'average_eitc_local': d => (d.total_eitc_federal * d.percentage_of_federal_credit)/d.claims,
})
.derive({
'total_eitc': d => d.total_eitc_federal + d.total_eitc_local,
'average_eitc': d => d.average_eitc_federal + d.average_eitc_local
})
.fold(aq.not('state', 'claims', 'year', 'percentage_of_federal_credit', 'refundable', 'total_eitc', 'average_eitc'), {as: ['eitc_var', 'amount']})
.derive({
'scale': d => op.split(d.eitc_var, '_')[0],
'jurisdiction': d => op.split(d.eitc_var, '_')[2],
})
.derive({
'share': d => d.amount / (d.scale === 'total' ? d.total_eitc : d.average_eitc),
})
.objects()
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
irs_collections = {
let data = await irs_collections_file.csv({typed: true})
// collections estan en thousands y las llevamos a ones
data = aq.from(data)
.derive({
collections: d => d.collections*1000
})
.objects()
return data
}
Insert cell
federal_outflows = aq.from(poverty_table)
.select('state', 'total_population')
.join_full(aq.from(usaspending_obligations))
.join_full(aq.from(irs_collections))
.derive({
'outflow': d => d.collections - d.obligations, // outflow means FROM the state TO los federales
})
.derive({
'outflow_per_capita': d => d.outflow / d.total_population,
})
.orderby(aq.desc('outflow_per_capita'))
.objects()
Insert cell
Insert cell
Insert cell
htl.html`
<style>
figcaption {
max-width: 100%;
}
</style>
`
Insert cell
Insert cell
Insert cell
fonts_css_content = await fetch(fonts_link).then(r => r.text())
Insert cell
fonts_css = htl.html`
<style>
${fonts_css_content}
<style/>
`
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
import {municipios_geojson} from "22dc6182207c44c8"
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
db_instance.getVersion()
Insert cell
duckdb = import(
"https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.28.1-dev232.0/+esm"
)
Insert cell
bundle = {
const bundles = duckdb.getJsDelivrBundles()
if (duckdb_bundle === 'Auto') {
return duckdb.selectBundle(bundles)
} else {
const bun = bundles[duckdb_bundle]
bun['pthreadWorker'] = null;
return bun;
}
// return bundles['mvp']
}
Insert cell
async function makeDB() {
const logger = new duckdb.ConsoleLogger();
const worker = await duckdb.createWorker(bundle.mainWorker);
const db = new duckdb.AsyncDuckDB(logger, worker);
await db.instantiate(bundle.mainModule);
return db
}
Insert cell
db_instance = {
// Initialize database
const db = await makeDB()

// Insert files directly into tables
// await insertFile(db, 'penguins_file', penguins_file);
// await insertFile(db, 'eitc_t4_fuente_ingreso_2019_2022_file', eitc_t4_fuente_ingreso_2019_2022_file)
await insertFile(db, 'eitc_t6_estado_sexo_2019_2022_file', eitc_t6_estado_sexo_2019_2022_file)

// Alternatively, register file into the db's filesystem
const db_buffer = await eitc_2023_db_file.arrayBuffer();
await db.registerFileBuffer(
'eitc_2023_database.db', // Choose filename to use inside db
new Uint8Array(db_buffer)
);

const t4_buffer = await eitc_t4_fuente_ingreso_2019_2022_file.arrayBuffer();
await db.registerFileBuffer(
'eitc_t4_fuente_ingreso_2019_2022.csv', // Choose filename to use inside db
new Uint8Array(t4_buffer)
);
return db
}
Insert cell
client = {
const client_class = (duckdb_client === 'Compatibility') ? DuckDBClientCompat : DuckDBClient;
const c = new client_class(db_instance);

// Attach external db
await c.query(`attach if not exists 'eitc_2023_database.db'`)
// await c.query(`use eitc_2023_database`)

// Create views in main db for convenience
// For some reason el 'use eitc_2023_database' statement is not working
// pero this workaround is good enough
const tables_in_db = (await c.query(`
select name
from (show all tables)
where database = 'eitc_2023_database'
`)).map(d => d.name)

for (let table_name of tables_in_db) {
await c.query(`
create or replace view ${table_name} as (
from eitc_2023_database.${table_name}
)
`)
}

await c.query(`
create or replace table eitc_t4_fuente_ingreso_2019_2022_file as (
from 'eitc_t4_fuente_ingreso_2019_2022.csv'
)
`)

await process_old_table(c, 'eitc_t4_fuente_ingreso_2019_2022_file', 'eitc_t4_fuente_ingreso_2019_2022')
await process_old_table(c, 'eitc_t6_estado_sexo_2019_2022_file', 'eitc_t6_estado_sexo_2019_2022')

// Create tables here
// await c.query(`
// create or replace table penguins as (
// from penguins_file
// )
// `)
// await c.query(`
// create or replace table iris as (
// from 'iris.csv' -- Filename used as first arg to db.registerFileBuffer
// )
// `)
return c;
}
Insert cell
async function process_old_table(client, input_table, output_table) {
// client
// const input_table = 'eitc_t6_estado_sexo_2019_2022_file'
// const output_table = 'eitc_t6_estado_sexo_2019_2022'

// const rename_mapper = Object({
// 'Estado Civil': 'estado_civil',
// 'Sexo Contribuyente': 'sexo',
// 'Año contributivo': 'year_contributivo',
// 'Casados - Total Ingreso bruto ganado para la determinación del EITC': 'ingreso_ganado_casados',
// 'Planillas Casados - Total Ingreso bruto ganado para la determinación del EITC': 'ingreso_ganado_casados_planillas',
// })
let rename_mapper = structuredClone(planillas_column_label_to_name)
rename_mapper = {...rename_mapper, ...{
'Sexo Contribuyente': 'sexo',
}}

const input_columns = (await client.query(`describe ${input_table}`)).map(d => d.column_name)
const renameable_columns = input_columns.filter(d => d in rename_mapper)

let rename_query = ''
if (renameable_columns.length > 0) {
rename_query = 'select\n'
for (let col of input_columns) {
rename_query += `\t"${col}"${renameable_columns.includes(col) ? ' as '+rename_mapper[col] : ''},\n`
}
}
rename_query += `from ${input_table}`

let final_query = `
create or replace table ${output_table} as (
with renamed as (
${rename_query}
)

from renamed
)
`

await client.query(final_query)
return final_query
}
Insert cell
client
describe eitc_t6_estado_sexo_2019_2022
Insert cell
client
describe estado_civil_sexo
Insert cell
Insert cell
class DuckDBClientCompat extends DuckDBClient {
async queryStream(query, params) {
const connection = await this._db.connect();
let reader, batch;
try {
if (params?.length > 0) {
const statement = await connection.prepare(query);
reader = await statement.send(...params);
} else {
reader = await connection.send(query);
}
batch = await reader.next();
if (batch.done) throw new Error("missing first batch");
} catch (error) {
await connection.close();
throw error;
}

// Mosaic utility: convert Arrow value to Javascript value
const converters = {}
batch.value.schema.fields.forEach(d => {
console.log('Type for ', d.name, d.type)
converters[d.name] = convertArrowValue(d.type)
})
return {
schema: getArrowTableSchema(batch.value),
async *readRows() {
try {
while (!batch.done) {
let batch_array = batch.value.toArray();

// Convert all values to Javascript version
let object_array = []
for (let i = 0; i < batch_array.length; i++) {
const d_proxy = batch_array[i];
const d_obj = {}
for (let k of Object.keys(converters)) {
d_obj[k] = converters[k](d_proxy[k])
}
object_array.push(d_obj)
}
yield object_array;
batch = await reader.next();
}
} finally {
await connection.close();
}
}
};
}
}
Insert cell
Insert cell
Insert cell
getArrowTableSchema = observable_stdlib.getArrowTableSchema
Insert cell
convertArrowValue = mosaic_core.convertArrowValue
Insert cell
observable_stdlib = await import('https://cdn.jsdelivr.net/npm/@observablehq/stdlib@5.8.7/+esm');
Insert cell
mosaic_core = await import('https://cdn.jsdelivr.net/npm/@uwdata/mosaic-core@0.9.0/+esm');
Insert cell
Insert cell
penguins_file = FileAttachment("penguins.csv")
Insert cell
iris_file = FileAttachment("iris.csv")
Insert cell
eitc_2023_db_file = FileAttachment("eitc_2023@1.db")
Insert cell
planillas_2023_metadata_file = FileAttachment("planillas_2023_metadata.xlsx")
Insert cell
eitc_returns_2023_file = FileAttachment("eitc_returns_2023.csv")
Insert cell
state_abbreviations_file = FileAttachment("states_abbreviations.csv")
Insert cell
poverty_income_file = FileAttachment("PovertyIncome-ACS-5y-2023.xlsx")
Insert cell
usaspending_obligations_file = FileAttachment("usaspending_obligations_scrape_Y2023_at_2025_02_10.csv")
Insert cell
irs_collections_file = FileAttachment("irs_gross_collections_2023.csv")
Insert cell
eitc_participation_rates_file = FileAttachment("eitc_participation_rates@1.csv")
Insert cell
Insert cell
eitc_t4_fuente_ingreso_2019_2022_file = FileAttachment("eitc_t4_fuente_ingreso_2019_2022.csv")
Insert cell
eitc_t6_estado_sexo_2019_2022_file = FileAttachment("eitc_t6_estado_sexo_2019_2022.parquet")
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more