EITC para el año contributivo 2023 / José M. Zavala González

-- Show every row and column of ingreso_ganado.
select * from ingreso_ganado

-- Stack the estado_civil rows on top of the 2019-2022 history so both share one column layout.
with data_2023 as (
    -- Rows are taken as-is from estado_civil.
    select
        year_contributivo,
        estado_civil,
        credito_trabajo,
        credito_trabajo_planillas,
        credito_trabajo_promedio
    from estado_civil
),
data_before as (
    -- The historical table splits amounts into "casados" and "individual" columns;
    -- add them per group and derive the average from the two totals.
    select
        year_contributivo,
        estado_civil,
        sum(credito_trabajo_casados) + sum(credito_trabajo_individual) as credito_trabajo,
        sum(credito_trabajo_casados_planillas) + sum(credito_trabajo_individual_planillas) as credito_trabajo_planillas,
        credito_trabajo / credito_trabajo_planillas as credito_trabajo_promedio
    from eitc_t6_estado_sexo_2019_2022
    group by all
)
select * from data_2023
union all by name
select * from data_before

// Diagnostic cell: lists GeoJSON features that have no `eitc_municipal` record attached.
// `en_hacienda` is read from that missing record, so it is always undefined here.
data_by_municipio_with_geojson.features
  .filter(({properties}) => properties.eitc_municipal === undefined)
  .map(({properties}) => ({
    en_geojson: properties.municipio,
    en_hacienda: properties.eitc_municipal?.municipio,
  }))

// Label overrides: each listed municipality maps to its own name prefixed with a newline.
// NOTE(review): presumably this pushes the label down one line to avoid overlaps — confirm.
municipios_label_hack = Object.fromEntries(
  ['Quebradillas', 'Camuy', 'Guaynabo', 'Canóvanas']
    .map(municipio => [municipio, `\n${municipio}`])
)

data_by_municipio_with_geojson = add_data_to_geojson(

municipios_geojson,

data_by_municipio,

['municipio', 'municipio'],

'eitc_municipal'

)

// Display cell: evaluates to `datos_by_estado_civil_ingreso_ganado_pivot_by_sexo` (defined elsewhere) for inspection.
datos_by_estado_civil_ingreso_ganado_pivot_by_sexo

participacion_laboral = {

let data = await FileAttachment("tasa_participacion_laboral@1.csv").csv({typed: true})

data = aq.from(data).derive({

rate: d => d.rate /100

})

return data

}

{

// let thevariable = variables_metadata.filter(d => (d.dato === dato_input) & (d.promedio_o_total === 'promedio'))[0];

let thevariable = {variable: 'outflow_per_capita'}

let thedata = federal_outflows.filter(d => d.outflow_per_capita < 0)

thedata = aq.from(thedata)

.derive({

'outflow_per_capita': d => d.outflow_per_capita*-1

})

.derive({

'below_pr': d => d.outflow_per_capita > 10300,

})

// .orderby('outflow_per_capita')

let eitc_promedio = aq.from(thedata)

.rollup({

'outflow_per_capita': op.mean('outflow_per_capita')

})

.objects()[0]['outflow_per_capita']

let state_labels = {

'District of Columbia': 'District of\nColumbia',

}

let theplot = Plot.plot({

marginLeft: 160,

// marginRight: 70,

marginBottom: 50,

// marginTop: 30,

// width: 1000,

height: 700,

// caption: 'ACTUALIZAR MOVER XAXIS LABEL AL CENTRO',

// x: {percent: true,},

marks: [

Plot.axisX({

fontFamily: 'jaf-bernino-sans',

fontVariant: 'tabular-nums',

tickSize: 0,

fontSize: 16,

tickFormat: '$,.0s',

label: 'Deficits in Net Federal Expenditures per Capita',

}),

Plot.axisY({

reverse: true,

label: null,

fontFamily: 'jaf-bernino-sans',

tickSize: 0,

fontSize: 16,

// rotate: -90,

// reverse: true,

// tickSpacing: .3,

// tickPadding: 30,

textAnchor: 'end', lineAnchor: 'middle',

// tickFormat: d => state_labels[d] ?? d,

}),

Plot.ruleX([eitc_promedio]),

Plot.barX(thedata, {

x: thevariable.variable,

y: 'state',

// fill: EA_COLORES_MAP['INDIGO'],

fill: d => d.state === 'Puerto Rico' ? EA_COLORES_MAP['INDIGO'] : (

d.below_pr ? chroma(EA_COLORES_MAP['GREEN']).alpha(0.35).hex() : '#cdd1ec'

),

// opacity: d => d['País'] === 'Puerto Rico (2023'

sort: {y: "x", reverse: true},

}),

// Plot.ruleX([credito_promedio]),

// Plot.text(thedata, {

// filter: d => d[thevariable.variable] < 0,

// fontFamily: 'jaf-bernino-sans',

// fontVariant: 'tabular-nums',

// fontWeight: 'bold',

// fontSize: 14.5,

// y: thevariable.variable,

// x: 'state',

// fill: d => d.state === 'Puerto Rico' ? 'black' : 'black',

// text: d => d3.format('$,.0f')(d[thevariable.variable]),

// // dx: -5,

// // textAnchor: 'end',

// sort: {x: "y", reverse: true}}),

Plot.text(thedata, {

filter: d => d[thevariable.variable] >= 30000,

fontFamily: 'jaf-bernino-sans',

fontVariant: 'tabular-nums',

fontWeight: 'bold',

fontSize: 14.5,

x: thevariable.variable,

// rotate: -90,

y: 'state',

fill: d => d.state === 'Puerto Rico' ? 'black' : 'black',

text: d => d3.format('$,.0f')(d[thevariable.variable]),

dx: -5,

textAnchor: 'end',

// dy: 8,

// sort: {y: "x", reverse: false},

}),

Plot.text(thedata, {

filter: d => d[thevariable.variable] < 30000,

fontFamily: 'jaf-bernino-sans',

fontVariant: 'tabular-nums',

fontWeight: 'bold',

fontSize: 14.5,

x: thevariable.variable,

// rotate: -90,

y: 'state',

fill: d => d.state === 'Puerto Rico' ? 'black' : 'black',

text: d => d3.format('$,.0f')(d[thevariable.variable]),

dx: 5,

stroke: 'white', strokeWidth: 6,

textAnchor: 'start',

// dy: -8,

// sort: {y: "x", reverse: false},

}),

Plot.text([0], {

x: eitc_promedio,

dx: 5,

// dy: -8,

y: d => 'Idaho',

text: d => `Average\n${d3.format('$,.0f')(eitc_promedio)}`,

textAnchor: 'start',

lineAnchor: 'bottom',

fontFamily: 'jaf-bernino-sans',

fontWeight: 'bold',

fontSize: 13,

}),

]

})

// console.log('ugh', theplot)

add_estilos(theplot, 'somebar')

return theplot

}

// Display cell: evaluates to `data_by_year_sexo_estado_civil` (defined elsewhere) for inspection.
data_by_year_sexo_estado_civil

// Display cell: evaluates to `credito_trabajo_promedio_by_fuente_ingreso` (defined elsewhere) for inspection.
credito_trabajo_promedio_by_fuente_ingreso

// Display labels for income-source categories: most entries insert a line break;
// two also reword the category ('Empleado del gobierno', 'Otro').
fuente_ingreso_newline_labels = Object.fromEntries([
  ['Empleado de empresa privada', 'Empleado de\nempresa privada'],
  ['Empleado del gobierno federal', 'Empleado del\ngobierno federal'],
  ['Empleado del gobierno', 'Empleado del\ngobierno local'],
  ['Otro', 'Otra'],
  ['Retirado / Pensionado', 'Retirado\no pensionado'],
  ['Trabajo por cuenta propia', 'Trabajo por\ncuenta propia'],
])

eitc_participation_rates = {

let data = await eitc_participation_rates_file.csv({typed: true})

data = aq.from(data)

// .derive({'latest_rate': d => d['Tax Year 2020']})

.rename({'Participation Rate by State': 'state'})

.fold(aq.not('state'), {'as': ['year', 'rate']})

.derive({

// state: d => op.capital(d.state),

year: d => +op.replace(d.year, 'Tax Year ', '')

})

.objects()

data.forEach(d => {

d.state = convertToTitleCase(d.state)

d.state = state_replace[d.state] ?? d.state

d.rate = (+d.rate?.slice(0, -1))/100

})

data= [{

state: 'Puerto Rico',

year: 2021,

// rate: .872,

rate: 1.234,

}, ...data]

return data

}

// Per-state replacement strings for abbreviation labels: each value is the abbreviation
// padded with spaces and/or leading newlines.
// NOTE(review): presumably hand-tuned offsets to keep dot labels from overlapping — confirm.
dot_abbreviation_hack = ({
  AK: 'AK ',
  CO: '\n\n\n CO',
  CT: ' CT',
  DC: '\n\n\n DC',
  IA: '\n\n\n IA',
  ID: '\n\n\n\n ID',
  IL: '\n IL',
  KS: '\nKS ',
  // MA: ' MA',   (disabled in the original)
  ME: ' ME',
  MD: ' MD',
  MO: 'MO ',
  MN: 'MN ',
  MT: '\n\n\n MT',
  // NC: 'NC ',   (disabled in the original)
  ND: '\n\n\n\n ND',
  NH: '\n\n\n\n NH',
  NJ: ' NJ',
  // NM: 'NM ',   (disabled in the original)
  OH: '\nOH ',
  RI: '\n\nRI ',
  SD: 'SD ',
  UT: '\n\n\n\n UT',
  VT: '\n\n\n\nVT',
  WY: '\n\n\nWY ',
})

{

let aq_eitc_local = aq.from(eitc_local_governments)

return aq.from(eitc_returns)

.join_left(aq_eitc_local, 'state')

// .filter(d => d.percentage_of_federal_credit !== undefined)

.impute({ percentage_of_federal_credit: () => 0 })

.rename({

'total_eitc': 'total_eitc_federal',

'average_eitc': 'average_eitc_federal',

})

.derive({

'total_eitc_local': d => (d.total_eitc_federal * d.percentage_of_federal_credit),

'average_eitc_local': d => (d.total_eitc_federal * d.percentage_of_federal_credit)/d.claims,

})

.derive({

'total_eitc': d => d.total_eitc_federal + d.total_eitc_local,

'average_eitc': d => d.average_eitc_federal + d.average_eitc_local

})

.fold(aq.not('state', 'claims', 'year', 'percentage_of_federal_credit', 'refundable', 'total_eitc', 'average_eitc'), {as: ['eitc_var', 'amount']})

.derive({

'scale': d => op.split(d.eitc_var, '_')[0],

'jurisdiction': d => op.split(d.eitc_var, '_')[2],

})

.derive({

'share': d => d.amount / (d.scale === 'total' ? d.total_eitc : d.average_eitc),

})

.objects()

}

irs_collections = {

let data = await irs_collections_file.csv({typed: true})

// collections estan en thousands y las llevamos a ones

data = aq.from(data)

.derive({

collections: d => d.collections*1000

})

.objects()

return data

}

federal_outflows = aq.from(poverty_table)

.select('state', 'total_population')

.join_full(aq.from(usaspending_obligations))

.join_full(aq.from(irs_collections))

.derive({

'outflow': d => d.collections - d.obligations, // outflow means FROM the state TO los federales

})

.derive({

'outflow_per_capita': d => d.outflow / d.total_population,

})

.orderby(aq.desc('outflow_per_capita'))

.objects()

// Style cell: renders a <style> element that sets max-width: 100% on figcaption elements.
htl.html`

<style>

figcaption {

max-width: 100%;

}

</style>

`

// URL of the hosted font stylesheet.
fonts_link = "https://use.typekit.net/szl4srx.css"

// Text of that stylesheet, fetched so it can be inlined below.
fonts_css_content = (await fetch(fonts_link)).text()

// <style> element carrying the fetched CSS. The closing tag is now explicit; the original
// left <style> open and relied on the HTML parser to close it at the end of the template.
fonts_css = htl.html`

<style>
${fonts_css_content}
</style>`

import {municipios_geojson} from "22dc6182207c44c8"

// Display cell: calls getVersion() on the database built by the `db_instance` cell.
db_instance.getVersion()

// DuckDB-WASM loaded from jsDelivr, pinned to an exact version.
duckdb = import("https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.28.1-dev232.0/+esm")

bundle = {

const bundles = duckdb.getJsDelivrBundles()

if (duckdb_bundle === 'Auto') {

return duckdb.selectBundle(bundles)

} else {

const bun = bundles[duckdb_bundle]

bun['pthreadWorker'] = null;

return bun;

}

// return bundles['mvp']

}

/**
 * Create a DuckDB-WASM database from the selected `bundle`.
 *
 * Starts a worker from `bundle.mainWorker`, wraps it in an AsyncDuckDB with a
 * console logger, and instantiates `bundle.mainModule`.
 *
 * @returns {Promise<object>} the instantiated AsyncDuckDB.
 */
async function makeDB() {
  const db = new duckdb.AsyncDuckDB(
    new duckdb.ConsoleLogger(),
    await duckdb.createWorker(bundle.mainWorker),
  );
  await db.instantiate(bundle.mainModule);
  return db;
}

db_instance = {

// Initialize database

const db = await makeDB()

// Insert files directly into tables

// await insertFile(db, 'penguins_file', penguins_file);

// await insertFile(db, 'eitc_t4_fuente_ingreso_2019_2022_file', eitc_t4_fuente_ingreso_2019_2022_file)

await insertFile(db, 'eitc_t6_estado_sexo_2019_2022_file', eitc_t6_estado_sexo_2019_2022_file)

// Alternatively, register file into the db's filesystem

const db_buffer = await eitc_2023_db_file.arrayBuffer();

await db.registerFileBuffer(

'eitc_2023_database.db', // Choose filename to use inside db

new Uint8Array(db_buffer)

);

const t4_buffer = await eitc_t4_fuente_ingreso_2019_2022_file.arrayBuffer();

await db.registerFileBuffer(

'eitc_t4_fuente_ingreso_2019_2022.csv', // Choose filename to use inside db

new Uint8Array(t4_buffer)

);

return db

}

client = {

const client_class = (duckdb_client === 'Compatibility') ? DuckDBClientCompat : DuckDBClient;

const c = new client_class(db_instance);

// Attach external db

await c.query(`attach if not exists 'eitc_2023_database.db'`)

// await c.query(`use eitc_2023_database`)

// Create views in main db for convenience

// For some reason el 'use eitc_2023_database' statement is not working

// pero this workaround is good enough

const tables_in_db = (await c.query(`

select name

from (show all tables)

where database = 'eitc_2023_database'

`)).map(d => d.name)

for (let table_name of tables_in_db) {

await c.query(`

create or replace view ${table_name} as (

from eitc_2023_database.${table_name}

)

`)

}

await c.query(`

create or replace table eitc_t4_fuente_ingreso_2019_2022_file as (

from 'eitc_t4_fuente_ingreso_2019_2022.csv'

)

`)

await process_old_table(c, 'eitc_t4_fuente_ingreso_2019_2022_file', 'eitc_t4_fuente_ingreso_2019_2022')

await process_old_table(c, 'eitc_t6_estado_sexo_2019_2022_file', 'eitc_t6_estado_sexo_2019_2022')

// Create tables here

// await c.query(`

// create or replace table penguins as (

// from penguins_file

// )

// `)

// await c.query(`

// create or replace table iris as (

// from 'iris.csv' -- Filename used as first arg to db.registerFileBuffer

// )

// `)

return c;

}

/**
 * Copy `input_table` into `output_table`, renaming columns whose labels appear in
 * `planillas_column_label_to_name` (plus 'Sexo Contribuyente' -> 'sexo').
 *
 * Columns without a mapping are copied under their original name. The output
 * table is always created, even when no column needs renaming.
 *
 * @param {object} client - database client exposing `query(sql)`; the `describe`
 *   result must be an array of rows with a `column_name` field.
 * @param {string} input_table - name of the table to read.
 * @param {string} output_table - name of the table to create or replace.
 * @returns {Promise<string>} the `create or replace table` statement that was executed.
 */
async function process_old_table(client, input_table, output_table) {
  // Double-quote an identifier, doubling embedded quotes, so labels containing
  // spaces, accents or punctuation are valid in the select list.
  const quote_ident = (name) => `"${String(name).replaceAll('"', '""')}"`;

  // Build a fresh mapping so the shared lookup object is never mutated.
  const rename_mapper = {
    ...planillas_column_label_to_name,
    'Sexo Contribuyente': 'sexo',
  };

  const described = await client.query(`describe ${input_table}`);
  const input_columns = described.map(d => d.column_name);

  // Own-property check only: `in` would also match inherited keys such as 'toString'.
  const select_list = input_columns
    .map(col => Object.hasOwn(rename_mapper, col)
      ? `\t${quote_ident(col)} as ${quote_ident(rename_mapper[col])}`
      : `\t${quote_ident(col)}`)
    .join(',\n');

  // Fall back to `select *` if the description came back empty.
  const rename_query = input_columns.length > 0
    ? `select\n${select_list}\nfrom ${input_table}`
    : `select * from ${input_table}`;

  const final_query = `
    create or replace table ${output_table} as (
      with renamed as (
        ${rename_query}
      )
      from renamed
    )
  `;

  await client.query(final_query);
  return final_query;
}

/**
 * DuckDBClient variant whose `queryStream` converts every Arrow value to a plain
 * JavaScript value (via `convertArrowValue`) before yielding rows.
 */
class DuckDBClientCompat extends DuckDBClient {
  /**
   * Run `query` and stream its result as batches of plain row objects.
   *
   * @param {string} query - SQL text.
   * @param {Array} [params] - when non-empty, the query is prepared and sent with these values.
   * @returns {Promise<{schema: *, readRows: Function}>} `schema` comes from
   *   `getArrowTableSchema` on the first batch; `readRows()` is an async generator
   *   yielding one array of row objects per batch.
   * @throws {Error} "missing first batch" when the reader yields nothing; any query
   *   error is rethrown after the connection is closed.
   */
  async queryStream(query, params) {
    const connection = await this._db.connect();
    let reader, batch, schema;
    // Column name -> function converting that column's Arrow value to a JS value.
    const converters = {};
    try {
      if (params?.length > 0) {
        const statement = await connection.prepare(query);
        reader = await statement.send(...params);
      } else {
        reader = await connection.send(query);
      }
      batch = await reader.next();
      if (batch.done) throw new Error("missing first batch");

      // Built inside the try so a failure here also closes the connection.
      for (const field of batch.value.schema.fields) {
        console.log('Type for ', field.name, field.type)
        converters[field.name] = convertArrowValue(field.type);
      }
      schema = getArrowTableSchema(batch.value);
    } catch (error) {
      await connection.close();
      throw error;
    }

    // The column list is fixed by the first batch, so compute it once.
    const column_converters = Object.entries(converters);

    return {
      schema,
      async *readRows() {
        try {
          while (!batch.done) {
            const batch_array = batch.value.toArray();
            // Copy each Arrow row proxy into a plain object of converted values.
            const object_array = [];
            for (let i = 0; i < batch_array.length; i++) {
              const d_proxy = batch_array[i];
              const d_obj = {};
              for (const [name, convert] of column_converters) {
                d_obj[name] = convert(d_proxy[name]);
              }
              object_array.push(d_obj);
            }
            yield object_array;
            batch = await reader.next();
          }
        } finally {
          // Runs when the stream is exhausted, abandoned or fails.
          await connection.close();
        }
      }
    };
  }
}

// Schema helper taken from the Observable stdlib module loaded below; called by DuckDBClientCompat.queryStream.
getArrowTableSchema = observable_stdlib.getArrowTableSchema

// Arrow-to-JavaScript value converter factory taken from the Mosaic core module loaded below.
convertArrowValue = mosaic_core.convertArrowValue

// Observable stdlib, loaded from jsDelivr at a pinned version.
observable_stdlib = await import('https://cdn.jsdelivr.net/npm/@observablehq/stdlib@5.8.7/+esm');

// Mosaic core, loaded from jsDelivr at a pinned version.
mosaic_core = await import('https://cdn.jsdelivr.net/npm/@uwdata/mosaic-core@0.9.0/+esm');

// File attachments used by the notebook, one cell per file.
// NOTE(review): in the visible portion, penguins_file and iris_file are referenced only
// from commented-out code — confirm whether they are still needed.
penguins_file = FileAttachment("penguins.csv")

iris_file = FileAttachment("iris.csv")

// Database file registered inside DuckDB as 'eitc_2023_database.db' by the db_instance cell.
eitc_2023_db_file = FileAttachment("eitc_2023@1.db")

planillas_2023_metadata_file = FileAttachment("planillas_2023_metadata.xlsx")

eitc_returns_2023_file = FileAttachment("eitc_returns_2023.csv")

state_abbreviations_file = FileAttachment("states_abbreviations.csv")

poverty_income_file = FileAttachment("PovertyIncome-ACS-5y-2023.xlsx")

usaspending_obligations_file = FileAttachment("usaspending_obligations_scrape_Y2023_at_2025_02_10.csv")

// Read by the irs_collections cell.
irs_collections_file = FileAttachment("irs_gross_collections_2023.csv")

// Read by the eitc_participation_rates cell.
eitc_participation_rates_file = FileAttachment("eitc_participation_rates@1.csv")

// Registered inside DuckDB as 'eitc_t4_fuente_ingreso_2019_2022.csv' by the db_instance cell.
eitc_t4_fuente_ingreso_2019_2022_file = FileAttachment("eitc_t4_fuente_ingreso_2019_2022.csv")

// Inserted into DuckDB as table 'eitc_t6_estado_sexo_2019_2022_file' by the db_instance cell.
eitc_t6_estado_sexo_2019_2022_file = FileAttachment("eitc_t6_estado_sexo_2019_2022.parquet")

One platform to build and deploy the best data apps