Published
Edited
Jun 2, 2022
1 fork
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// The table name depends on whether we're querying the public or private repository.
tableName = datasource === "Public" ? '"neumark/revolut-sample:latest"."file_1"' : '"neumark/revolut:latest"."file_1"'
Insert cell
Insert cell
Insert cell
query = datasource === "Public" ? q : (queryString) => db.query(queryString);
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
topByTotalSpent = query(`SELECT
"Description" AS business,
CAST(SUM("Amount")*-1 AS DOUBLE PRECISION) as total_spent
FROM
${tableName}
WHERE "Type" = 'CARD_PAYMENT'
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;`)
Insert cell
Plot.plot({
style: { paddingLeft: 20, overflow: "visible" },
width: 1200,
height: 500,
x: {
domain: d3.sort(topByTotalSpent, d => d).map(d => d.business)
},
y: {
grid: true
},
marks: [
Plot.barY(topByTotalSpent, {y: "total_spent", x: "business"}),
Plot.ruleY([0])
]
})
Insert cell
Insert cell
// If you're using your own bank account statement, you probably need to add some businesses to the patterns below.
categories = ({
"groceries": [
new RegExp("Spar "),
new RegExp("^Cba Pri"),
new RegExp("^Penny "),
new RegExp("Lidl .*"),
new RegExp("Tesco"),
new RegExp("Auchan.*"),
new RegExp("Aldi .*"),
"Family Market",
"Sayed Parviz Kft",
new RegExp("szatyorbolt"),
new RegExp("E'lelmiszer")
],
"pets": [
"Tutivet Allatorvosi",
new RegExp("petissimo.hu"),
new RegExp("Zooplus Ag"),
new RegExp("Fressnapf "),
new RegExp("www\\.nrdogs\\.hu")
],
"furniture": [
new RegExp("Ikea"),
new RegExp("Mo:max"),
new RegExp("Diego"),
new RegExp("^Jysk"),
],
"coffee": [
"Harminckettesek Tere",
"Koronaherceg Kft.",
"Lipo'ti Pe'kse'g E's K",
new RegExp("^Rauf "),
new RegExp("^Sumup .*mtrm Roast$"),
"Muterem Kavezo",
new RegExp("outside The Bo$"),
new RegExp("^Starbucks"),
"Addicted To Caffeine",
new RegExp("Kavezo"),
"Budai 3"
],
"pharmacy": [
"Benu Hajnal Gyo'gyszer",
"Harom Sas Bt.",
new RegExp("Gyogyszertar"),
"Fehe'rva'r Gyo'gyszert"
],
"household": [
new RegExp("Rossmann "),
new RegExp("^Dm ")
],
"restaurant": [
"Padthai Wokbar",
"Jk-Gold Kft",
new RegExp("Delivery Hero Hungary"),
"Hummus Bar",
new RegExp("[p|P]izza"),
new RegExp("netpincer")
]
})
Insert cell
Insert cell
Insert cell
categorizedByTotalSpend = getCategoryTotals(categorise(await query(`
SELECT
"Description" AS business,
-- Note: the CAST is needed so the total_spent column contains numbers not strings
-- We multiply by -1 to make the amounts positive since spending shows up as a negative
-- value in our account history.
CAST(SUM("Amount")*-1 AS DOUBLE PRECISION) as total_spent
FROM
${tableName}
-- Filter transactions past date when we adopted our family dog.
WHERE "Type" = 'CARD_PAYMENT' AND "Started Date" > '2020-12-08'::DATE
GROUP BY 1
ORDER BY 2 DESC
LIMIT 100;`), categories))
Insert cell
Plot.plot({
style: { paddingLeft: 50, overflow: "visible" },
width: 1000,
x: {
domain: d3.sort(categorizedByTotalSpend, d => -d.total).map(d => d.category)
},
y: {
grid: true
},
marks: [
Plot.barY(categorizedByTotalSpend, {y: "total", x: "category"}),
Plot.ruleY([0])
]
})
Insert cell
Insert cell
addTooltips(Plot.plot({
y: {
grid: true
},
marks: [
Plot.line(
await query(`
WITH weekly_transactions AS (SELECT
t.week_start,
u.*
FROM (
SELECT * FROM generate_series(
'2018-01-01'::DATE,
'2022-06-01'::DATE,
'1 week'
) AS week_start
) t
JOIN (
SELECT
"Description" AS business,
"Started Date" as ts,
-- Note: the CAST is needed so the total_spent column contains numbers not strings
-- Multiply by -1 to make the amounts positive since spending shows up as a negative
-- value in the account statement.
CAST("Amount" * -1 AS DOUBLE PRECISION) as amount
FROM
${tableName}
WHERE
"Type" = 'CARD_PAYMENT'
) u ON
u.ts > t.week_start AND
u.ts <= t.week_start + interval '1 week')
SELECT
week_start::DATE as week,
SUM(amount) as amount
FROM weekly_transactions
GROUP BY 1
ORDER BY 1
`),
{marker: 'circle', x: "week", y: "amount", title: d => `${d.week.toISOString().split('T')[0]}: ${d.amount}`})
]
}))
Insert cell
Insert cell
Insert cell
sumByKey = (arr, keyField='week', valueField='amount', keyEncode=(x)=>x, keyDecode=(x)=>x) => [...(arr.reduce((acc, row) => {
const key = keyEncode(row[keyField]);
acc.set(key, (acc.get(key) ?? 0) + row[valueField]);
return acc;
}, new Map()).entries())].map(([key, value]) => ({[keyField]:keyDecode(key), [valueField]:value}))
Insert cell
weeklyCategorySpend = Object.entries(categorise(await query(`
WITH weekly_transactions AS (SELECT
t.week_start,
u.*
FROM (
SELECT * FROM generate_series(
'2018-01-01'::DATE,
'2022-06-01'::DATE,
'1 week'
) AS week_start
) t
JOIN (
SELECT
"Description" AS business,
"Started Date" as ts,
-- Note: the CAST is needed so the total_spent column contains numbers not strings
-- Multiply by -1 to make the amounts positive since spending shows up as a negative
-- value in the account statement.
CAST("Amount" * -1 AS DOUBLE PRECISION) as amount
FROM
${tableName}
WHERE
"Type" = 'CARD_PAYMENT'
) u ON
u.ts > t.week_start AND
u.ts <= t.week_start + interval '1 week')
SELECT
week_start::DATE as week,
business,
SUM(amount) as amount
FROM weekly_transactions
GROUP BY 1, 2
ORDER BY 1
`), categories)).flatMap(
([category, weeks]) => sumByKey(weeks).map(
({week, amount}) => ({week, amount, category}))).sort(
(a, b) => a.week - b.week)
Insert cell
addTooltips(Plot.plot({
color: {
legend: true
},
width: 1000,
style: { paddingLeft: 50, overflow: "visible" },
y: {
grid: true,
},
x: {
type: "band",
tickFormat: d => d.toLocaleString("en", {month: "narrow"}),
label: null
},
marks: [
Plot.barY(weeklyCategorySpend, {
order: "sum",
x: "week",
y: "amount",
fill: "category",
title: d => `${d.week.toISOString().split('T')[0]} - ${d.category}: ${d.amount}`}),
Plot.ruleY([0])
]
}))
Insert cell
Insert cell
addTooltips(Plot.plot({
color: {
legend: true
},
width: 1000,
style: { paddingLeft: 50, overflow: "visible" },
y: {
grid: true,
},
x: {
type: "band",
tickFormat: d => d.toLocaleString("en", {month: "narrow"}),
label: null
},
marks: [
Plot.barY(
Object.entries(categorise(await query(`
WITH monthly_transactions AS (SELECT
t.month_start,
u.*
FROM (
SELECT * FROM generate_series(
'2018-01-01'::DATE,
'2022-06-01'::DATE,
'1 month'
) AS month_start
) t
JOIN (
SELECT
"Description" AS business,
"Started Date" as ts
FROM
${tableName}
WHERE
"Type" = 'CARD_PAYMENT'
) u ON
u.ts > t.month_start AND
u.ts <= t.month_start + interval '1 month')
SELECT
month_start::DATE as month,
business,
COUNT(0)::INTEGER as purchases
FROM monthly_transactions
GROUP BY 1, 2
ORDER BY 1
`), categories)).flatMap(
([category, months]) => sumByKey(
months,
'month',
'purchases',
// These are needed because Javascript cant === compare Date objects. Instead, we use getTime() to get
// a numeric unix timestamp which can be compared. For charting Date objects work better though, so
// the last argument converts the timestamp back to a Date.
x => x.getTime(),
x => {return new Date(x)}
).map(
({month, purchases}) => ({month, purchases, category}))).sort(
(a, b) => a.month - b.month)
, {
order: "sum",
x: "month",
y: "purchases",
fill: "category",
title: d => `${d.month.toISOString().split('T')[0].slice(0,-3)} ${d.category}: ${d.purchases}`}),
Plot.ruleY([0])
]
}))


Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more