Public
Edited
Oct 17, 2024
11 stars
GeoParquet viewer with DuckDB spatial
La discrétisation "Head/tail" valorise mieux les phénomènes hiérarchisés
Nested means avec DuckDB
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/**
* Head/tail algorithm v1.0 based on Jiang (2019).
* @param {Number[]} values - An array of numerical values.
* @param {Number} nb - Optional number of class. If nb < natural breaks results, upper classes are merged. If nb > natural breaks results, nb is not used and the natural breaks results is returned.
* @returns {Number[]} An array of break points with min and max.
*/
function headtail_js(values, options = {}) {
const { nb, threshold = 0.4, minmax = false } = options;
let min, max;

if (minmax) [min, max] = d3.extent(values);

const breaks = [];
if (minmax) breaks.push(min);

/**
* A recursive function that calculates the next break point.
* @param {Number[]} values - An array of numerical values.
*/
function getBreak(values) {
// Add mean to breaks value
const mean = d3.mean(values);
breaks.push(mean);

// Recursive call to get next break point
const head = values.filter((d) => d > mean);
while (head.length > 1 && head.length / values.length <= threshold)
return getBreak(head);
}

getBreak(values);

// Handle optional number of class
if (nb && nb !== null) {
const diff = nb - 1 - breaks.length;
if (diff < 0) breaks.splice(nb - 1);
}

// Add max to breaks
if (minmax) breaks.push(max);

return breaks;
}
Insert cell
Insert cell
Insert cell
macro_sql = {
const conn = await db.connect()
await conn.query(`
CREATE OR REPLACE FUNCTION headtail_discr(tbname, varnum, nb := 10, threshold := 0.4) AS (

WITH RECURSIVE headtail(break, values_count) AS (
-- Initialisation avec break = moyenne, values_count = nb d'observations
FROM query_table(tbname)
SELECT avg(varnum), -- break
count(*) -- values_count
UNION ALL
-- headtail suivant se réfère à la dernière ligne de la table en cours de croissance
FROM query_table(tbname), headtail
SELECT avg(varnum), -- next break
count(*) head_count -- next values_count
WHERE varnum > headtail.break
GROUP BY ALL
HAVING head_count > 1 AND head_count / headtail.values_count <= threshold
)

FROM headtail
SELECT list(break)[1:nb] AS breaks
)`)
await conn.close()

return "Macro headtail_discr créée dans DuckDB"
}
Insert cell
Insert cell
Insert cell
Insert cell
macro_sql_v2 = {
const conn = await db.connect()
await conn.query(`
CREATE OR REPLACE FUNCTION headtail_discr_v2(tbname, varnum, nb := 10, threshold := 0.4) AS (

WITH RECURSIVE headtail(break, values_count, l_pct_head) AS (
FROM query_table(tbname)
SELECT avg(varnum),
count(*),
[]::double[]
UNION ALL (
WITH t1 AS (
FROM query_table(tbname), headtail
SELECT avg(varnum) next_break,
count(*) h_count,
headtail.values_count v_count,
headtail.l_pct_head l
WHERE varnum > headtail.break
GROUP BY ALL
)
FROM t1
SELECT next_break, h_count,
list_append(l, h_count / v_count) next_l
WHERE h_count > 1 AND list_avg(next_l) <= threshold
)
)
FROM headtail
SELECT list(break)[1:nb] AS breaks
)`)
await conn.close()

return "Macro headtail_discr_v2 créée dans DuckDB"
}
Insert cell
headtail_loyers_sql_v2 = {
loyers2023
macro_sql
const conn = await db.connect()

const rs = await conn.query(`SELECT headtail_discr_v2(loyers2023, loypredm2, threshold := 0.42) breaks`)
await conn.close()

return [...rs.getChild('breaks').get(0)].map(d => parseFloat(d.toFixed(3)))
}
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more