Public
Edited
Mar 28, 2023
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
db
Insert cell
db
SHOW TABLES
Insert cell
db
SELECT * FROM '0/0/0' WHERE song_id LIKE '%Don''t%'
Insert cell
tableNamesFromSQL
Insert cell
Insert cell
tableNames = formatRawQueryForUse(
await db.query("SHOW TABLES").then((d) => d)
).values.map(([table]) => table)
Insert cell
generateQueryStringForOneTable = (searchText, columns = "*") =>
(tableName) => {
const searchTerm = `'%${searchText}%'`;
return `SELECT ${columns} FROM '${tableName}' WHERE performer LIKE ${searchTerm}`;
}
Insert cell
generateQueryForAllTables = (tableNames, generateQueryStringForOneTable) => {
return (
tableNames
.map((name) => generateQueryStringForOneTable(name))
.join(" UNION ")
);
}
Insert cell
`SELECT * FROM '${"0/0/0"}' WHERE performer LIKE ${"'%John%'"} UNION SELECT * FROM '${"2/0/1"}' WHERE performer LIKE ${"'%John%'"}`
Insert cell
generateQueryForAllTables(tableNames, generateQueryStringForOneTable)
Insert cell
raw_query_result = db
.query(
generateQueryForAllTables(
tableNames,
generateQueryStringForOneTable("John L")
)
)
.then((d) => d)
Insert cell
raw_query_result
Insert cell
Insert cell
formatRawQueryForUse = (raw_query_result) => {
const temp = {
resultCount: raw_query_result?.toArray().length,
labels: [],
values: [],
keyValPairs: []
};

raw_query_result.schema.fields.forEach((field) => {
temp.labels.push(field.name);
const child = raw_query_result.getChild(field.name);
let vals = child?.toArray() || [];

vals.forEach((v, i) => {
if (!temp.values[i]) {
temp.values[i] = [];
}
if (!temp.keyValPairs[i]) {
temp.keyValPairs[i] = {};
}
temp.values[i].push(vals instanceof BigInt64Array ? Number(v) : v);
temp.keyValPairs[i][field.name] =
vals instanceof BigInt64Array ? Number(v) : v;
});
});

return temp;
}
Insert cell
formatRawQueryForUse(raw_query_result)
Insert cell
Insert cell
clicheQueryForATable = (songId, columns = "*") =>
(tableName) => {
const searchTerm = `'%${songId}%'`;
return `SELECT ${columns} FROM '${tableName}' WHERE song_id LIKE ${searchTerm}`;
}
Insert cell
songId = "Shape Of YouEd Sheeran"
Insert cell
lyricsForSong = db
.query(generateQueryForAllTables(tableNames, clicheQueryForATable(songId)))
.then((d) => formatRawQueryForUse(d))
Insert cell
lyricsForSong.keyValPairs[0].lyric_line
Insert cell
similar_index_array = JSON.parse(
lyricsForSong.keyValPairs[0]["similar_lyric_that_came_before_indices"]
)
Insert cell
similarForLyricLines = db
.query(
generateQueryForAllTables(
tableNames,
(tableName) =>
`SELECT * FROM '${tableName}' WHERE the_index IN (${similar_index_array.join(
", "
)})`
)
)
.then((d) =>
formatRawQueryForUse(d).keyValPairs.map(({ lyric_line }) => lyric_line)
)
Insert cell
Insert cell
getClicheDataForASong = async ({ songId, db }) => {
// FORMATTING
const formatRawQueryForUse = (raw_query_result) => {
const temp = {
resultCount: raw_query_result?.toArray().length,
labels: [],
values: [],
keyValPairs: []
};

raw_query_result.schema.fields.forEach((field) => {
temp.labels.push(field.name);
const child = raw_query_result.getChild(field.name);
let vals = child?.toArray() || [];

vals.forEach((v, i) => {
if (!temp.values[i]) {
temp.values[i] = [];
}
if (!temp.keyValPairs[i]) {
temp.keyValPairs[i] = {};
}
temp.values[i].push(vals instanceof BigInt64Array ? Number(v) : v);
temp.keyValPairs[i][field.name] =
vals instanceof BigInt64Array ? Number(v) : v;
});
});

return temp;
};
// Tables
const tableNames = formatRawQueryForUse(
await db.query("SHOW TABLES").then((d) => d)
).values.map(([table]) => table);

// Utility for quering all tables
const generateQueryForAllTables = (
tableNames,
generateQueryStringForOneTable
) => {
return tableNames
.map((name) => generateQueryStringForOneTable(name))
.join(" UNION ");
};

// Get lines for a song
const generateClicheQueryForATable =
(songId, columns = "*") =>
(tableName) => {
const searchTerm = `'%${songId}%'`;
return `SELECT ${columns} FROM '${tableName}' WHERE song_id LIKE ${searchTerm}`;
};

const rawLyricsQuery = await db.query(
generateQueryForAllTables(tableNames, generateClicheQueryForATable(songId))
);
const lyricsForSong = formatRawQueryForUse(rawLyricsQuery);

// Get cliches for a line
const querySimilar = async (similar_index_array) => {
if (similar_index_array.length === 0) {
return [];
}
const similarLyrics = await db.query(
generateQueryForAllTables(
tableNames,
(tableName) =>
`SELECT * FROM '${tableName}' WHERE the_index IN (${similar_index_array.join(
", "
)})`
)
);
return formatRawQueryForUse(similarLyrics).keyValPairs;
};

const getClichesForLyricLine = async (lyricLineRow) => {
const similar_before_index_array = JSON.parse(
lyricLineRow["similar_lyric_that_came_before_indices"]
);
const similar_after_index_array = JSON.parse(
lyricLineRow["similar_lyric_that_came_after_indices"]
);

return {
similarBefore: await querySimilar(similar_before_index_array),
similarAfter: await querySimilar(similar_after_index_array),
lyricLineRow
};
};

// Resolve promise for each row
return Promise.all(
lyricsForSong.keyValPairs.map((lyricLineRow) =>
getClichesForLyricLine(lyricLineRow)
)
);
}
Insert cell
getClicheDataForASong({
songId,
db
})
Insert cell
Insert cell
db
.query(
`SELECT * FROM '${"0/0/0"}' WHERE performer LIKE ${"'%John%'"} UNION SELECT * FROM '${"2/0/1"}' WHERE performer LIKE ${"'%John%'"}`
)
.then((d) => d)
Insert cell
scatterplot
Insert cell
tableNamesWithColName = tableNames
.map(({ name }) => `'${name}.song_id'`)
.join(", ")
Insert cell
db
-- SHOW TABLES;
-- SELECT * FROM ;
SELECT performer FROM '0/0/0' WHERE performer LIKE '%John%'
UNION
SELECT performer FROM '2/0/1' WHERE performer LIKE '%John%'
-- WHERE 'performer' IS 'John Lennon'
-- SELECT * FROM * WHERE lyric_viz = 'Oh, yeah'
Insert cell
scatterplot = {
const plot = new deepscatter.default("#plot", 900, 600);
await plot.plotAPI({
source_url: "https://davidnmora.github.io/lyric-viz/deepscatter-tiles",
point_size: 1,
max_points: 1e6,
zoom_balance: 0.35,
alpha: 40,
background_color: "#FFFAF2",
encoding: {
x: {
field: "x",
transform: "literal"
},
y: {
field: "y",
transform: "literal"
},
color: {
field: "generic_genre",
range: "category10"
}
}
});

invalidation.then(() => plot.destroy());
return plot;
}
Insert cell
Insert cell
db = {
const db = await DuckDBClient.of({});
return db;
}
Insert cell
Insert cell
async function create_duckdb_copy(tile) {
// Builds a copy on duckdb of the data in a tile.
// the new table will have the same name as the tile.
const key = tile.key;

const table_exists_already = db
.query(
`SELECT * FROM information_schema.tables WHERE table_name = '${key}'`
)
.then((d) => d.numRows > 0);

const table_is_downloaded = tile.download;
const [is_there, is_ready_to_post] = await Promise.all([
table_exists_already,
table_is_downloaded
]);

if (is_there) {
return key;
}
const tb = new arrow.Table(tile.record_batch.schema, tile.record_batch);
if (tb.length === 0) {
throw new Error("humbug");
}
const ipc = arrow.tableToIPC(tb);
const name = key;
const databadse = await db.db();
const conn = await databadse.connect();
await conn.query(`DROP TABLE IF EXISTS "${name}"`);
await conn.insertArrowFromIPCStream(ipc, {
name,
schema: "main", // Not sure what this is.
options: {} // no options.
});
await conn.close();
return key;
}
Insert cell
Insert cell
listener = {
// This being observable, the code below will run every time 'searchterm' changes.

// It creates a new async transformation on deepscatter that returns `1` for each entry where the regex matches, and `0` for each where it doesn't.
if (searchterm == "") {
return;
}
scatterplot._root.transformations[searchterm] = async function (tile) {
// First ensure it exists in duckdb.
await create_duckdb_copy(tile);
const key = tile.key;

// Get the regex query out of duckdb.
const TEXT_COLUMN_NAME = "lyric_line";
const value = await db.query(
`SELECT regexp_matches(${TEXT_COLUMN_NAME}, '${searchterm}')::FLOAT AS "${searchterm}" FROM "${key}"`
);
// Grab just the column called 'searchterm' and convert it to
// a floating point array. DuckDB returns multi-record-batch chunks,
// so we have to use the Float32Array rather than inserting the arrow
// column directly.
const asArray = value.getChild(searchterm).toArray(); // DAVID NOTE: maybe chose a constant, safer column name that ${searchterm}?
return asArray;
};
}
Insert cell
Insert cell
transformation_plot = {
listener;
const BACKGROUND_SIZE_RATIO = 0.3
const FOREGROUND_SIZE_RATIO = 3
if (searchterm != "") {
scatterplot.plotAPI({
background_options: {
size: [BACKGROUND_SIZE_RATIO, FOREGROUND_SIZE_RATIO] // Background half normal size, foreground double normal size.
},
encoding: {
foreground: {
field: searchterm, // basically, we're using the newly inserted binary column to "foreground" search matches
op: "eq",
a: 1
}
}
});
} else {
scatterplot.plotAPI({
background_options: {
size: [BACKGROUND_SIZE_RATIO, 1] // Bring the foreground size back down to normal.
},

encoding: {
foreground: null
}
});
}
}
Insert cell
Insert cell
html`
<style type="text/css">

.tooltip {
background-color: black;
width: 400px;
font-family: sans-serif;
}

.rect {
fill: white;
}

</style>
`
Insert cell
Insert cell
deepscatter = import("https://benschmidt.org/deepscatter@2.9.3")
Insert cell
Insert cell
arrow = scatterplot.arrow
Insert cell
import {
DuckDBClient,
} from "@bmschmidt/duckdb-client-1-24-0-arrow-11-0-0"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more