Public
Edited
Oct 30, 2024
1 fork
5 stars
Insert cell
Insert cell
Insert cell
// Toggle input: when on, the `markdown` cell uses otherContentMarkdown in
// place of entriesMarkdown.
viewof useEverything = Inputs.toggle({ label: "Use blogmarks and quotes" })
Insert cell
Insert cell
Insert cell
// Assembles the weeknotes draft from the per-section markdown cells, then
// collapses every run of two or more newlines into exactly one blank line.
markdown = `### Weeknotes:

${useEverything ? otherContentMarkdown : entriesMarkdown}
${museumsMarkdown}
${releasesMarkdown}
${tilsMarkdown}
`.replace(/\n{2,}/g, "\n\n")
Insert cell
otherContentMarkdown = `#### On the blog\n\n${Array.from(
Object.keys(otherContent)
)
.map(
(key) =>
"\n**" + key + "**\n" + otherContent[key].map((l) => `- ` + l).join("\n")
)
.join("\n")}`
Insert cell
// Groups contentSinceLatest by tag via chronologicalTagAssignment; the tags
// listed here are passed as its ignoreTags argument.
otherContent = chronologicalTagAssignment(contentSinceLatest, [
"ai",
"generative-ai",
"llms"
])
Insert cell
tilsMarkdown = {
let bits = newTILs.map((r) => `* [${r.title}](${r.url}) - ${r.date}`);
if (bits.length) {
bits = ["\n#### TILs", "", ...bits];
}
return bits.join("\n");
}
Insert cell
entriesMarkdown = {
let bits = entries.map((r) => `* [${r.title}](${r.url})`);
if (bits.length) {
bits = ["#### Blog entries", "", ...bits];
}
return bits.join("\n");
}
Insert cell
museumsMarkdown = {
let bits = museums.map((r) => `* [${r.name}](${r.url})`);
if (bits.length) {
bits = ["#### Museums", "", ...bits];
}
return bits.join("\n");
}
Insert cell
releasesMarkdown = {
let bits = [];
bits = newReleases.map(
(r) =>
`* **[${r.title} ${r.version}](${r.releaseUrl})** - ${r.date}<br />${r.description}`
);
if (bits.length) {
bits = ["#### Releases", "", ...bits];
}
return bits.join("\n");
}
Insert cell
previousWeeknotes = {
const sql = `select
id, title, created, body
from
blog_entry
where
id in (select entry_id from blog_entry_tags where tag_id = (
select id from blog_tag where tag = 'weeknotes'
)
)
order by created desc limit 3`;
const r = await fetch(
`https://datasette.simonwillison.net/simonwillisonblog.json?sql=${encodeURIComponent(
sql
)}&_shape=array`
);
const rows = await r.json();
return rows.map((row) => {
const { id, title, body, created } = row;
return {
id,
title,
body,
created: new Date(created)
};
});
}
Insert cell
// First element of previousWeeknotes.
latest = previousWeeknotes[0]
Insert cell
Insert cell
Insert cell
// Observable cell bundling several experimental ways of grouping items by tag;
// its value is the list of { name, func } pairs returned at the end of the cell.
approaches = {
// Helper function to generate markdown link
const makeLink = (item) => `[${item.title}](${item.url})`;

/**
 * Approach 1: Most frequent tags.
 *
 * Counts how many items carry each tag, keeps the `numTags` most common tags
 * and lists, per tag, the links of every item carrying it. An item can
 * therefore appear under several tags.
 *
 * @param {Array<{tags: string[]}>} data - Items to group; each is handed to makeLink.
 * @param {number} [numTags=5] - Maximum number of tags to return.
 * @returns {Object<string, string[]>} Markdown links keyed by tag.
 */
function mostFrequentTags(data, numTags = 5) {
  // Prototype-less counter so tags such as "constructor" or "toString" are
  // counted like any other instead of colliding with Object.prototype members.
  const tagCounts = Object.create(null);
  for (const item of data) {
    for (const tag of item.tags) {
      tagCounts[tag] = (tagCounts[tag] || 0) + 1;
    }
  }

  const topTags = Object.entries(tagCounts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, numTags)
    .map(([tag]) => tag);

  const result = {};
  for (const tag of topTags) {
    result[tag] = data
      .filter((item) => item.tags.includes(tag))
      .map(makeLink);
  }
  return result;
}

/**
 * Approach 2: Unique tag assignment.
 *
 * Places each distinct item (by url) under exactly one of its tags: the tag
 * whose group is currently smallest, with ties going to the tag listed first.
 * Items without any tag are skipped rather than raising an error.
 *
 * @param {Array<{url: string, tags: string[]}>} data - Items to group; each is handed to makeLink.
 * @returns {Object<string, string[]>} Markdown links keyed by the chosen tag.
 */
function uniqueTagAssignment(data) {
  // Prototype-less so tags such as "constructor" behave like ordinary keys.
  const groups = Object.create(null);
  const usedUrls = new Set();
  const groupSize = (tag) => (groups[tag] || []).length;

  for (const item of data) {
    if (usedUrls.has(item.url) || item.tags.length === 0) {
      continue;
    }
    let chosenTag = item.tags[0];
    for (const tag of item.tags.slice(1)) {
      // Only a strictly smaller group displaces the current choice.
      if (groupSize(tag) < groupSize(chosenTag)) {
        chosenTag = tag;
      }
    }
    groups[chosenTag] = (groups[chosenTag] || []).concat(makeLink(item));
    usedUrls.add(item.url);
  }

  // Hand back an ordinary object, as callers received before.
  return { ...groups };
}

/**
 * Approach 3: Tag co-occurrence.
 *
 * Counts how often each unordered pair of tags appears together on an item,
 * keeps the `numPairs` most frequent pairs and lists, per pair, the links of
 * the items carrying both tags. Result keys have the form "tagA-tagB" with the
 * two tags in sorted order.
 *
 * @param {Array<{tags: string[]}>} data - Items to group; each is handed to makeLink.
 * @param {number} [numPairs=5] - Maximum number of tag pairs to return.
 * @returns {Object<string, string[]>} Markdown links keyed by "tagA-tagB".
 */
function tagCoOccurrence(data, numPairs = 5) {
  // Keep the two tags alongside the count. Recovering them later by splitting
  // the "a-b" key on "-" would break tags that contain a hyphen themselves
  // (for example "generative-ai").
  const pairStats = new Map();
  for (const item of data) {
    for (let i = 0; i < item.tags.length; i++) {
      for (let j = i + 1; j < item.tags.length; j++) {
        const tags = [item.tags[i], item.tags[j]].sort();
        const key = JSON.stringify(tags);
        const stats = pairStats.get(key);
        if (stats) {
          stats.count += 1;
        } else {
          pairStats.set(key, { tags, count: 1 });
        }
      }
    }
  }

  const topPairs = [...pairStats.values()]
    .sort((a, b) => b.count - a.count)
    .slice(0, numPairs);

  const result = {};
  for (const { tags: [tag1, tag2] } of topPairs) {
    result[`${tag1}-${tag2}`] = data
      .filter((item) => item.tags.includes(tag1) && item.tags.includes(tag2))
      .map(makeLink);
  }
  return result;
}

/**
 * Approach 4: Chronological tag emergence.
 *
 * Walks the items from oldest to newest and records, for each of the first
 * `numTags` tags to appear, a link to the item that introduced it. Scanning
 * stops as soon as a further new tag shows up after the limit is reached.
 *
 * @param {Array<{created: (string|Date), tags: string[]}>} data - Items to scan; not modified.
 * @param {number} [numTags=5] - Maximum number of tags to return.
 * @returns {Object<string, string[]>} One-element link lists keyed by tag.
 */
function chronologicalTagEmergence(data, numTags = 5) {
  // A Set (rather than `tag in object`) so names inherited from
  // Object.prototype, e.g. "constructor", are not treated as already seen.
  const seenTags = new Set();
  const result = {};

  // Sort a copy: Array.prototype.sort reorders in place and the array belongs
  // to the caller.
  const oldestFirst = [...data].sort(
    (a, b) => new Date(a.created) - new Date(b.created)
  );

  for (const item of oldestFirst) {
    for (const tag of item.tags) {
      if (seenTags.has(tag)) {
        continue;
      }
      seenTags.add(tag);
      if (Object.keys(result).length >= numTags) {
        return result;
      }
      result[tag] = [makeLink(item)];
    }
  }

  return result;
}

/**
 * Approach 5: Tag diversity score.
 *
 * Scores every tag as (distinct urls among its items) / (number of its items),
 * keeps the `numTags` best-scoring tags and lists the links of all items
 * carrying each of them.
 *
 * @param {Array<{url: string, tags: string[]}>} data - Items to group; each is handed to makeLink.
 * @param {number} [numTags=5] - Maximum number of tags to return.
 * @returns {Object<string, string[]>} Markdown links keyed by tag.
 */
function tagDiversityScore(data, numTags = 5) {
  const itemsByTag = {};
  for (const item of data) {
    for (const tag of item.tags) {
      itemsByTag[tag] = (itemsByTag[tag] || []).concat(item);
    }
  }

  const scored = [];
  for (const [tag, items] of Object.entries(itemsByTag)) {
    const distinctUrls = new Set(items.map((entry) => entry.url));
    scored.push([tag, distinctUrls.size / items.length]);
  }
  scored.sort((left, right) => right[1] - left[1]);

  const result = {};
  for (const [tag] of scored.slice(0, numTags)) {
    result[tag] = itemsByTag[tag].map(makeLink);
  }
  return result;
}

/**
 * Tag diversity score without duplicates.
 *
 * Ranks tags by (distinct urls among their items) / (number of their items),
 * then walks that ranking and gives each tag only the items whose url has not
 * already been placed under an earlier tag. Tags left with no items are
 * skipped; at most `numTags` tags are returned.
 *
 * The ranking is computed once up front. The earlier version also re-scored
 * and re-sorted a copy of the ranking after every tag, but the loop kept
 * iterating the original array, so that work never influenced the result (and
 * divided 0 by 0 for exhausted tags). It has been removed without changing
 * what is returned.
 *
 * @param {Array<{url: string, tags: string[]}>} data - Items to group; each is handed to makeLink.
 * @param {number} [numTags=10] - Maximum number of tags to return.
 * @returns {Object<string, string[]>} Markdown links keyed by tag.
 */
function tagDiversityScoreNoDuplicates(data, numTags = 10) {
  // Prototype-less so tags such as "constructor" behave like ordinary keys.
  const itemsByTag = Object.create(null);
  for (const item of data) {
    for (const tag of item.tags) {
      itemsByTag[tag] = (itemsByTag[tag] || []).concat(item);
    }
  }

  const rankedTags = Object.entries(itemsByTag)
    .map(([tag, items]) => ({
      tag,
      items,
      score: new Set(items.map((item) => item.url)).size / items.length
    }))
    .sort((a, b) => b.score - a.score);

  const result = {};
  const usedUrls = new Set();

  for (const { tag, items } of rankedTags) {
    if (Object.keys(result).length >= numTags) {
      break;
    }
    const unusedItems = items.filter((item) => !usedUrls.has(item.url));
    if (unusedItems.length === 0) {
      continue;
    }
    result[tag] = unusedItems.map(makeLink);
    unusedItems.forEach((item) => usedUrls.add(item.url));
  }

  return result;
}

/**
 * Renders one approach's result as markdown: a "## <approach>" heading
 * followed by one "**tag**: link, link, link" paragraph per tag, showing at
 * most the first three links of each tag.
 *
 * @param {string} approach - Heading text.
 * @param {Object<string, string[]>} result - Links keyed by tag.
 * @returns {string} The markdown text.
 */
function generateMarkdown(approach, result) {
  const paragraphs = Object.entries(result).map(
    ([tag, links]) => `**${tag}**: ${links.slice(0, 3).join(", ")}\n\n`
  );
  return `## ${approach}\n\n` + paragraphs.join("");
}

// Registry pairing a display name with each grouping function. generateMarkdown
// is not called in this cell, and tagsWithThreePlusItems is not defined in it.
const approaches = [
{ name: "Most Frequent Tags", func: mostFrequentTags },
{ name: "Unique Tag Assignment", func: uniqueTagAssignment },
{ name: "Tag Co-occurrence", func: tagCoOccurrence },
{ name: "Chronological Tag Emergence", func: chronologicalTagEmergence },
{ name: "Tag Diversity Score", func: tagDiversityScore },
{
name: "Tag Diversity Score no dupes",
func: tagDiversityScoreNoDuplicates
},
{
name: "tagsWithThreePlusItems",
func: tagsWithThreePlusItems
}
];

// The cell's value is the registry itself; nothing is executed or logged here.
return approaches;
}
Insert cell
// Fetches every blogmark, entry and quotation created after the blog entry
// whose id is latest.id, oldest first. Each returned object has type, title,
// url, tags (array) and created (Date).
contentSinceLatest = {
// The CTE unions the three content tables, each with its tags collected by
// json_group_array. The outer select builds the URL as
// /<year>/<Mon>/<day without leading zero>/<slug>.
const sql = `
with everything as (select
'blogmark' as type,
created,
link_title as title,
slug,
(
select json_group_array(blog_tag.tag)
from blog_tag
where blog_tag.id in (
select tag_id from blog_blogmark_tags
where blogmark_id = blog_blogmark.id
)
) as tags
from
blog_blogmark
where created > (
select created from blog_entry where id = :id
)
union all
select
'entry',
created,
title,
slug,
(
select json_group_array(blog_tag.tag)
from blog_tag
where blog_tag.id in (
select tag_id from blog_entry_tags
where entry_id = blog_entry.id
)
) as tags
from
blog_entry
where created > (
select created from blog_entry where id = :id
)
union all
select
'quotation' as type,
created,
source || ': ' || substr(quotation, 0, 50) || '...' as title,
slug,
(
select json_group_array(blog_tag.tag)
from blog_tag
where blog_tag.id in (
select tag_id from blog_quotation_tags
where quotation_id = blog_quotation.id
)
) as tags
from
blog_quotation
where created > (
select created from blog_entry where id = :id
)
order by
created desc)
select
type,
created,
title,
'https://simonwillison.net/' || strftime('%Y', created) || '/' ||
case strftime('%m', created)
when '01' then 'Jan'
when '02' then 'Feb'
when '03' then 'Mar'
when '04' then 'Apr'
when '05' then 'May'
when '06' then 'Jun'
when '07' then 'Jul'
when '08' then 'Aug'
when '09' then 'Sep'
when '10' then 'Oct'
when '11' then 'Nov'
when '12' then 'Dec'
end || '/' ||
case
when substr(strftime('%d', created), 1, 1) = '0'
then substr(strftime('%d', created), 2, 1)
else strftime('%d', created)
end || '/' || slug AS url,
tags
from everything order by created`;
// latest.id is sent as the `id` query-string parameter, presumably to fill
// the SQL's :id placeholder (confirm against the Datasette API).
const r = await fetch(
`https://datasette.simonwillison.net/simonwillisonblog.json?sql=${encodeURIComponent(
sql
)}&_shape=array&id=${latest.id}`
);
const rows = await r.json();
return rows.map((row) => {
const { type, title, url, created, tags } = row;
return {
type,
title,
url,
// tags arrives as JSON text (built by json_group_array), so parse it.
tags: JSON.parse(tags),
created: new Date(created)
};
});
}
Insert cell
// Display cell: shows the id of `latest`.
latest.id
Insert cell
previousUrls = {
const hrefs = [];
const parser = new DOMParser();
previousWeeknotes.forEach((latest) => {
const doc = parser.parseFromString(latest.body, "text/html");
doc.querySelectorAll("a").forEach((anchor) => {
const href = anchor.getAttribute("href");
if (href) {
hrefs.push(href);
}
});
});
return hrefs;
}
Insert cell
// URLs treated as already covered: previousUrls when skipExisting is on,
// otherwise none.
urls = skipExisting ? previousUrls : []
Insert cell
// Filter just to TILs up to the first one we have seen before
newTILs = {
const index = tils.findIndex((til) => urls.includes(til.url));
if (index === -1) {
return tils;
}
return tils.slice(0, index);
}
Insert cell
// Filter just to releases up to the first one we have seen before
newReleases = {
if (!skipExisting) {
// return last 5 as a neater demo
return releases.slice(0, 5);
}
const index = releases.findIndex((release) =>
urls.includes(release.releaseUrl)
);
if (index === -1) {
return releases;
}
return releases.slice(0, index);
}
Insert cell
// Display cell: renders `releases` as a table.
Inputs.table(releases)
Insert cell
releases = {
const r = await fetch(
"https://raw.githubusercontent.com/simonw/simonw/main/releases.md"
);
const raw = await r.text();
// Everything between <!-- recent_releases starts --> and <!-- recent_releases ends -->
const startMarker = "<!-- recent_releases starts -->";
const endMarker = "<!-- recent_releases ends -->";
const regex = new RegExp(`${startMarker}([\\s\\S]*?)${endMarker}`, "i");
const match = raw.match(regex);
const releasesText = match && match[1] ? match[1].trim() : "";
// Now extract the releases themselves
const pattern =
/[*]\s*\*\*\[(.*?)\]\((.*?)\)\*\*:\s*\[(.*?)\]\((.*?)\)\s*-\s*((\(\[(.*?)\sreleases? total\]\((.*?)\)\s*-\s*)?(\d{4}-\d{2}-\d{2}))\n<br \/>(.*?)(?=\n[*]|$)/g;
const releases = [];
let m;
while ((m = pattern.exec(releasesText)) !== null) {
releases.push({
title: m[1],
url: m[2],
version: m[3],
releaseUrl: m[4],
releaseCount: m[7] ? parseInt(m[7], 10) : 1,
releasesUrl: m[8] || `https://github.com/simonw/${m[1]}/releases`,
date: m[9],
description: m[10]
});
}
return releases;
}
Insert cell
raw_releases = {
const r = await fetch(
"https://raw.githubusercontent.com/simonw/simonw/main/releases.md"
);
return await r.text();
}
Insert cell
// Display cell: renders `releases` as a table.
Inputs.table(releases)
Insert cell
tils = {
const sql = `
select
title,
'https://til.simonwillison.net/' || topic || '/' || slug as url,
date(created_utc) as date
from til
where created_utc >= date('now', '-28 days')
order by created_utc desc
`;
const r = await fetch(
`https://til.simonwillison.net/tils.json?sql=${sql}&_shape=array`
);
return await r.json();
}
Insert cell
// Display cell: renders `tils` as a table.
Inputs.table(tils)
Insert cell
museums = {
const sql = `
select
name,
'https://www.niche-museums.com/' || id as url
from museums
where created >= date('now', '-28 days')
order by created desc
`;
const r = await fetch(
`https://www.niche-museums.com/browse.json?sql=${sql}&_shape=array`
);
const museums = await r.json();
const index = museums.findIndex((museum) => urls.includes(museum.url));
if (index === -1) {
return museums;
}
return museums.slice(0, index);
}
Insert cell
// JSON URL for blog_entry rows created after latest.created: title, id,
// created and slug columns, sorted by id descending, at most 20 rows.
entries_url = `https://datasette.simonwillison.net/simonwillisonblog/blog_entry.json?_shape=array&_col=title&_col=id&_col=created&_col=slug&_sort_desc=id&_size=20&created__gt=${encodeURIComponent(
latest.created.toISOString()
)}`
Insert cell
Insert cell
entries = {
const r = await fetch(entries_url);
const entries = await r.json();
entries.forEach((e) => {
const d = new Date(e.created);
e.url = buildUrl(e.created, e.slug);
});
return entries;
}
Insert cell
// Display cell: shows the created date of `latest`.
latest.created
Insert cell
/**
 * Builds the simonwillison.net URL for an item from its creation timestamp
 * and slug, in the form /<year>/<Mon>/<day>/<slug>/ where the day carries no
 * leading zero.
 *
 * @param {string} created - Timestamp whose part before "T" is YYYY-MM-DD.
 * @param {string} slug - The item's slug.
 * @returns {string} The absolute URL, ending in a slash.
 */
function buildUrl(created, slug) {
  const MONTH_ABBREVIATIONS = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  ];
  const [datePart] = created.split("T");
  const [year, month, day] = datePart
    .split("-")
    .map((piece) => parseInt(piece, 10));
  const monthName = MONTH_ABBREVIATIONS[month - 1];
  return `https://simonwillison.net/${year}/${monthName}/${day}/${slug}/`;
}
Insert cell
Insert cell
/**
 * Approach 5 (modified): group items under tags that occur on at least three
 * items, using every item (by url) at most once.
 *
 * Tags listed in `ignoreTags` and tags carried by fewer than three items are
 * dropped first, and items left without tags are discarded. The remaining
 * tags are ranked by (distinct urls) / (item count) and walked in that order,
 * each receiving only the items not already placed under an earlier tag.
 *
 * Changes from the earlier version, none of which alter the grouping:
 * - items are shallow-copied instead of round-tripped through JSON, so
 *   non-JSON values such as Date objects reach makeLink intact;
 * - the per-iteration re-score/re-sort is gone. It rebuilt a copy of the
 *   ranking while the loop kept iterating the original array, so it never
 *   affected the output (and divided 0 by 0 for exhausted tags).
 *
 * @param {Array<{url: string, tags: string[]}>} data - Items to group; not modified.
 * @param {string[]} [ignoreTags=[]] - Tags that must never be used as a group.
 * @returns {Object<string, string[]>} makeLink output keyed by tag.
 */
function tagsWithThreePlusItems(data, ignoreTags = []) {
  const ignored = new Set(ignoreTags);

  // Prototype-less objects so tags such as "constructor" are ordinary keys.
  const tagCounts = Object.create(null);
  for (const item of data) {
    for (const tag of item.tags) {
      tagCounts[tag] = (tagCounts[tag] || 0) + 1;
    }
  }

  const validTags = new Set(
    Object.keys(tagCounts).filter(
      (tag) => !ignored.has(tag) && tagCounts[tag] >= 3
    )
  );

  // Copy each item with its tag list narrowed to the valid tags, leaving the
  // caller's objects untouched, then drop items that have no tags left.
  const candidates = data
    .map((item) => ({
      ...item,
      tags: item.tags.filter((tag) => validTags.has(tag))
    }))
    .filter((item) => item.tags.length > 0);

  const itemsByTag = Object.create(null);
  for (const item of candidates) {
    for (const tag of item.tags) {
      itemsByTag[tag] = (itemsByTag[tag] || []).concat(item);
    }
  }

  const rankedTags = Object.entries(itemsByTag)
    .map(([tag, items]) => ({
      tag,
      items,
      score: new Set(items.map((item) => item.url)).size / items.length
    }))
    .sort((a, b) => b.score - a.score);

  const result = {};
  const usedUrls = new Set();
  for (const { tag, items } of rankedTags) {
    const unusedItems = items.filter((item) => !usedUrls.has(item.url));
    if (unusedItems.length === 0) {
      continue;
    }
    result[tag] = unusedItems.map(makeLink);
    unusedItems.forEach((item) => usedUrls.add(item.url));
  }

  return result;
}
Insert cell
/**
 * Makes a string safe to embed in markdown on a single line: line breaks
 * become spaces and each of \ ` * _ { } [ ] ( ) # + - . ! is prefixed with a
 * backslash.
 *
 * @param {string} s - Text to escape.
 * @returns {string} The escaped, single-line text.
 */
function escapeMarkdown(s) {
  const singleLine = s.replace(/\r?\n/g, " ");
  return singleLine.replace(/[\\`*_{}[\]()#+\-.!]/g, (ch) => "\\" + ch);
}
Insert cell
// Display cell: shows the output of escapeMarkdown for a sample string
// containing backticks.
html`<pre>${escapeMarkdown("goo`bar`")}</pre>`
Insert cell
/**
 * Files every item under exactly one tag and returns the resulting groups.
 *
 * Items are processed oldest first. Each item goes under whichever of its
 * non-ignored tags is carried by the most items overall; on a tie the tag
 * listed first on the item wins. Items whose tags are all ignored, or that
 * have no tags, are left out. Groups appear in the order their first item was
 * filed, and links within a group are oldest first.
 *
 * Each link has the form "[escaped title](url) - YYYY-MM-DD".
 *
 * @param {Array<{title: string, url: string, tags: string[], created: (Date|string)}>} data
 *   Items to group; the array is not modified. `created` may be a Date or
 *   anything the Date constructor parses.
 * @param {string[]} [ignoreTags=[]] - Tags that must never be used as a group.
 * @returns {Object<string, string[]>} Markdown links keyed by tag.
 */
function chronologicalTagAssignment(data, ignoreTags = []) {
  const ignored = new Set(ignoreTags);

  // Normalise through the Date constructor so string timestamps work here
  // just as they already do in the sort below.
  const makeLink = (item) =>
    `[${escapeMarkdown(item.title)}](${item.url}) - ${
      new Date(item.created).toISOString().split("T")[0]
    }`;

  // Sort a copy by date, oldest first.
  const sortedItems = [...data].sort(
    (a, b) => new Date(a.created) - new Date(b.created)
  );

  // Prototype-less objects so tags such as "constructor" or "toString" are
  // counted and grouped like any other tag.
  const tagCounts = Object.create(null);
  for (const item of sortedItems) {
    for (const tag of item.tags) {
      if (!ignored.has(tag)) {
        tagCounts[tag] = (tagCounts[tag] || 0) + 1;
      }
    }
  }

  const groups = Object.create(null);
  for (const item of sortedItems) {
    let bestTag = null;
    let bestCount = 0;
    for (const tag of item.tags) {
      // Strictly greater: the earliest-listed tag wins a tie.
      if (!ignored.has(tag) && tagCounts[tag] > bestCount) {
        bestTag = tag;
        bestCount = tagCounts[tag];
      }
    }
    // Truthiness check kept on purpose: as before, an empty-string tag never
    // becomes a group heading.
    if (!bestTag) {
      continue;
    }
    if (!groups[bestTag]) {
      groups[bestTag] = [];
    }
    groups[bestTag].push(makeLink(item));
  }

  // Hand back an ordinary object, as callers received before.
  return { ...groups };
}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more