Public
Edited
May 6
1 fork
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
data = await FileAttachment("spambase_csv.csv").csv({ typed: true })
Insert cell
viewof classFilter = Inputs.radio(["All", "Spam", "Not Spam"], {
label: "Filter by Email Type",
value: "All"
})
Insert cell
filteredData = {
if (classFilter === "All") return data;
const isSpam = classFilter === "Spam" ? 1 : 0;
return data.filter(d => d.class === isSpam);
}
Insert cell
wordAvgData = {
const words = ["word_freq_you", "word_freq_your"];
const spam = filteredData.filter(d => d.class === 1);
const notSpam = filteredData.filter(d => d.class === 0);

return words.map(word => ({
word: word.replace("word_freq_", ""),
spam: d3.mean(spam, d => d[word]),
not_spam: d3.mean(notSpam, d => d[word])
}));
}
Insert cell
// Grouped bar chart comparing the average frequency of each word in spam
// vs. non-spam emails.
//
// Plot.barY stacks implicitly when only `y` is given, so putting both classes
// at the same `x` (the word) would pile the two averages on top of each other
// and show their sum. Faceting horizontally by word (`fx`) and using the class
// as `x` draws the two bars side by side instead.
Plot.plot({
  marks: [
    Plot.barY(
      // Reshape { word, spam, not_spam } into one row per (word, class).
      wordAvgData.flatMap(d => [
        { word: d.word, class: "Spam", freq: d.spam },
        { word: d.word, class: "Not Spam", freq: d.not_spam }
      ]),
      {
        fx: "word",
        x: "class",
        y: "freq",
        fill: "class",
        tip: true
      }
    )
  ],
  color: {
    domain: ["Spam", "Not Spam"],
    range: ["#e63946", "#2a9d8f"],
    label: "Email Type",
    legend: true
  },
  fx: { label: "Word" },
  // The legend already identifies the classes; a fixed domain keeps each
  // class in the same slot even when the filter removes the other one.
  x: { axis: null, domain: ["Spam", "Not Spam"] },
  y: { label: "Average Frequency" },
  width: 600,
  height: 350,
  title: "Word Frequency Comparison: Spam vs Not Spam"
})
Insert cell
Plot.plot({
marks: [
Plot.dot(
filteredData.map(d => ({
...d,
label: d.class === 1 ? "Spam" : "Not Spam"
})), {
x: "capital_run_length_average",
y: "capital_run_length_longest",
fill: "label", // Use string label for fill
tip: true
}
)
],
x: { label: "Average Capital Run Length" },
y: { label: "Longest Capital Run Length" },
color: {
domain: ["Spam", "Not Spam"],
range: ["#e63946", "#2a9d8f"],
legend: true
},
width: 600,
height: 350,
title: "Capitalization Behavior in Emails"
})
Insert cell
// Box plots of "!" and "$" character frequency, split by email class.
//
// With the email class as the shared `x` channel, the two box marks would be
// drawn at the same horizontal position and overlap. Here the email class
// becomes a horizontal facet (`fx`) and each character gets its own `x`
// position inside the facet, so the orange "!" box and the blue "$" box sit
// side by side.
Plot.plot({
  marks: [
    Plot.boxY(filteredData, {
      fx: d => d.class === 1 ? "Spam" : "Not Spam",
      x: () => "!",
      y: "char_freq_%21", // column name as it appears in the CSV (%21 is the URL encoding of "!")
      fill: "orange",
      tip: true
    }),
    Plot.boxY(filteredData, {
      fx: d => d.class === 1 ? "Spam" : "Not Spam",
      x: () => "$",
      y: "char_freq_%24", // %24 is the URL encoding of "$"
      fill: "blue",
      tip: true
    })
  ],
  y: { label: "Special Character Frequency" },
  fx: { label: "Email Class" },
  x: { label: "Character", domain: ["!", "$"] },
  width: 800,
  height: 350,
  title: "Use of ! and $ in Emails"
})
Insert cell
html`<div style="font-family: Arial, sans-serif; padding: 20px;">

<h2 style="text-align: center;"> SpamDetect: Visual Analytics on Email Text Patterns</h2>

<div style="margin: 20px auto; text-align: center;">
${viewof classFilter}
</div>

<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 30px;">

<!-- Chart 1 -->
<div>
<h4>1️⃣ Word Frequency</h4>
${Plot.plot({
marks: [
Plot.barY(
wordAvgData.flatMap(d => [
{ word: d.word, class: "Spam", freq: d.spam },
{ word: d.word, class: "Not Spam", freq: d.not_spam }
]),
{
x: "word",
y: "freq",
fill: "class",
tip: true
}
)
],
color: {
domain: ["Spam", "Not Spam"],
range: ["#e63946", "#2a9d8f"],
legend: true
},
x: { label: "Word" },
y: { label: "Avg Frequency" },
width: 500,
height: 300
})}
</div>

<!-- Chart 2 -->
<div>
<h4>2️⃣ Capital Run Length Scatter</h4>
${Plot.plot({
marks: [
Plot.dot(filteredData, {
x: "capital_run_length_average",
y: "capital_run_length_longest",
fill: d => d.class === 1 ? "#e63946" : "#2a9d8f",
tip: true
})
],
x: { label: "Avg Capital Run" },
y: { label: "Longest Capital Run" },
width: 500,
height: 300
})}
</div>

</div>

<div style="margin-top: 40px;">
<h4>3️⃣ Special Character Usage (! and $)</h4>
${Plot.plot({
marks: [
Plot.boxY(filteredData, {
x: d => d.class === 1 ? "Spam" : "Not Spam",
y: "char_freq_%21",
fill: "orange",
tip: true
}),
Plot.boxY(filteredData, {
x: d => d.class === 1 ? "Spam" : "Not Spam",
y: "char_freq_%24",
fill: "blue",
tip: true
})
],
y: { label: "Frequency" },
width: 1040,
height: 300
})}
</div>

</div>`
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more