Spam Email Classification / Madhavi Indukuri

Madhavi Indukuri

Workspace

Public

Edited

May 6

1 fork

data = await FileAttachment("spambase_csv.csv").csv({ typed: true })

viewof classFilter = Inputs.radio(["All", "Spam", "Not Spam"], {

label: "Filter by Email Type",

value: "All"

})

filteredData = {

if (classFilter === "All") return data;

const isSpam = classFilter === "Spam" ? 1 : 0;

return data.filter(d => d.class === isSpam);

}

wordAvgData = {

const words = ["word_freq_you", "word_freq_your"];

const spam = filteredData.filter(d => d.class === 1);

const notSpam = filteredData.filter(d => d.class === 0);

return words.map(word => ({

word: word.replace("word_freq_", ""),

spam: d3.mean(spam, d => d[word]),

not_spam: d3.mean(notSpam, d => d[word])

}));

}

// Bar chart of the average frequency of each tracked word, colored by email
// class. Reads the per-word averages from `wordAvgData`.
Plot.plot({
  marks: [
    Plot.barY(
      // Reshape each { word, spam, not_spam } summary into one row per
      // (word, class) pair so the class can drive the fill color.
      wordAvgData.flatMap((d) => [
        { word: d.word, class: "Spam", freq: d.spam },
        { word: d.word, class: "Not Spam", freq: d.not_spam }
      ]),
      {
        x: "word",
        y: "freq",
        fill: "class",
        tip: true
      }
    )
  ], // closes `marks`; without it the cell does not parse
  color: {
    domain: ["Spam", "Not Spam"],
    range: ["#e63946", "#2a9d8f"],
    label: "Email Type",
    legend: true
  }, // closes `color`, so the options below apply to the plot, not the scale
  x: { label: "Word" },
  y: { label: "Average Frequency" },
  width: 600,
  height: 350,
  title: "Word Frequency Comparison: Spam vs Not Spam"
})

// Scatter plot of average vs. longest capital-letter run length for the
// currently filtered rows, colored by email class.
Plot.plot({
  marks: [
    Plot.dot(
      // Add a string label derived from `class` so the color scale and the
      // legend show "Spam" / "Not Spam" instead of the raw 1 / 0 values.
      filteredData.map((d) => ({
        ...d,
        label: d.class === 1 ? "Spam" : "Not Spam"
      })),
      {
        x: "capital_run_length_average",
        y: "capital_run_length_longest",
        fill: "label", // Use string label for fill
        tip: true
      }
    )
  ], // closes `marks`; without it the cell does not parse
  x: { label: "Average Capital Run Length" },
  y: { label: "Longest Capital Run Length" },
  color: {
    domain: ["Spam", "Not Spam"],
    range: ["#e63946", "#2a9d8f"],
    legend: true
  }, // closes `color`, so the size and title apply to the plot, not the scale
  width: 600,
  height: 350,
  title: "Capitalization Behavior in Emails"
})

// Box plots of the `char_freq_%21` and `char_freq_%24` columns (labelled
// "! and $" in the title), grouped by email class, for the filtered rows.
Plot.plot({
  marks: [
    // NOTE(review): both box marks use the same x accessor, so the orange and
    // blue boxes share the same horizontal positions — confirm this overlap
    // is intended.
    Plot.boxY(filteredData, {
      x: (d) => (d.class === 1 ? "Spam" : "Not Spam"),
      y: "char_freq_%21",
      fill: "orange",
      tip: true
    }),
    Plot.boxY(filteredData, {
      x: (d) => (d.class === 1 ? "Spam" : "Not Spam"),
      y: "char_freq_%24",
      fill: "blue",
      tip: true
    })
  ], // closes `marks`; without it the cell does not parse
  y: { label: "Special Character Frequency" },
  x: { label: "Email Class" },
  width: 800,
  height: 350,
  title: "Use of ! and $ in Emails"
})

html`<div style="font-family: Arial, sans-serif; padding: 20px;">

<h2 style="text-align: center;"> SpamDetect: Visual Analytics on Email Text Patterns</h2>

${viewof classFilter}

</div>

<div>

<h4>1️⃣ Word Frequency</h4>

${Plot.plot({

marks: [

Plot.barY(

wordAvgData.flatMap(d => [

{ word: d.word, class: "Spam", freq: d.spam },

{ word: d.word, class: "Not Spam", freq: d.not_spam }

]),

{

x: "word",

y: "freq",

fill: "class",

tip: true

}

)

color: {

domain: ["Spam", "Not Spam"],

range: ["#e63946", "#2a9d8f"],

legend: true

x: { label: "Word" },

y: { label: "Avg Frequency" },

width: 500,

height: 300

})}

</div>

<div>

<h4>2️⃣ Capital Run Length Scatter</h4>

${Plot.plot({

marks: [

Plot.dot(filteredData, {

x: "capital_run_length_average",

y: "capital_run_length_longest",

fill: d => d.class === 1 ? "#e63946" : "#2a9d8f",

tip: true

})

x: { label: "Avg Capital Run" },

y: { label: "Longest Capital Run" },

width: 500,

height: 300

})}

</div>

<h4>3️⃣ Special Character Usage (! and $)</h4>

${Plot.plot({

marks: [

Plot.boxY(filteredData, {

x: d => d.class === 1 ? "Spam" : "Not Spam",

y: "char_freq_%21",

fill: "orange",

tip: true

}),

Plot.boxY(filteredData, {

x: d => d.class === 1 ? "Spam" : "Not Spam",

y: "char_freq_%24",

fill: "blue",

tip: true

})

y: { label: "Frequency" },

width: 1040,

height: 300

})}

</div>

</div>`

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more