Public
Edited
Aug 9, 2024
1 fork
1 star
Insert cell
Insert cell
Insert cell
Insert cell
Words
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
thewords_clusters
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Word records parsed from the attached TheWords.json.
// The attachment presumably stores the result of the filter below, which
// took a long time to run in the notebook:
//   _.filter(Kaggle_words, obj => words_in_all.includes(obj.word))
TheWords = FileAttachment("TheWords.json")
  .json()
Insert cell
Insert cell
// Rows parsed from the attached thewords-umap.csv (read elsewhere for x / y).
thewords_umap = FileAttachment("thewords-umap.csv")
  .csv()
Insert cell
// Rows parsed from the attached thewords-hdbscan.csv (read elsewhere for cluster).
thewords_hdbscan = FileAttachment("thewords-hdbscan.csv")
  .csv()
Insert cell
// Group the Words records by their `cluster` value.
word_clusters = d3.group(
  Words,
  ({ cluster }) => cluster
)
Insert cell
// Rows parsed from the attached thewords-clusters.csv.
thewords_clusters = FileAttachment("thewords-clusters.csv")
  .csv()
Insert cell
Insert cell
// Combine each TheWords record with the row at the same index in
// thewords_umap (x, y) and thewords_hdbscan (cluster). Numeric fields are
// coerced with unary plus. The three inputs are assumed to be row-aligned
// and equally long -- TODO confirm.
Words = TheWords.map((entry, idx) => {
  const point = thewords_umap[idx];
  const assignment = thewords_hdbscan[idx];
  return {
    word: entry.word,
    count: +entry.count,
    x: +point.x,
    y: +point.y,
    cluster: +assignment.cluster
  };
})
Insert cell
Insert cell
// Rows parsed from the attached thewords-processed.csv.
thewords_processed = FileAttachment("thewords-processed.csv")
  .csv()
Insert cell
Insert cell
// Entries of the attached english.txt, one per line.
// Blank (whitespace-only) lines are dropped, as is any line containing a
// space, i.e. multi-word entries.
englishWords = (await FileAttachment("english.txt").text())
  // Accept both LF and CRLF line endings so entries never keep a trailing
  // "\r", which would stop them matching words from the other datasets.
  .split(/\r?\n/)
  .filter(line => line.trim() !== '' && !line.includes(' '));
Insert cell
// Attached zip archive.
// Source: https://www.kaggle.com/datasets/rtatman/english-word-frequency
archive = FileAttachment("archive.zip")
  .zip()
Insert cell
// Rows parsed from the "unigram_freq.csv" entry of the archive.
Kaggle_words = archive
  .file("unigram_freq.csv")
  .csv()
Insert cell
Insert cell
all_the_words = {

const all_arrays = [A_words,B_words,C_words,D_words,E_words,F_words,G_words,H_words,I_words,J_words,K_words,L_words,M_words,N_words,O_words,
P_words,Q_words,R_words,S_words,T_words,U_words,V_words,W_words,X_words,Y_words,Z_words]

const combinedArray = _.flatMap(all_arrays, Object.values);

return _.flattenDeep(combinedArray.map(word=>_.toArray(word)))
}
Insert cell
// Attached EOWL-v1.1.2.zip archive; the per-letter cells read entries from it.
eowlV112 = FileAttachment("EOWL-v1.1.2.zip")
  .zip()
Insert cell
// The `filenames` property of the EOWL archive, exposed for inspection.
data1 = eowlV112.filenames
Insert cell
// Rows parsed from the "A Words.csv" entry of the EOWL archive.
A_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/A Words.csv')
  .csv()
Insert cell
// Rows parsed from the "B Words.csv" entry of the EOWL archive.
B_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/B Words.csv')
  .csv()
Insert cell
// Rows parsed from the "C Words.csv" entry of the EOWL archive.
C_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/C Words.csv')
  .csv()
Insert cell
// Rows parsed from the "D Words.csv" entry of the EOWL archive.
D_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/D Words.csv')
  .csv()
Insert cell
// Rows parsed from the "E Words.csv" entry of the EOWL archive.
E_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/E Words.csv')
  .csv()
Insert cell
// Rows parsed from the "F Words.csv" entry of the EOWL archive.
F_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/F Words.csv')
  .csv()
Insert cell
// Rows parsed from the "G Words.csv" entry of the EOWL archive.
G_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/G Words.csv')
  .csv()
Insert cell
// Rows parsed from the "H Words.csv" entry of the EOWL archive.
H_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/H Words.csv')
  .csv()
Insert cell
// Rows parsed from the "I Words.csv" entry of the EOWL archive.
I_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/I Words.csv')
  .csv()
Insert cell
// Rows parsed from the "J Words.csv" entry of the EOWL archive.
J_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/J Words.csv')
  .csv()
Insert cell
// Rows parsed from the "K Words.csv" entry of the EOWL archive.
K_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/K Words.csv')
  .csv()
Insert cell
// Rows parsed from the "L Words.csv" entry of the EOWL archive.
L_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/L Words.csv')
  .csv()
Insert cell
// Rows parsed from the "M Words.csv" entry of the EOWL archive.
M_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/M Words.csv')
  .csv()
Insert cell
// Rows parsed from the "N Words.csv" entry of the EOWL archive.
N_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/N Words.csv')
  .csv()
Insert cell
// Rows parsed from the "O Words.csv" entry of the EOWL archive.
O_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/O Words.csv')
  .csv()
Insert cell
// Rows parsed from the "P Words.csv" entry of the EOWL archive.
P_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/P Words.csv')
  .csv()
Insert cell
// Rows parsed from the "Q Words.csv" entry of the EOWL archive.
Q_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/Q Words.csv')
  .csv()
Insert cell
// Rows parsed from the "R Words.csv" entry of the EOWL archive.
R_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/R Words.csv')
  .csv()
Insert cell
// Rows parsed from the "S Words.csv" entry of the EOWL archive.
S_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/S Words.csv')
  .csv()
Insert cell
// Rows parsed from the "T Words.csv" entry of the EOWL archive.
T_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/T Words.csv')
  .csv()
Insert cell
// Rows parsed from the "U Words.csv" entry of the EOWL archive.
U_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/U Words.csv')
  .csv()
Insert cell
// Rows parsed from the "V Words.csv" entry of the EOWL archive.
V_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/V Words.csv')
  .csv()
Insert cell
// Rows parsed from the "W Words.csv" entry of the EOWL archive.
W_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/W Words.csv')
  .csv()
Insert cell
// Rows parsed from the "X Words.csv" entry of the EOWL archive.
X_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/X Words.csv')
  .csv()
Insert cell
// Rows parsed from the "Y Words.csv" entry of the EOWL archive.
Y_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/Y Words.csv')
  .csv()
Insert cell
// Rows parsed from the "Z Words.csv" entry of the EOWL archive.
Z_words = eowlV112
  .file('EOWL-v1.1.2/CSV Format/Z Words.csv')
  .csv()
Insert cell
Insert cell
Insert cell
Insert cell
// The `word` field of every Kaggle_words row, sorted with lodash's default
// ascending order.
kwords = _.sortBy(
  Kaggle_words.map(({ word }) => word)
)
Insert cell
// Words present in words_in_both_datasers and also in englishWords.
words_in_all = _.intersection(
  words_in_both_datasers,
  englishWords
)
Insert cell
// Words present in both kwords and all_the_words.
// The cell name is kept as spelled because another cell refers to it.
words_in_both_datasers = _.intersection(
  kwords,
  all_the_words
)
Insert cell
import {Plot} from "@observablehq/plot-0-6-7"
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Object keyed grade1..grade12. Each value is a new array made by spreading
// the corresponding grade cell, so the sources are copied, not aliased.
trainingWords = Object.fromEntries(
  [
    ["grade1", grade1],
    ["grade2", grade2],
    ["grade3", grade3],
    ["grade4", grade4],
    ["grade5", grade5],
    ["grade6", grade6],
    ["grade7", grade7],
    ["grade8", grade8],
    ["grade9", grade9],
    ["grade10", grade10],
    ["grade11", grade11],
    ["grade12", grade12]
  ].map(([label, source]) => [label, [...source]])
)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more