Insert cell
Insert cell
Insert cell
SYNONYMS = py`
# Inlined as Python source: the "eval" code makes SYNONYMS not JSON.stringify
# well enough for python.
# Maps each ingredient spelling to the canonical name used for it.
SYNONYMS = {
    # seasonings and baking staples
    "pepper": "black pepper",
    "black pepper": "black pepper",
    "egg": "egg",
    "eggs": "egg",

    "vanilla": "vanilla",
    "vanilla extract": "vanilla",
    "flour": "flour",
    "all-purpose flour": "flour",

    # vegetables, singular and plural
    "onions": "onion",
    "onion": "onion",
    "carrots": "carrot",
    "carrot": "carrot",

    "potatoes": "potato",
    "potato": "potato",
    "tomatoes": "tomato",
    "fresh tomatoes": "tomato",
    "fresh tomato": "tomato",
    "garlic": "garlic",
    "garlic clove": "garlic",
    "garlic cloves": "garlic",
}
SYNONYMS`
Insert cell
Insert cell
ingredientset_from_recipes = py`

SYNONYMS =${SYNONYMS}

import csv
import os,io

MAX_ROW = 250000 # 231638

max_context = 0
min_context = 1000

recipes = []
vocab = set()

with io.open('all_ind.csv','wb') as f:
f.write(${await kgzip
.file("kglab-main/dat/all_ind.csv")
.arrayBuffer()}.tobytes())

with open("all_ind.csv", "r") as f:
reader = csv.reader(f)
next(reader, None) # remove file header

for i, row in enumerate(reader):
id = row[0]
ind_set = set()
# substitute synonyms
for ind in set(eval(row[3])):
if ind in SYNONYMS:
ind_set.add(SYNONYMS[ind])
else:
ind_set.add(ind)
if len(ind_set) > 1:
recipes.append([id, ind_set])
vocab.update(ind_set)
max_context = max(max_context, len(ind_set))
min_context = min(min_context, len(ind_set))

if i > MAX_ROW:
break

{"max context": str(max_context) + " unique ingredients per recipe",
"min context": str(min_context) + " unique ingredients per recipe",
"vocab size": str(len(list(vocab))), "recipes": recipes }
`
Insert cell
Insert cell
py`
import pickle
recipes=${ingredientset_from_recipes.get("recipes")}

pickle.dump(recipes, open("tmp.pkl", "wb"))

recipes[:3]
`
Insert cell
Insert cell
Insert cell
py`
# One list of ingredients per recipe; the recipe identifier is dropped.
vectors = [
    list(ingredients)
    for _, ingredients in ${ingredientset_from_recipes.get("recipes")}
]

vectors[:3]
`
Insert cell
Insert cell
model = py`
# Train a Word2Vec model on the ingredient lists and save it to tmp.w2v.
# NOTE(review): vectors and max_context are read as Python globals set by
# other cells; this cell has no Observable dependency on those cells, so they
# must have been evaluated first.
MIN_COUNT = 2
model_path = "tmp.w2v"

model = ${gensim}.models.Word2Vec(vectors, min_count=MIN_COUNT, window=max_context)
model.save(model_path)

# Finish on the model itself: save() returns None, which would otherwise
# become the value of this cell.
model`
Insert cell
Insert cell
term_ratio = py`
import numpy as np
import pylev #${otherKglabDependencies}

def term_ratio(target, description):
    """Return the fraction of target terms that occur in the description.

    target: a set of terms to look for.
    description: a string; it is split on single spaces into terms.

    Returns a float between 0.0 and 1.0. An empty target yields 0.0 rather
    than raising ZeroDivisionError.
    """
    if not target:
        return 0.0

    d_set = set(description.split(" "))
    num_inter = len(d_set.intersection(target))
    return num_inter / len(target)
term_ratio`
Insert cell
Insert cell
Insert cell
df = py`
# Use the fully qualified option name: the abbreviated "max_rows" pattern
# matches more than one option on newer pandas releases and then raises.
${pd}.set_option("display.max_rows", None)

target = {"basil"}
get_related = ${get_related_CHEATING}

# CHEATING USING PICKLE TO MAKE SURE pylev works in pyodide, it does.
# The first argument is a placeholder string, not a trained model.
df = get_related("NOT THE MODEL ACTUALLY", "dried basil", target, n=50)
df`
Insert cell
// Render the DataFrame: each converted record becomes a plain object row.
table(
  df
    .to_dict("records")
    .toJs()
    .map((record) => Object.fromEntries(record))
)
Insert cell
Insert cell
Insert cell
py`
# Plot the similarity column of the related-ingredients frame.
import matplotlib

matplotlib.style.use("ggplot")

plt = ${plt}
df = ${df}

df["similarity"].plot(alpha=0.75, rot=0)

# The value of plt.show() is the last expression, so it is what the cell displays.
plt.show()
`
Insert cell
Insert cell
Insert cell
df_ranked = py`
from kglab import root_mean_square #${kglab}

def rank_related (df):
df2 = df.copy(deep=True)
df2["related"] = df2.apply(lambda row: root_mean_square([ row[2], row[4] ]), axis=1)
return df2.sort_values(by=["related"], ascending=False)

df = rank_related(${df})
df
`
Insert cell
// Render the ranked DataFrame: each converted record becomes a plain object row.
table(
  df_ranked
    .to_dict("records")
    .toJs()
    .map((record) => Object.fromEntries(record))
)
Insert cell
Insert cell
df_ranked_filtered = py`
# Keep rows that share at least one term with the target and score 50 or more.
df = ${df_ranked}
df[(df["term_ratio"] > 0) & (df["related"] >= 50)]
`
Insert cell
// Render the filtered DataFrame: each converted record becomes a plain object row.
table(
  df_ranked_filtered
    .to_dict("records")
    .toJs()
    .map((record) => Object.fromEntries(record))
)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/* Patch plt.show() to work in Observable from https://observablehq.com/@gnestor/pyodide-demo */
plt = py`
import matplotlib
from matplotlib import pyplot as plt #${packagesPy}
import types
import io
import base64
from js import document

def show(self):
buf = io.BytesIO()
self.savefig(buf, format='png')
buf.seek(0)
img_str = 'data:image/png;base64,' + base64.b64encode(buf.read()).decode('UTF-8')
el = document.createElement('img')
el.src = img_str
return el

plt._show = types.MethodType(plt.show, plt)
plt.show = types.MethodType(show, plt)

plt`
Insert cell
Insert cell
gensim = py`
# Expose the gensim module as the value of this cell.
import gensim
gensim
`
Insert cell
pd = py`
# Expose pandas as the value of this cell.
# Referenced only so this cell is evaluated after: ${packagesPy}
import pandas as pd
pd
`
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
kglab = py`
# Expose the kglab module as the value of this cell.
# The names on the next line are referenced only so this cell is evaluated after them.
# ${otherKglabDependencies} ${kglabDependency} ${micropipDependenciesPy}
import kglab
kglab
`
Insert cell
py`
# List the attribute names of the imported kglab module.
dir(${kglab})
`
Insert cell
py`
# Show the repr of the imported kglab module.
repr(${kglab})
`
Insert cell
Insert cell
import { py, pyodide } from "@thadk/pyodide-18"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more