Published
Edited
Jan 3, 2022
Insert cell
Insert cell
Insert cell
kg = py`
# Prefix -> IRI bindings registered on the graph, so that later cells can
# resolve terms through get_ns("wtm"), get_ns("ind") and get_ns("skos").
namespaces = {
    "wtm": "http://purl.org/heals/food/",
    "ind": "http://purl.org/heals/ingredient/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
}

kg = ${kglab}.KnowledgeGraph(
    name="A recipe KG example based on Food.com",
    base_uri="https://www.food.com/recipe/",
    namespaces=namespaces,
)

# The trailing expression is what the notebook cell receives as its value.
kg`
Insert cell
// Show the Python repr() of the knowledge graph object produced by the kg cell.
py`repr(${kg})`
Insert cell
Insert cell
// Render the graph's namespace listing: the result of describe_ns() is turned
// into an HTML string on the Python side (to_html) and embedded via the html tag.
html`${await py`${kg}.describe_ns().to_html()`}`
Insert cell
Insert cell
common_ingredient = py`
# Resolve the ingredient namespace once and reuse it for every entry, instead
# of repeating the same get_ns("ind") lookup on each line.
ind_ns = ${kg}.get_ns("ind")

# Free-text ingredient label -> ingredient term in the "ind" namespace.
# Singular/plural and synonym spellings intentionally share one term.
common_ingredient = {
    "water": ind_ns.Water,
    "salt": ind_ns.Salt,
    "pepper": ind_ns.BlackPepper,
    "black pepper": ind_ns.BlackPepper,
    "dried basil": ind_ns.Basil,

    "butter": ind_ns.Butter,
    "milk": ind_ns.CowMilk,
    "egg": ind_ns.ChickenEgg,
    "eggs": ind_ns.ChickenEgg,
    "bacon": ind_ns.Bacon,

    "sugar": ind_ns.WhiteSugar,
    "brown sugar": ind_ns.BrownSugar,
    "honey": ind_ns.Honey,
    "vanilla": ind_ns.VanillaExtract,
    "vanilla extract": ind_ns.VanillaExtract,

    "flour": ind_ns.AllPurposeFlour,
    "all-purpose flour": ind_ns.AllPurposeFlour,
    "whole wheat flour": ind_ns.WholeWheatFlour,

    "olive oil": ind_ns.OliveOil,
    "vinegar": ind_ns.AppleCiderVinegar,

    "garlic": ind_ns.Garlic,
    "garlic clove": ind_ns.Garlic,
    "garlic cloves": ind_ns.Garlic,

    "onion": ind_ns.Onion,
    "onions": ind_ns.Onion,
    "cabbage": ind_ns.Cabbage,
    "carrot": ind_ns.Carrot,
    "carrots": ind_ns.Carrot,
    "celery": ind_ns.Celery,
    "potato": ind_ns.Potato,
    "potatoes": ind_ns.Potato,
    "tomato": ind_ns.Tomato,
    "tomatoes": ind_ns.Tomato,
    "baking powder": ind_ns.BakingPowder,
    "baking soda": ind_ns.BakingSoda,
}
common_ingredient
`
Insert cell
Insert cell
Insert cell
// Read the entry "kglab-main/dat/recipes.csv" from kgzip as text.
// NOTE(review): kgzip is defined outside this view — presumably a zip archive
// of the kglab repository; confirm.
file = kgzip.file("kglab-main/dat/recipes.csv").text()
Insert cell
df = py`
from os.path import dirname
import os
import io
import pandas as pd

# The CSV arrives as one string, so wrap it in an in-memory text buffer and
# let pandas parse it from there.
df = pd.read_csv(io.StringIO(${file}))
df
`
Insert cell
// Display the recipes DataFrame as a table: to_dict("records") yields one entry
// per row, toJs() converts the result to JavaScript values, and
// Object.fromEntries turns each converted row into a plain object.
// NOTE(review): table is defined outside this view.
// html`${df.head().to_html()}`
table(df.to_dict("records").toJs().map(Object.fromEntries))
Insert cell
Insert cell
represent_recipe_of_rows = py`import ast
import rdflib

# Add one recipe node per DataFrame row, with its name, cook time and
# ingredients. The loop is the last statement, so this cell's value carries
# no data; other cells interpolate it only to run after the graph is filled.
for index, row in ${df}.iterrows():
    # The node IRI embeds the Food.com recipe id.
    recipe_id = row["id"]
    node = rdflib.URIRef("https://www.food.com/recipe/{}".format(recipe_id))
    ${kg}.add(node, ${kg}.get_ns("rdf").type, ${kg}.get_ns("wtm").Recipe)

    recipe_name = row["name"]
    ${kg}.add(node, ${kg}.get_ns("skos").definition, rdflib.Literal(recipe_name))

    # Cook time in whole minutes, written as PT<n>M and typed as xsd:duration.
    cook_time = row["minutes"]
    cook_time_literal = "PT{}M".format(int(cook_time))
    code_time_node = rdflib.Literal(cook_time_literal, datatype=${kg}.get_ns("xsd").duration)
    ${kg}.add(node, ${kg}.get_ns("wtm").hasCookTime, code_time_node)

    # The ingredients column holds the text of a Python list literal.
    # literal_eval parses literals only, so text from the CSV can never be
    # executed as code the way it could with eval().
    ind_list = ast.literal_eval(row["ingredients"])

    for ind in ind_list:
        ingredient = ind.strip()
        # Every label is expected to exist in common_ingredient; an unknown
        # label makes this lookup fail rather than being skipped silently.
        ingredient_obj = ${common_ingredient}[ingredient]
        ${kg}.add(node, ${kg}.get_ns("wtm").hasIngredient, rdflib.URIRef(ingredient_obj))
`
/* small change at end to wrap ingredient_obj with rdflib.URIRef() for some reason
   NOTE(review): presumably the mapping values arrive as plain strings after passing
   through JavaScript, so they must be turned back into URIRef — confirm. */
Insert cell
Insert cell
Insert cell
// Draw the knowledge graph with pyvis. VIS_STYLE assigns a colour and size
// under the "wtm" and "ind" keys, a SubgraphTensor built from kg produces the
// pyvis graph, force_atlas_2based() is applied, and show() is called with
// "tmp.fig01.html".
// The leading Python comment interpolates represent_recipe_of_rows only so
// that this cell runs after the graph has been populated.
// NOTE(review): pyVisFrame is defined outside this view — presumably it
// displays the HTML produced by show(); confirm.
pyVisFrame(
await py` #depends on ${represent_recipe_of_rows} having completed
VIS_STYLE = {
"wtm": {
"color": "orange",
"size": 20,
},
"ind":{
"color": "blue",
"size": 35,
},
}

subgraph = ${kglab}.SubgraphTensor(${kg})
pyvis_graph = subgraph.build_pyvis_graph(notebook=True, style=VIS_STYLE)

pyvis_graph.force_atlas_2based()
pyvis_graph.show("tmp.fig01.html")`
)
Insert cell
// Serialize the populated RDF graph by calling serialize() with no arguments.
// The Python comment interpolates represent_recipe_of_rows only to order this
// cell after the graph has been filled.
py` #depends on ${represent_recipe_of_rows} having completed
${kg}.rdf_graph().serialize()
`
Insert cell
Insert cell
Insert cell
Insert cell
// Time how long save_rdf("tmp.ttl") takes and return a one-element list with
// the elapsed wall-clock time in milliseconds, rounded to two decimals.
// The Python comment interpolates represent_recipe_of_rows only to order this
// cell after the graph has been filled.
write_times_ttl = py` #depends on ${represent_recipe_of_rows} having completed

import time

write_times = []

t0 = time.time()
${kg}.save_rdf("tmp.ttl")
write_times.append(round((time.time() - t0) * 1000.0, 2))
write_times`
Insert cell
Insert cell
// Time the RDF/XML save (tmp.xml) and the JSON-LD save (tmp.jsonld) and return
// both elapsed times, in that order, in milliseconds rounded to two decimals.
// The Parquet measurement is commented out inside the Python code; the
// accompanying note says pyarrow is not included yet.
// The Python comment on the first line interpolates represent_recipe_of_rows
// only to order this cell after the graph has been filled.
write_times_other = py` #depends on ${represent_recipe_of_rows} having completed
import time
import os
write_times = []
t0 = time.time()
${kg}.save_rdf("tmp.xml", format="xml")
write_times.append(round((time.time() - t0) * 1000.0, 2))

t0 = time.time()
${kg}.save_jsonld("tmp.jsonld")
write_times.append(round((time.time() - t0) * 1000.0, 2))

#/* pyarrow not included in js yet */
#t0 = time.time()
#${kg}.save_parquet("tmp.parquet")
#write_times.append(round((time.time() - t0) * 1000.0, 2))
write_times
`
Insert cell
// Combine the Turtle timing with the XML and JSON-LD timings into one list,
// ordered ttl, xml, jsonld.
// NOTE(review): concat() requires both cell values to be JavaScript arrays —
// confirm that the py tag converts returned Python lists.
write_times = write_times_ttl.concat(write_times_other)
Insert cell
html`${await py` #depends on ${represent_recipe_of_rows} having completed
import os
import pandas as pd

# Files written by the serialization cells, listed in the same order as the
# timings in write_times (ttl, xml, jsonld).
file_paths = ["tmp.ttl", "tmp.xml", "tmp.jsonld"] #"tmp.parquet"
file_sizes = [os.path.getsize(file_path) for file_path in file_paths]

# One row per output file. The table gets its own name so that it does not
# shadow the recipes DataFrame, which this cell never needed.
stats = pd.DataFrame({"file_path": file_paths, "file_size": file_sizes, "write_time": ${write_times}})
stats["ms_per_byte"] = stats["write_time"] / stats["file_size"]
stats.to_html()`}`
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Load the kglab Python module and expose it to the other cells as kglab.
// The interpolations inside the Python comment after "import kglab" exist only
// to make this cell wait for the packagesPy, micropipDependenciesPy,
// otherKglabDependencies and kglabDependency cells (defined outside this view).
// NOTE(review): micropip.install(...) is not awaited here; if it returns a
// future in this Pyodide version, "import kglab" may run before the pyvis
// wheel has finished installing — confirm.
kglab = py`
#make completely sure pyvis is loaded first
import micropip
micropip.install("https://thadk.net/pyodide_packages/pyodide_18_1/pyvis-0.1.9-py2.py3-none-any.whl")

import kglab
# ${packagesPy} ${micropipDependenciesPy} ${otherKglabDependencies} ${kglabDependency}
kglab
`
Insert cell
// List the attribute names of the loaded kglab module (Python dir()).
py`dir(${kglab})`
Insert cell
// Show the Python repr() of the loaded kglab module.
py`repr(${kglab})`
Insert cell
Insert cell
Insert cell
Insert cell
import { py, pyodide } from "@thadk/pyodide-18"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more