Public
Edited
May 31
1 fork
7 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
housing = {
const url = 'https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv'
return p.pd.read_csv(pyodide.pyodide_py.open_url(url))
}
Insert cell
Insert cell
// JavaScript copy of the housing DataFrame (array of row objects built by df2Obj);
// this is the data the histogram cell feeds to Plot.rectY.
housing_obj = df2Obj(housing)
Insert cell
Insert cell
// Preview: render housing.head(10) as a table after converting it to row objects.
Inputs.table(df2Obj(housing.head(10)))
Insert cell
// Render the result of housing.describe() as a table after converting it to row objects.
Inputs.table(df2Obj(housing.describe()))
Insert cell
Insert cell
Insert cell
Insert cell
Plot.plot({
height: 200,
y: {
grid: true
},
marks: [
Plot.rectY(housing_obj, Plot.binX({y: "count"}, {x: varName, fill: 'steelblue'})),
Plot.ruleY([0])
]
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Text rendering of housing.isna().sum(), i.e. the missing-value count for each column.
housing.isna().sum().to_string()
Insert cell
Insert cell
data = {
const tempResult = await py`housing_clean = ${housing}.dropna(subset=['total_bedrooms'])

housing_cat = housing_clean[['ocean_proximity']]
ordinal_encoder = ${p.sk_pre.OrdinalEncoder}()
housing_cat_encoded = ordinal_encoder.fit_transform(housing_cat)
housing_clean['housing_cat_encoded'] = housing_cat_encoded
housing_clean = housing_clean.drop(['ocean_proximity'], axis=1)

train, test = ${p.sk_ms.train_test_split}(housing_clean, test_size=0.2, random_state=42)
train_X = train.drop(['median_house_value'], axis=1)
train_y = ${p.pd.DataFrame}(train['median_house_value'])
test_X = test.drop(['median_house_value'], axis=1)
test_y = ${p.pd.DataFrame}(test['median_house_value'])
{'train_X':train_X, 'train_y':train_y, 'test_X':test_X, 'test_y':test_y}
`

let temp = {};
const arr = Array.from(tempResult.keys());
for (const k of arr) {
temp[k] = tempResult.get(k);
}
return temp;
}
Insert cell
Insert cell
train_prepared = {
return py`housing_clean = ${data.train_X}

scaler = ${scalerSelection == 'StandardScaler' ? p.sk_pre.StandardScaler : p.sk_pre.MinMaxScaler}()
# scaler = ${p.sk_pre.StandardScaler}()
housing_clean_scale = ${p.pd.DataFrame}(scaler.fit_transform(${data.train_X}), columns=housing_clean.columns)
housing_clean_scale`
}
Insert cell
Insert cell
Insert cell
lin_rmse = {
return py`lin_reg = ${p.sk_lm.LinearRegression}()
lin_reg.fit(${train_prepared}, ${data.train_y})
train_predictions = lin_reg.predict(${train_prepared})
test_predictions = lin_reg.predict(${data.test_X})
train_rmse = ${p.np.sqrt}(${p.sk_mt.mean_squared_error}(${data.train_y}, train_predictions))
test_rmse = ${p.np.sqrt}(${p.sk_mt.mean_squared_error}(${data.test_y}, test_predictions))
{'train_rmse':train_rmse, 'test_rmse':test_rmse, 'params': [lin_reg.intercept_, lin_reg.coef_.flatten()]}`
}
Insert cell
Insert cell
sgd_rmse = {
return py`old_stdout = ${p.sys}.stdout
${p.sys}.stdout = mystdout = ${p.io}.StringIO()

lin_sgd = ${p.sk_lm.SGDRegressor}(verbose=1)
lin_sgd.fit(${train_prepared}, ${data.train_y})

${p.sys}.stdout = old_stdout
loss_history = mystdout.getvalue()

train_predictions = lin_sgd.predict(${train_prepared})
test_predictions = lin_sgd.predict(${data.test_X})
train_rmse = ${p.np.sqrt}(${p.sk_mt.mean_squared_error}(${data.train_y}, train_predictions))
test_rmse = ${p.np.sqrt}(${p.sk_mt.mean_squared_error}(${data.test_y}, test_predictions))

{'train_rmse':train_rmse, 'test_rmse':test_rmse, 'params': [lin_sgd.intercept_, lin_sgd.coef_], 'loss':loss_history}`
}
Insert cell
Insert cell
Insert cell
normalEquation = {
// Closed-form regularized least squares on the scaled training set.
//   X      - train_prepared as an array with a column of ones prepended
//            (a row of ones is stacked on the transposed features, then transposed back).
//   one    - identity matrix with entry [0,0] zeroed, so lamda is not added to the
//            diagonal term that belongs to the ones column.
//   theta  - pinv(X^T X + lamda * one) . X^T . y
//   rmse   - root mean squared error of X.theta against data.train_y.
// lamda is a notebook value defined outside this cell.
// The Python dict {'theta', 'prediction', 'rmse'} is returned through py, which calls
// toJs() on the result (presumably yielding a Map - confirm against the Pyodide version).
return py`X = ${p.np.concatenate}(([${p.np.ones}(${train_prepared}.shape[0])], ${p.np.array}(${train_prepared}).T), axis=0).T
y = ${p.np.array(data.train_y)}
one = ${p.np.identity}(X.shape[1])
one[0,0] = 0
lamda = ${lamda}
theta = ${p.np.linalg.pinv}(X.T.dot(X) + lamda * one).dot(X.T).dot(y)
prediction = X.dot(theta)
rmse = ${p.np.sqrt}(${p.sk_mt.mean_squared_error}(${data.train_y}, prediction))
{'theta':theta, 'prediction':prediction, 'rmse':rmse}`
}
Insert cell
Insert cell
Insert cell
lin_gd = {
const temp = await py`X = ${p.np.array(train_prepared)}
y = ${p.np.array(data.train_y)}

m, n = X.shape
l2_penality = 0.0001
W = ${p.np.reshape}(${p.np.zeros}(n), (-1,1))
b = 0
learning_rate = 0.01
iterations = 200

params = []

for i in range(iterations):
Y_pred = X.dot(W) + b
loss = ${p.np.sum}((y - Y_pred)**2) / m
dW = ( -1 * ( 2 * ( X.T ).dot(y - Y_pred)) + ( 2 * l2_penality * W ) ) / m
db = - 2 * ${p.np.sum}( y - Y_pred ) / m
W = W - learning_rate * dW
b = b - learning_rate * db
params.append([i+1, W, b, loss])

params`

return temp.map(d => ({'i':d[0], 'W':d[1], 'b':d[2], 'loss':d[3]}))
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
df2Obj = (df) => {
  // Converts a pandas DataFrame proxy into an array of plain row objects:
  // to_dict('records') yields one key/value collection per row, toJs() brings the
  // list over to JavaScript, and each row's entries are folded into an object.
  const rows = df.to_dict('records').toJs();
  return rows.map((row) => Object.fromEntries(row));
}
Insert cell
// Imports the Python modules used throughout the notebook and returns them in a dict
// keyed by short aliases (pd, np, sk_ms, sk_pre, sk_lm, sk_mt, sys, io).
// Because the imports appear in the code string, py's loadPackagesFromImports call
// receives them before the code is evaluated.
libs = py`import pandas as pd
import numpy as np
import sklearn.model_selection as ms
import sklearn.preprocessing as pre
import sklearn.linear_model as lm
import sklearn.metrics as mt
import sys
import io
{'pd':pd, 'np':np, 'sk_ms':ms, 'sk_pre':pre, 'sk_lm':lm, 'sk_mt':mt, 'sys':sys, 'io':io}`
Insert cell
p = {
let temp = {};
const arr = Array.from(libs.keys());
for (const k of arr) {
temp[k] = libs.get(k);
}
return temp;
}
Insert cell
loadingPackages = {
let i = 1;
while(i < 30) {
yield Object.keys(pyodide.loadedPackages);
await Promises.tick(500);
i++;
}
}
Insert cell
/**
 * Tagged-template helper that runs Python source through Pyodide.
 *
 * Every interpolated value is replaced in the source by a generated name
 * (`x0`, `x1`, ...) and passed to Python as a global under that name, so
 * JavaScript values and Python proxies can be embedded directly in the code.
 *
 * @param {string[]} strings - Literal chunks of the template.
 * @param {...*} expressions - Interpolated values, one between each pair of chunks.
 * @returns {Promise<*>} The value of the evaluated code; when that value exposes
 *   `toJs`, the result of calling `toJs()` is returned instead.
 */
py = async (strings, ...expressions) => {
  const globals = {};
  const code = strings.reduce((result, string, index) => {
    // Only the final chunk has no interpolation after it. Deciding by position
    // rather than by truthiness keeps falsy values such as 0, '' and false from
    // being silently dropped from the generated source.
    if (index >= expressions.length) {
      return result + string;
    }
    const name = `x${index}`;
    globals[name] = expressions[index];
    return result + string + name;
  }, '');
  // Let Pyodide fetch any packages the code imports before evaluating it.
  await pyodide.loadPackagesFromImports(code);
  const result = await pyodide.pyodide_py.eval_code_async(
    code,
    pyodide.toPy(globals)
  );
  if (result?.toJs) return result.toJs();
  return result;
}
Insert cell
pyodide = {
const pyodide = await require("//cdn.jsdelivr.net/pyodide/v0.19.1/full/pyodide.js");
return pyodide.loadPyodide({
indexURL: "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/"
});
}
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more