Published
Edited
Aug 26, 2020
8 stars
Insert cell
Insert cell
Insert cell
// Load Danfo.js 0.0.15. If require() rejects, fall back to the `window.dfd`
// global (presumably the bundle still registers it even when the module
// resolution fails — TODO confirm) and attach the notebook's `print` helper as
// a `.print()` method on Series and DataFrame.
dfd = require('danfojs@0.0.15/dist/index.min.js').catch((error) => {
  // Without the global there is nothing to fall back to: surface the original
  // load failure instead of an unrelated "cannot read property of undefined".
  if (!window.dfd) throw error;
  const renderSelf = function() { return print(this) };
  window.dfd.Series.prototype.print = renderSelf;
  window.dfd.DataFrame.prototype.print = renderSelf;
  return window.dfd;
})
Insert cell
// Load TensorFlow.js through require; no version is pinned, so whichever
// version the resolver serves is used.
tf = require('@tensorflow/tfjs')
Insert cell
Insert cell
// Resolve the URL of the attached "wine_dataset.csv" file and pass it to
// Danfo's read_csv; `df` holds whatever read_csv returns.
df = dfd.read_csv(await FileAttachment("wine_dataset.csv").url())
Insert cell
// Render the result of df.head() as an HTML table with the print helper.
print(df.head())
Insert cell
// Evaluate df.shape so the cell shows its value.
df.shape
Insert cell
Insert cell
// df1 = df.rename({mapper:{'type':'wine_type'}})
Insert cell
// print(df1.head())
Insert cell
Insert cell
// print(df1.describe())
Insert cell
// print(df1.isna().sum())
Insert cell
Insert cell
// print(df1.isna().sum().div(df1.isna().count()).round(4))
Insert cell
Insert cell
// df_filled = {
// let df2
// df1.columns.forEach(function(feat,i){
// if(df1[feat].dtypes[0]!="string"){
// let mean = Number(df1[feat].mean().toFixed(4))
// df2 = df1.fillna({columns:[feat],values:[mean]})
// }
// })
// return df2
// }
Insert cell
// print(df_filled.head())
Insert cell
// print(df_filled.isna().sum())
Insert cell
Insert cell
// dum_df = {
// let df1 = df_filled.replace({"replace": "white", "with": 0, "in": ["wine_type"]})
// let df2 = df1.replace({"replace": "red", "with": 1, "in": ["wine_type"]})
// df2.astype({column: "wine_type", dtype: "int32"})
// return df2
// }
Insert cell
// print(dum_df.head())
Insert cell
// print(dum_df.ctypes)
Insert cell
Insert cell
// function div(fn) {
// const d = html`<div>`;
// fn(d);
// return d;
// }
Insert cell
Insert cell
// dum_df.quality.nunique()
Insert cell
// print(dum_df.quality.value_counts())
Insert cell
// {
// let layout = {
// title: 'Wine Quality Counts Plot',
// }
// let quality_count = dum_df.quality.value_counts()
// return div(d => quality_count.plot(d).bar({layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'wine quality proportion',
// }
// let quality_count = dum_df.quality.value_counts()
// return div(d => quality_count.plot(d).pie({layout:layout}))
// }
Insert cell
Insert cell
// {
// let mapper = {1:0,2:0,3:0,4:0,5:0,6:1,7:2,8:2,9:2}
// let new_cols = dum_df['quality'].map(mapper).values
// dum_df.addColumn({ "column": "wine_quality", "value": new_cols })
// }
Insert cell
// print(dum_df.head())
Insert cell
Insert cell
// df4 = dum_df.drop({ columns: ["quality"], axis: 1, inplace: false})
Insert cell
// print(df4.head())
Insert cell
// {
// let layout = {
// title: 'newly grouped wine quality bar plot',
// xaxis: {
// title: 'wine quality',
// },
// yaxis: {
// title: 'counts',
// }
// }
// let quality_count = df4.wine_quality.value_counts()
// return div(d => quality_count.plot(d).bar({layout:layout}))
// }
Insert cell
Insert cell
// {
// let layout = {
// title: 'fixed acidity Distribution',
// }
// return div(d => dum_df['fixed acidity'].plot(d).hist({layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'volatile acidity Distribution',
// }
// return div(d => df4['volatile acidity'].plot(d).hist({layout:layout}))
// }
Insert cell
Insert cell
// {
// let layout = {
// title: 'fixed acidity relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"fixed acidity",layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'volatile acidity relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"volatile acidity",layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'citric acid relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"citric acid",layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'residual sugar relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"residual sugar",layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'chlorides relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"chlorides",layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'free sulfur dioxide relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"free sulfur dioxide",layout:layout}))
// }
Insert cell
// {
// let layout = {
// title: 'total sulfur dioxide relationship to target feature',
// }
// return div(d => df4.plot(d).bar({x:"wine_quality",y:"total sulfur dioxide",layout:layout}))
// }
Insert cell
// print(df4.head())
Insert cell
// {
// let sub_df = df4.loc({ columns: ["fixed acidity","volatile acidity", "citric acid","residual sugar","chlorides",
// "free sulfur dioxide","total sulfur dioxide","density","pH","alcohol"] })
// let layout = {
// title: 'features box plot before scaling', xaxis: {title: 'X',},yaxis: {title: 'Y',
// }
// }
// return div(d => sub_df.plot(d).box({layout:layout}))
// }
Insert cell
Insert cell
// df_scaled = {
// let scaler = new dfd.MinMaxScaler()
// let X = df4.iloc({ columns: ["0:11"] })
// scaler.fit(X)
// let scaled_df = scaler.transform(X)
// return new dfd.DataFrame(scaled_df.values,{columns: df4.columns.slice(0,12)}).round(4)
// }
Insert cell
// print(df_scaled.tail())
Insert cell
Insert cell
// df_scaled.addColumn({ "column": "wine_quality", "value": df4.wine_quality.values })
Insert cell
// print(df_scaled.head())
Insert cell
// {
// let sub_df = df_scaled.loc({ columns: ["fixed acidity","volatile acidity", "citric acid","residual sugar","chlorides",
// "free sulfur dioxide","total sulfur dioxide","density","pH","alcohol"] })
// let layout = {
// title: 'box plot of scaled dataset', xaxis: {title: 'X',},yaxis: {title: 'Y',
// }
// }
// return div(d => sub_df.plot(d).box({layout:layout}))
// }
Insert cell
// print(df_scaled.head())
Insert cell
Insert cell
// y_hot = {
// let encode = new dfd.OneHotEncoder()
// let y = df_scaled["wine_quality"].values
// encode.fit(y)
// let y_enc = encode.transform(y)
// return y_enc
// }
Insert cell
// print(y_hot)
Insert cell
Insert cell
// df5 = {
// let X = df_scaled.drop({ columns: ["wine_quality"], axis: 1, inplace: false})
// let com_df = dfd.concat({ df_list: [X, y_hot], axis: 1 })
// return com_df
// }
Insert cell
// print(df5.head())
Insert cell
// n_rows = df_scaled.shape[0]
Insert cell
Insert cell
// train_test_split = { let df_len = df_scaled.shape[0]
// let df_sample = df5.sample(df_len)
// let X = df5.iloc({columns: ["0:11"]})
// let y = df5.iloc({columns: ["12:"]})
// let X_train = X.iloc({rows: ["0:4999"]}).tensor
// let y_train = y.iloc({rows: ["0:4999"]}).tensor
// let X_test = X.iloc({rows: ["4999:"]}).tensor
// let y_test = y.iloc({rows: ["4999:"]}).tensor
// let data_array = [X_train,X_test,y_train,y_test]
// return data_array
// }
Insert cell
// train_test_split[1].shape
Insert cell
// get input shape for MLP
// input_shape = train_test_split[0].shape[1]
Insert cell
// training_stats = {
// return {
// accuracy: 0,
// val_acc:0,
// }
// }
Insert cell
Insert cell
// model = {

// // Define a multilayer perceptron for 3-class classification (softmax output).
// const model = tf.sequential({
// layers: [
// tf.layers.dense({inputShape: [input_shape], units: 32, activation: 'relu'}),
// tf.layers.dense({units: 64, activation: 'relu'}),
// tf.layers.dense({units: 128, activation: 'relu'}),
// tf.layers.dense({units: 64, activation: 'relu'}),
// tf.layers.dense({units: 32, activation: 'relu'}),
// tf.layers.dense({units: 3, activation: 'softmax'}),
// ]
// });

// // Prepare the model for training: Specify the loss and the optimizer.
// model.compile({
// optimizer: 'sgd',
// loss: 'categoricalCrossentropy',
// metrics: ['accuracy'],
// });
// return model
// }
Insert cell
// // array for evaluation plotting
// validation_acc = []
Insert cell
// // array for evaluation plotting
// training_acc = []

Insert cell
Insert cell
// model.fit(train_test_split[0], train_test_split[2], {
// epochs: 100,
// batchSize: 32,
// validationSplit: 0.2,
// callbacks: {onEpochEnd: (batch, logs) => {
// training_acc.push(logs.acc)
// validation_acc.push(logs.val_acc)
// }, callbacks :tf.callbacks.earlyStopping({monitor: 'val_acc'})}
// }).then(info => {
// });
Insert cell
Insert cell
// df_eva = {
// let data = {"train_acc":training_acc,"val_acc":validation_acc}
// let df_output = new dfd.DataFrame(data)
// return df_output
// }
Insert cell
Insert cell
// {
// let layout = {
// title: 'Model Evaluation',
// xaxis: {
// title: 'Number of Epochs',
// },
// yaxis: {
// title: 'Accuracy',
// }
// }
// return div(d => df_eva.plot(d).line({layout:layout}))
// }
Insert cell
// model.evaluate(train_test_split[1],train_test_split[3])
Insert cell
/**
 * Render a Danfo-style DataFrame or Series as a scrollable HTML table.
 *
 * Reads `col_types`, `series`, `columns`, `index` and `values` from `df`.
 * When `df.series` is truthy a single header/data column is emitted using
 * `col_types[0]`; otherwise one column is emitted per entry of `columns`.
 * Each cell carries its column type as a CSS class so numeric types can be
 * right-aligned.
 *
 * @param {object} df - Object exposing the fields listed above.
 * @returns {*} The node produced by the `html` tag, with the source `df`
 *   attached as its `value` property.
 */
function print(df) {
  const {col_types, series, columns, index, values} = df;
  // NOTE(review): cell values are interpolated straight into the `html`
  // template — confirm the tag escapes strings if the data is untrusted.
  const table = html`
<div style="overflow: auto; max-height: 300px;">
  <table class="df-table">
    <thead>
      <tr>
        <th></th>
        ${series
          ? html`<th class="${col_types[0]}">${columns}</th>`
          : columns.map((name, i) => html`<th class="${col_types[i]}">${name}</th>`)}
      </tr>
    </thead>
    <tbody>
      ${values.map((row, i) => html`
        <tr>
          <th>${index[i]}</th>
          ${series
            ? html`<td class="${col_types[0]}">${row}</td>`
            : row.map((v, j) => html`<td class="${col_types[j]}">${v}</td>`)}
        </tr>
      `)}
    </tbody>
  </table>
</div>
<style>
  /* Every selector is scoped to .df-table so these rules cannot leak onto
     other tables or elements elsewhere on the page. */
  table.df-table { white-space: pre; }
  table.df-table th, table.df-table td { padding: 2px 5px; font-variant-numeric: tabular-nums; }
  table.df-table .float32, table.df-table .int32 { text-align: right; }
</style>
`;
  table.value = df;
  return table;
}
Insert cell
// import {print} from "@visnup/hello-danfo-js"
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more