Learning to predict Wine quality using TF.js and Danfo.js / Bamigbade Opeyemi

Bamigbade Opeyemi

Workspace

Published

Edited

Aug 26, 2020

Fork of Learning to predict Iris species using TF.js and Danfo.js

8 stars

// Load Danfo.js 0.0.15. If the require() promise rejects, fall back to the
// global `window.dfd` (presumably still registered by the bundle - TODO confirm)
// after giving Series and DataFrame a `print` method.
dfd = require('danfojs@0.0.15/dist/index.min.js').catch(() => {
  const danfo = window.dfd;

  // One shared renderer: delegates to the notebook's print() with the
  // receiver (the Series or DataFrame instance) as the argument.
  const renderSelf = function() {
    return print(this);
  };

  danfo.Series.prototype.print = danfo.DataFrame.prototype.print = renderSelf;

  return danfo;
})

// Load TensorFlow.js through the notebook's require().
tf = require('@tensorflow/tfjs')

// Read the attached "wine_dataset.csv" with dfd.read_csv, passing the
// attachment's URL (the URL lookup is awaited before the call).
df = dfd.read_csv(await FileAttachment("wine_dataset.csv").url())

// Render df.head() with the print() helper defined in this notebook.
print(df.head())

// Display the shape of the loaded data.
df.shape

// df1 = df.rename({mapper:{'type':'wine_type'}})

// print(df1.head())

// print(df1.describe())

// print(df1.isna().sum())

// print(df1.isna().sum().div(df1.isna().count()).round(4))

// df_filled = {

// let df2

// df1.columns.forEach(function(feat,i){

// if(df1[feat].dtypes[0]!="string"){

// let mean = Number(df1[feat].mean().toFixed(4))

// df2 = df1.fillna({columns:[feat],values:[mean]})

// }

// })

// return df2

// }

// print(df_filled.head())

// print(df_filled.isna().sum())

// dum_df = {

// let df1 = df_filled.replace({"replace": "white", "with": 0, "in": ["wine_type"]})

// let df2 = df1.replace({"replace": "red", "with": 1, "in": ["wine_type"]})

// df2.astype({column: "wine_type", dtype: "int32"})

// return df2

// }

// print(dum_df.head())

// print(dum_df.ctypes)

// function div(fn) {

// const d = html`<div>`;

// fn(d);

// return d;

// }

// dum_df.quality.nunique()

// print(dum_df.quality.value_counts())

// {

// let layout = {

// title: 'Wine Quality Counts Plot',

// }

// let quality_count = dum_df.quality.value_counts()

// return div(d => quality_count.plot(d).bar({layout:layout}))

// }

// {

// let layout = {

// title: 'wine quality proportion',

// }

// let quality_count = dum_df.quality.value_counts()

// return div(d => quality_count.plot(d).pie({layout:layout}))

// }

// {

// let mapper = {1:0,2:0,3:0,4:0,5:0,6:1,7:2,8:2,9:2}

// let new_cols = dum_df['quality'].map(mapper).values

// dum_df.addColumn({ "column": "wine_quality", "value": new_cols })

// }

// print(dum_df.head())

// df4 = dum_df.drop({ columns: ["quality"], axis: 1, inplace: false})

// print(df4.head())

// {

// let layout = {

// title: 'newly grouped wine quality bar plot',

// xaxis: {

// title: 'wine quality',

// },

// yaxis: {

// title: 'counts',

// }

// let quality_count = df4.wine_quality.value_counts()

// return div(d => quality_count.plot(d).bar({layout:layout}))

// }

// {

// let layout = {

// title: 'fixed acidity Distribution',

// }

// return div(d => dum_df['fixed acidity'].plot(d).hist({layout:layout}))

// }

// {

// let layout = {

// title: 'volatile acidity Distribution',

// }

// return div(d => df4['volatile acidity'].plot(d).hist({layout:layout}))

// }

// {

// let layout = {

// title: 'fixed acidity relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"fixed acidity",layout:layout}))

// }

// {

// let layout = {

// title: 'volatile acidity relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"volatile acidity",layout:layout}))

// }

// {

// let layout = {

// title: 'citric acid relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"citric acid",layout:layout}))

// }

// {

// let layout = {

// title: 'residual sugar relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"residual sugar",layout:layout}))

// }

// {

// let layout = {

// title: 'chlorides relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"chlorides",layout:layout}))

// }

// {

// let layout = {

// title: 'free sulfur dioxide relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"free sulfur dioxide",layout:layout}))

// }

// {

// let layout = {

// title: 'total sulfur dioxide relationship to target feature',

// }

// return div(d => df4.plot(d).bar({x:"wine_quality",y:"total sulfur dioxide",layout:layout}))

// }

// print(df4.head())

// {

// let sub_df = df4.loc({ columns: ["fixed acidity","volatile acidity", "citric acid","residual sugar","chlorides",

// "free sulfur dioxide","total sulfur dioxide","density","pH","alcohol"] })

// let layout = {

// title: 'features box plot before scaling', xaxis: {title: 'X',},yaxis: {title: 'Y',

// }

// return div(d => sub_df.plot(d).box({layout:layout}))

// }

// df_scaled = {

// let scaler = new dfd.MinMaxScaler()

// let X = df4.iloc({ columns: ["0:11"] })

// scaler.fit(X)

// let scaled_df = scaler.transform(X)

// return new dfd.DataFrame(scaled_df.values,{columns: df4.columns.slice(0,12)}).round(4)

// }

// print(df_scaled.tail())

// df_scaled.addColumn({ "column": "wine_quality", "value": df4.wine_quality.values })

// print(df_scaled.head())

// {

// let sub_df = df_scaled.loc({ columns: ["fixed acidity","volatile acidity", "citric acid","residual sugar","chlorides",

// "free sulfur dioxide","total sulfur dioxide","density","pH","alcohol"] })

// let layout = {

// title: 'box plot of scaled dataset', xaxis: {title: 'X',},yaxis: {title: 'Y',

// }

// return div(d => sub_df.plot(d).box({layout:layout}))

// }

// print(df_scaled.head())

// y_hot = {

// let encode = new dfd.OneHotEncoder()

// let y = df_scaled["wine_quality"].values

// encode.fit(y)

// let y_enc = encode.transform(y)

// return y_enc

// }

// print(y_hot)

// df5 = {

// let X = df_scaled.drop({ columns: ["wine_quality"], axis: 1, inplace: false})

// let com_df = dfd.concat({ df_list: [X, y_hot], axis: 1 })

// return com_df

// }

// print(df5.head())

// n_rows = df_scaled.shape[0]

// train_test_split = { let df_len = df_scaled.shape[0]

// let df_sample = df5.sample(df_len)

// let X = df5.iloc({columns: ["0:11"]})

// let y = df5.iloc({columns: ["12:"]})

// let X_train = X.iloc({rows: ["0:4999"]}).tensor

// let y_train = y.iloc({rows: ["0:4999"]}).tensor

// let X_test = X.iloc({rows: ["4999:"]}).tensor

// let y_test = y.iloc({rows: ["4999:"]}).tensor

// let data_array = [X_train,X_test,y_train,y_test]

// return data_array

// }

// train_test_split[1].shape

// get input shape for MLP

// input_shape = train_test_split[0].shape[1]

// training_stats = {

// return {

// accuracy: 0,

// val_acc:0,

// }

// model = {

// // Define a multilayer perceptron classifier with a 3-unit softmax output.

// const model = tf.sequential({

// layers: [

// tf.layers.dense({inputShape: [input_shape], units: 32, activation: 'relu'}),

// tf.layers.dense({units: 64, activation: 'relu'}),

// tf.layers.dense({units: 128, activation: 'relu'}),

// tf.layers.dense({units: 64, activation: 'relu'}),

// tf.layers.dense({units: 32, activation: 'relu'}),

// tf.layers.dense({units: 3, activation: 'softmax'}),

// ]

// });

// // Prepare the model for training: Specify the loss and the optimizer.

// model.compile({

// optimizer: 'sgd',

// loss: 'categoricalCrossentropy',

// metrics: ['accuracy'],

// });

// return model

// }

// // array for evaluation plotting

// validation_acc = []

// // array for evaluation plotting

// training_acc = []

// model.fit(train_test_split[0], train_test_split[2], {

// epochs: 100,

// batchSize: 32,

// validationSplit: 0.2,

// callbacks: {onEpochEnd: (batch, logs) => {

// training_acc.push(logs.acc)

// validation_acc.push(logs.val_acc)

// }, callbacks :tf.callbacks.earlyStopping({monitor: 'val_acc'})}

// }).then(info => {

// });

// df_eva = {

// let data = {"train_acc":training_acc,"val_acc":validation_acc}

// let df_output = new dfd.DataFrame(data)

// return df_output

// }

// {

// let layout = {

// title: 'Model Evaluation',

// xaxis: {

// title: 'Number of Epochs',

// },

// yaxis: {

// title: 'Accuracy',

// }

// return div(d => df_eva.plot(d).line({layout:layout}))

// }

// model.evaluate(train_test_split[1],train_test_split[3])

/**
 * Render a Danfo.js DataFrame or Series as an HTML table.
 *
 * Reads `col_types`, `series`, `columns`, `index` and `values` from `df`.
 * When `df.series` is truthy, a single header cell and one data cell per row
 * are emitted, both classed with `col_types[0]`. Otherwise one header cell is
 * emitted per entry of `columns` and one data cell per entry of each row,
 * classed with the `col_types` entry at the same position.
 *
 * Relies on an `html` tagged-template function being in scope (provided by
 * the notebook environment, not by this file).
 *
 * @param {object} df - Object exposing the fields listed above.
 * @returns {*} The value produced by the `html` template, with its `value`
 *   property set to `df`.
 */
function print(df) {
  const {col_types, series, columns, index, values} = df;

  // The wrapper <div> and <table class="df-table"> opening tags pair with the
  // closing tags below; the class is what the stylesheet selectors target.
  const table = html`
    <div>
      <table class="df-table">
        <thead>
          <tr>
            <th></th>
            ${series
              ? html`<th class="${col_types[0]}">${columns}</th>`
              : columns.map((name, i) => html`<th class="${col_types[i]}">${name}</th>`)}
          </tr>
        </thead>
        <tbody>
          ${values.map((row, i) => html`
            <tr>
              <th>${index[i]}</th>
              ${series
                ? html`<td class="${col_types[0]}">${row}</td>`
                : row.map((v, j) => html`<td class="${col_types[j]}">${v}</td>`)}
            </tr>
          `)}
        </tbody>
      </table>
    </div>
    <style>
      table.df-table { white-space: pre; }
      table.df-table th, table.df-table td { padding: 2px 5px; font-variant-numeric: tabular-nums; }
      table.df-table .float32, table.df-table .int32 { text-align: right; }
    </style>
  `;

  // The empty <th> in the header row sits above the index column so that
  // column names line up with their data cells. Every selector is scoped to
  // table.df-table so the rules do not affect other tables on the page.

  // Expose the source object on the rendered element.
  table.value = df;
  return table;
}

// import {print} from "@visnup/hello-danfo-js"

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more