/**
 * Runs gradient descent with momentum and records the trajectory.
 *
 * Each iteration computes, per parameter k:
 *   velocity[k] = grads[k] + momentum * velocity[k]
 *   params[k]   = params[k] - lr * velocity[k]
 *
 * Tensors this function allocates and does not return (the velocity tensors
 * and the per-step intermediates) are disposed, so memory use does not grow
 * with the number of iterations. Tensors produced by `grad_loss` (`value`,
 * `grads`) and the entries of `init_params` are never disposed here; their
 * ownership stays with the caller.
 *
 * NOTE(review): the loop bound is inclusive, so `epochs + 1` update steps are
 * performed and recorded. Kept as-is because callers may rely on the length of
 * the returned arrays -- confirm whether that is intended.
 * NOTE(review): `memo.loss[j]` is the loss evaluated BEFORE update step j,
 * while `memo.params[j]` holds the parameters AFTER update step j.
 *
 * @param {Function} grad_loss - Called with the current parameter array; must
 *   return `{value, grads}` where `grads` lines up index-for-index with the
 *   parameters (presumably something like the result of `tf.valueAndGrads`).
 * @param {number} epochs - Upper (inclusive) bound of the iteration counter.
 * @param {*} lr - Step size passed to the tensor `.mul` method.
 * @param {*} momentum - Velocity decay factor passed to `tf.mul`.
 * @param {Array} init_params - Starting parameters; the array is not mutated.
 * @returns {{loss: Array, params: Array}} Per-iteration loss values and
 *   parameter arrays.
 */
function optimize(grad_loss, epochs, lr, momentum, init_params) {
  const memo = { loss: [], params: [] };
  let params = init_params;
  // Velocity starts as plain numeric zeros; the tf ops accept numbers here.
  let velocity = params.map(() => 0);

  try {
    for (let epoch = 0; epoch <= epochs; epoch++) {
      const { value, grads } = grad_loss(params);

      // tf.tidy frees the `momentum * velocity` intermediate and keeps only
      // the returned tensor.
      const nextVelocity = grads.map((g, k) =>
        tf.tidy(() => tf.add(g, tf.mul(momentum, velocity[k])))
      );
      // The previous velocity is no longer referenced anywhere; release it.
      // (tf.dispose ignores the numeric zeros of the first iteration.)
      tf.dispose(velocity);
      velocity = nextVelocity;

      // Old parameter tensors are NOT disposed: they are either caller-owned
      // (init_params) or already stored in memo.params.
      params = params.map((w, k) =>
        tf.tidy(() => tf.sub(w, velocity[k].mul(lr)))
      );

      memo.loss.push(value);
      memo.params.push(params);
    }
  } finally {
    // The last velocity is internal state only; free it even if grad_loss threw.
    tf.dispose(velocity);
  }

  return memo;
}