/**
 * Build an epsilon-greedy action-value strategy for a multi-armed bandit.
 *
 * The returned closure keeps a running sample-average estimate (Q) and a pull
 * count (N) per arm. On every call it picks an arm, obtains a reward through
 * the externally defined `bandit_reward(true_values, choice)`, updates the
 * estimate of the chosen arm incrementally, and reports what happened.
 *
 * @param {Array} true_values - One entry per arm; only its length is used
 *   here, the array itself is forwarded untouched to `bandit_reward`.
 * @param {number} epsilon - Exploration threshold: the greedy arm is chosen
 *   when `Math.random() > epsilon`, otherwise a uniformly random arm is kept.
 * @returns {function(): {choice: number, reward: *}} Stateful strategy; each
 *   call performs one step and returns the chosen arm index and its reward.
 */
function simple_bandit(true_values, epsilon) {
  const armCount = true_values.length;
  const Q = new Array(armCount).fill(0); // running mean reward per arm
  const N = new Array(armCount).fill(0); // number of pulls per arm
  const choices = Array.from({ length: armCount }, (_, arm) => arm);
  const randomChoice = (arr) => arr[Math.floor(Math.random() * arr.length)];

  // Indices of every arm whose estimate equals the current maximum, so ties
  // can be broken uniformly at random instead of always favouring one arm.
  const greedyArms = () => {
    let best = -Infinity;
    let arms = [];
    Q.forEach((q, arm) => {
      if (q > best) {
        best = q;
        arms = [arm];
      } else if (q === best) {
        arms.push(arm);
      }
    });
    return arms;
  };

  const strategy = () => {
    // The exploratory pick is always drawn first; it is overridden below when
    // the epsilon draw selects exploitation.
    let choice = randomChoice(choices);
    if (Math.random() > epsilon) {
      const candidates = greedyArms();
      // candidates is empty only when no estimate is comparable (e.g. all
      // NaN); in that case the exploratory pick is kept.
      if (candidates.length > 0) {
        choice = randomChoice(candidates);
      }
    }
    const reward = bandit_reward(true_values, choice);
    N[choice] += 1;
    // Incremental sample-average update: Q <- Q + (r - Q) / n.
    Q[choice] += (reward - Q[choice]) / N[choice];
    return { choice, reward };
  };

  return strategy;
}