Public
Edited
May 20, 2018
1 star
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/**
 * Sample-average value estimate for one action.
 *
 * Averages the `reward` of every history entry whose `action` loosely
 * equals (`==`) the requested action.
 *
 * @param {*} action - Action identifier to estimate.
 * @param {Array<{action: *, reward: number}>} history - Past steps.
 * @returns {number} Mean observed reward for the action, or 0 when the
 *   action does not appear in the history.
 */
function Qestimate(action, history) {
  let matches = 0;
  let rewardSum = 0;
  history.forEach((step) => {
    if (step.action == action) {
      rewardSum = step.reward + rewardSum;
      matches += 1;
    }
  });
  // No observations yet: report a neutral estimate instead of dividing by 0.
  return matches === 0 ? 0 : rewardSum / matches;
}
Insert cell
Insert cell
/**
 * Greedy action selection: returns the action with the highest current
 * value estimate.
 *
 * Every entry of the outer `actions` collection is scored with
 * `Qestimate(action, history)` and the highest-scoring one wins. Estimates
 * that are zero or negative take part in the comparison, so a known-bad
 * action is never preferred over a better (or untried, estimate 0) one.
 * When several actions tie for the best estimate, the one appearing last
 * in `actions` is returned.
 *
 * @param {Array<{action: *, reward: number}>} history - Past steps.
 * @returns {*} The action with the largest estimate; 4 when `actions` is
 *   empty or no estimate is a comparable number (legacy fallback).
 */
function greedy(history) {
  // Legacy default, kept so callers still get a value when nothing can be ranked.
  const FALLBACK_ACTION = 4;
  const best = actions.reduce(
    (leader, action) => {
      const reward = Qestimate(action, history);
      // NaN cannot be ranked; ignore it rather than letting it win by default.
      if (Number.isNaN(reward)) return leader;
      // `>=` keeps the tie-breaking rule: later actions win ties.
      return reward >= leader.reward ? { action, reward } : leader;
    },
    { action: FALLBACK_ACTION, reward: -Infinity },
  );
  return best.action;
}
Insert cell
Insert cell
Insert cell
Insert cell
// Factory for a fresh set of arm values: calls `normal()` once for every
// element of `range(10)` and returns the results as an array.
// NOTE(review): presumably `range(10)` yields 10 indices and `normal()` draws
// a normally distributed sample; both come from other cells, so confirm there.
true_values = () => {
  const arms = range(10);
  return arms.map(() => normal());
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/**
 * Builds an epsilon-greedy bandit agent using incremental sample averages.
 *
 * The returned function performs one step each time it is called:
 *  - if `Math.random() > epsilon`, it picks uniformly at random among the
 *    arms whose current estimate Q is maximal (greedy, random tie-break);
 *  - otherwise it picks uniformly at random among all arms (exploration);
 *  - it then obtains a reward from `bandit_reward(true_values, choice)` and
 *    updates that arm's count N and running-mean estimate Q.
 *
 * @param {Array} true_values - One entry per arm; passed through unchanged to
 *   `bandit_reward`. Only its length is used here.
 * @param {number} epsilon - Threshold compared against `Math.random()`; a
 *   step is greedy when the random draw exceeds it.
 * @returns {function(): {choice: number, reward: number}} Step function that
 *   returns the chosen arm index and the reward received for it.
 */
function simple_bandit(true_values, epsilon) {
  const armCount = true_values.length;
  const Q = new Array(armCount).fill(0); // running mean reward per arm
  const N = new Array(armCount).fill(0); // number of pulls per arm
  const allArms = Array.from({ length: armCount }, (_, arm) => arm);
  const randomChoice = (arr) => arr[Math.floor(Math.random() * arr.length)];

  // Every arm whose estimate equals the current maximum.
  const greedyArms = () => {
    let bestQ = -Infinity;
    for (const q of Q) {
      if (q > bestQ) bestQ = q;
    }
    const tied = allArms.filter((arm) => Q[arm] === bestQ);
    // If no estimate is comparable (e.g. all NaN), fall back to any arm.
    return tied.length > 0 ? tied : allArms;
  };

  const strategy = () => {
    const isGreedyStep = Math.random() > epsilon;
    const choice = randomChoice(isGreedyStep ? greedyArms() : allArms);
    const reward = bandit_reward(true_values, choice);
    N[choice] += 1;
    // Incremental mean: newMean = oldMean + (sample - oldMean) / n
    Q[choice] += (reward - Q[choice]) / N[choice];
    return { choice, reward };
  };
  return strategy;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more