A Random Walk Through the Grid World / Jim Kan

Jim Kan

Workspace

Public

Reinforcement Learning

Edited

Apr 15, 2019

Fork of Temporal-Difference Learning: SARSA(0)

•

1 fork

Importers

4 stars

Reinforcement Learning

Q-Learning · On-policy Monte Carlo control (for ε-soft policies) · Temporal-Difference Learning: SARSA(0) · SARSA(λ)

A Random Walk Through the Grid World

Reinforcement Learning notes

/**
 * Pick an action for an observation by sampling from a policy.
 *
 * @param {object} env - Environment; its `actions` array supplies the result.
 * @param {*} observation - Observation forwarded unchanged to the policy.
 * @param {Function} policyFn - Called as `policyFn(env, observation)`; its
 *   result is handed to `chooseRandomly` (presumably one probability per
 *   entry of `env.actions` -- confirm against `chooseRandomly`).
 * @returns {*} The entry of `env.actions` at the position `chooseRandomly` returned.
 */
chooseAction = (env, observation, policyFn) => {
  const distribution = policyFn(env, observation);
  const candidates = env.actions;
  return candidates[chooseRandomly(distribution)];
}

/**
 * Generate the transitions of a single episode, one per iteration.
 *
 * The environment is reset, an initial action is chosen, and then `step` is
 * applied repeatedly until the environment reports `done` or `limit` steps
 * have been produced.
 *
 * @param {object} env - Environment passed to `reset`, `step` and `chooseAction`.
 * @param {Function} policyFn - Policy forwarded to `chooseAction`.
 * @param {number} episodeNumber - Copied verbatim into every yielded step.
 * @param {number} limit - Maximum number of steps to yield.
 * @yields {{current: object, reward: *, next: object, done: boolean,
 *           episodeNumber: number, stepNumber: number}} One transition:
 *   the (observation, action) taken, the reward received, and the
 *   (observation, action) that follows.
 */
function* genEpisodeStep(env, policyFn, episodeNumber, limit) {
  let { state, observation } = reset(env);
  let action = chooseAction(env, observation, policyFn);
  let done = false;
  let i = 0;

  while (!done && i < limit) {
    const result = step(env, state, action);
    // No follow-up action is sampled once the episode has ended; in that
    // case `next.action` is the boolean `false`.
    const nextAction = !result.done && chooseAction(env, result.observation, policyFn);

    yield {
      current: {
        observation,
        action,
        // Technically, the agent can not directly access the current state;
        // it can only make observations that are possibly noisy measurements of
        // the current state -- but it's convenient to keep it here for
        // visualization purposes, so we slap the underscore in front here.
        _state: state,
      },
      reward: result.reward,
      next: {
        observation: result.observation,
        action: nextAction,
        _state: result.state, // technically, state is hidden... hence the underscore
      },
      done: result.done,
      episodeNumber,
      stepNumber: i,
    };

    // Advance: the "next" pair becomes the "current" pair of the following step.
    done = result.done;
    state = result.state;
    observation = result.observation;
    action = nextAction;
    ++i;
  }
}

// episode = (policy, limit) => [...genEpisodeStep(env, policy, limit)]

/**
 * Generate a growing record of one episode.
 *
 * Each step produced by `genEpisodeStep` is appended to a single array, and
 * that array is yielded after every append.
 *
 * @param {object} env - Environment forwarded to `genEpisodeStep`.
 * @param {Function} policy - Policy forwarded to `genEpisodeStep`.
 * @param {number} limit - Maximum number of steps, forwarded to `genEpisodeStep`.
 * @yields {Array<object>} The steps collected so far. The SAME array instance
 *   is yielded every time and keeps growing; copy it if a snapshot is needed.
 */
function* genEpisode(env, policy, limit) {
  let epi = [];
  // NOTE(review): `epi.length` is evaluated once, while `epi` is still empty,
  // so every step is tagged with episodeNumber 0 -- confirm this is intended.
  for (const aStep of genEpisodeStep(env, policy, epi.length, limit)) {
    epi.push(aStep);
    yield epi;
  }
}

/**
 * Run `genEpisode` through the `slowly` helper.
 *
 * A zero-argument factory that starts a fresh `genEpisode(env, policyFn, limit)`
 * is handed to `slowly` together with `interval`; everything `slowly` yields
 * is re-yielded from here.
 *
 * @param {object} env - Environment forwarded to `genEpisode`.
 * @param {Function} policyFn - Policy forwarded to `genEpisode`.
 * @param {number} [limit=100] - Step limit forwarded to `genEpisode`.
 * @param {number} [interval=1000] - Forwarded to `slowly` (presumably the
 *   delay between values, in milliseconds -- confirm against `slowly`).
 */
function* slowGenEpisode(env, policyFn, limit = 100, interval = 1000) {
  const startEpisode = () => genEpisode(env, policyFn, limit);
  yield* slowly(startEpisode, interval);
}

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.

Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.

Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.

Learn more