/**
 * Lazily plays a single episode and yields one transition record per step.
 *
 * On the first `next()` call the environment is reset via `reset(env)` and an
 * initial action is picked with `chooseAction`. Every iteration then calls
 * `step(env, state, action)`, selects the follow-up action, and yields the
 * transition. Iteration ends once a step result reports `done` or after
 * `limit` steps have been yielded, whichever comes first.
 *
 * @param {*} env - Environment handle forwarded to `reset`, `step` and `chooseAction`.
 * @param {*} policyFn - Policy forwarded unchanged to `chooseAction`.
 * @param {*} episodeNumber - Copied verbatim into every yielded record.
 * @param {number} limit - Maximum number of steps to yield.
 * @yields {{current: {observation: *, action: *, _state: *}, reward: *,
 *   next: {observation: *, action: *, _state: *}, done: *,
 *   episodeNumber: *, stepNumber: number}} One transition; `next.action` is
 *   `false` when the step result reports `done`.
 */
function* genEpisodeStep(env, policyFn, episodeNumber, limit) {
  const start = reset(env);
  let { state: currentState, observation: currentObservation } = start;
  let currentAction = chooseAction(env, currentObservation, policyFn);

  for (let stepNumber = 0; stepNumber < limit; stepNumber += 1) {
    const outcome = step(env, currentState, currentAction);

    // No follow-up action is requested from the policy once the episode has
    // ended; the placeholder value in that case is `false`.
    const upcomingAction = outcome.done
      ? false
      : chooseAction(env, outcome.observation, policyFn);

    // Fresh `current` / `next` objects are built for every record so that a
    // consumer mutating a yielded record cannot influence later steps.
    yield {
      current: {
        observation: currentObservation,
        action: currentAction,
        _state: currentState,
      },
      reward: outcome.reward,
      next: {
        observation: outcome.observation,
        action: upcomingAction,
        _state: outcome.state,
      },
      done: outcome.done,
      episodeNumber,
      stepNumber,
    };

    if (outcome.done) {
      return;
    }

    // Advance: what was "next" becomes "current" for the following step.
    currentState = outcome.state;
    currentObservation = outcome.observation;
    currentAction = upcomingAction;
  }
}