Published
Edited
Apr 20, 2021
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Next: key, step by step or episode, map "language"?
/**
 * Renders a grid-world actor-critic simulation as an SVG picture followed by
 * a small legend table, and returns the result of the `html` tagged template.
 *
 * Free names used but not defined in this cell: `colors`, `d3`, `math`,
 * `html`, `svg`, `ramp`. NOTE(review): presumably `html`/`svg` are the
 * Observable template tags and `math` is math.js — confirm against the
 * notebook's imports.
 *
 * What is drawn, per grid cell (each cell is 100 viewBox units wide/high):
 *   - a "state" square filled by cell type (BEGIN / END / WALL, else white);
 *   - for every non-wall cell: a "reward" square coloured by `rewardScale`,
 *     a group rotated by -45 degrees holding the agent square (only when
 *     `sim.state` deep-equals `[row, column]`) and four policy quadrants
 *     whose tooltips label policy indices 0..3 as 'Left', 'Up', 'Right',
 *     'Down', and finally a "value" square coloured by `valueScale`.
 * On top of the grid, every entry of `history.steps` is drawn at 50% opacity:
 *   - index 0: a circle marker at the cell centre;
 *   - a step whose state differs from the previous step's state: a line
 *     ending in a circle marker at the cell centre;
 *   - otherwise (state unchanged, labelled "Hit-a-wall action" in the
 *     markup): a line from the cell centre towards the cell edge in the
 *     action's direction, ending in a bar marker.
 * The legend table has rows for Value and Policy; the Reward row is present
 * in the markup but wrapped in an HTML comment.
 *
 * @param viz      Not read anywhere in this function body.
 * @param sim      Simulation object. Properties read here: `columns`, `rows`,
 *                 `states` (iterated with `math.map`, callback receives the
 *                 `[row, column]` index), `STATES.BEGIN`, `STATES.END`,
 *                 `STATES.WALL`, `rewards[row][column]`, `state`,
 *                 `policy[row][column][0..3]`, `value[row][column]`, and
 *                 `actions[step.action]` (element `[0]` is used for the y
 *                 direction, element `[1]` for the x direction).
 * @param history  Object with a `steps` array; each step's `state`
 *                 (`[row, column]`) and `action` are read.
 * @param controls Not read anywhere in this function body.
 * @param options  Optional settings object.
 * @param options.cellSize  Rendered size of one grid cell in the SVG's
 *                 `width`/`height` attributes (default 50). The viewBox
 *                 always uses 100 units per cell, so this only scales the
 *                 picture.
 * @returns        Whatever the `html` tagged template produces for the
 *                 SVG-plus-table markup.
 */
actorCriticViz = function(viz, sim, history, controls, {cellSize = 50} = {}) {
// Palette: every colour is derived from the external `colors` object.
const borderColor = colors.gray.brighter(0.25);
const innerBorderColor = colors.gray.brighter(.5);
const beginColor = colors.green;
const endColor = colors.red;
const wallColor = colors.brown;
const agentColor = colors.gray.darker(2.5);
const stepColor = colors.pink;
// Reward colour scale: diverging, domain [-50, 0, 50] mapped to
// yellow -> white -> cyan; inputs outside the domain are clamped.
const rewardScale = d3.scaleDiverging(
// [-100, 0, 100],
[-50, 0, 50],
d3.piecewise(d3.interpolateHcl, [colors.yellow, colors.white, colors.cyan]),
// d3.piecewise(d3.interpolateHcl, [colors.red, colors.white, colors.red]),
).clamp(true);
// Value colour scale: diverging, domain [-50, 0, 50] mapped to
// blue -> white -> green; clamped.
const valueScale = d3.scaleDiverging(
[-50, 0, 50],
// [-100, 0, 100],
// d3.piecewise(d3.interpolateHcl, [colors.orange, colors.white, colors.blue]),
d3.piecewise(d3.interpolateHcl, [colors.blue, colors.white, colors.green]),
).clamp(true);
// Policy colour scale: diverging with an asymmetric domain [-50, 0, 20]
// mapped to red -> gray -> yellow; clamped.
const policyScale = d3.scaleDiverging(
// [-100, 0, 100],
[-50, 0, 20],
// d3.piecewise(d3.interpolateHcl, [colors.orange, colors.white, colors.blue]),
d3.piecewise(d3.interpolateHcl, [colors.red, colors.gray, colors.yellow]),
).clamp(true);
// Side lengths (in viewBox units) of the nested squares drawn in each cell.
// NOTE: the viewBox and the per-cell/per-step translate() transforms below
// use a literal 100 per cell, which equals stateSize; keep them in sync if
// stateSize is ever changed.
const stateSize = 100;
const rewardSize = 84;
const agentSize = 68;
const valueSize = 38;
// policySize is the side of the whole 2x2 policy block; each quadrant is
// drawn with policySize/2.
const policySize = 54;
// stepSize is the radius of the "circle" marker and half the height of the
// "bar" marker used for the history trail.
const stepSize = 8;
// d3.select("body").append(
return html`${svg`
<svg
width="${sim.columns * cellSize + 1}"
height="${sim.rows * cellSize + 1}"
viewBox="-0.5, -0.5, ${sim.columns * 100 + 1}, ${sim.rows * 100 + 1}"
style="display: block;"
>
<defs>
<marker
id="circle"
viewBox="${-stepSize - 1} ${-stepSize - 1} ${stepSize * 2 + 2} ${stepSize * 2 + 2}"
markerUnits="userSpaceOnUse"
markerWidth="${stepSize * 2 + 2}"
markerHeight="${stepSize * 2 + 2}"
orient="auto-start-reverse"
>
<circle r="${stepSize}" fill="${stepColor}"/>
</marker>
<marker
id="bar"
viewBox="${-stepSize - 1} ${-stepSize - 1} ${stepSize * 2 + 2} ${stepSize * 2 + 2}"
markerUnits="userSpaceOnUse"
markerWidth="${stepSize * 2 + 2}"
markerHeight="${stepSize * 2 + 2}"
orient="auto-start-reverse"
>
<rect x="-3" y="${-stepSize}" width="4" height="${stepSize * 2}" fill="${stepColor}"/>
</marker>
</defs>
${math.map(sim.states, (state, [row, column]) => {
return svg`
<g transform="translate(${100 * (column + 0.5)} ${100 * (row + 0.5)})">
<!-- State -->
<rect
width="${stateSize}"
height="${stateSize}"
x="${-stateSize/2}"
y="${-stateSize/2}"
stroke="${borderColor}"
stroke-width="2"
fill="${
(state === sim.STATES.BEGIN)
? beginColor
: (state === sim.STATES.END)
? endColor
: (state === sim.STATES.WALL)
? wallColor
: colors.white
}"
>
<title>${
(state === sim.STATES.BEGIN)
? `Begin!\n`
: (state === sim.STATES.END)
? `End!\n`
: (state === sim.STATES.WALL)
? `Wall!\n`
: ``
}State: [${row}, ${column}]
</title>
</rect>
${(state !== sim.STATES.WALL)
? `
<!-- Reward -->
<rect
width="${rewardSize}"
height="${rewardSize}"
x="${-rewardSize/2}"
y="${-rewardSize/2}"
stroke="none"
fill="${rewardScale(sim.rewards[row][column])}"
>
<title>State: [${row}, ${column}]\nReward: ${sim.rewards[row][column]}</title>
</rect>
<g stroke="none" transform="rotate(-45) ">
<!-- Agent -->
${math.deepEqual(sim.state, [row, column])
? `
<rect
width="${agentSize}"
height="${agentSize}"
x="${-agentSize/2}"
y="${-agentSize/2}"
stroke="none"
fill="${agentColor}"
>
<title>Agent!\nState: [${row}, ${column}]</title>
</rect>`
: ``
}
<!-- Policy -->
<rect
width="${policySize/2}"
height="${policySize/2}"
x="${-policySize/2}"
y="${-policySize/2}"
fill="${policyScale(sim.policy[row][column][0])}"
stroke="${innerBorderColor}"
stroke-width="2"
>
<title>State: [${row}, ${column}]\nAction: 'Left'\nPolicy: ${sim.policy[row][column][0]}</title>
</rect>
<rect
width="${policySize/2}"
height="${policySize/2}"
x="0"
y="${-policySize/2}"
fill="${policyScale(sim.policy[row][column][1])}"
stroke="${innerBorderColor}"
stroke-width="2"
>
<title>State: [${row}, ${column}]\nAction: 'Up'\nPolicy: ${sim.policy[row][column][1]}</title>
</rect>
<rect
width="${policySize/2}"
height="${policySize/2}"
x="0"
y="0"
fill="${policyScale(sim.policy[row][column][2])}"
stroke="${innerBorderColor}"
stroke-width="2"
>
<title>State: [${row}, ${column}]\nAction: 'Right'\nPolicy: ${sim.policy[row][column][2]}</title>
</rect>
<rect
width="${policySize/2}"
height="${policySize/2}"
x="${-policySize/2}"
y="0"
fill="${policyScale(sim.policy[row][column][3])}"
stroke="${innerBorderColor}"
stroke-width="2"
>
<title>State: [${row}, ${column}]\nAction: 'Down'\nPolicy: ${sim.policy[row][column][3]}</title>
</rect>
</g>
<!-- Value -->
<rect
width="${valueSize}"
height="${valueSize}"
x="${-valueSize/2}"
y="${-valueSize/2}"
stroke="${innerBorderColor}"
stroke-width="2"
fill="${valueScale(sim.value[row][column])}"
>
<title>State: [${row}, ${column}]\nValue: ${sim.value[row][column]}</title>
</rect>
`
: ``
}
</g>
`;
}).flat()}
${history.steps.map((step, index, steps) => {
return svg`
<g transform="translate(${100 * (step.state[1] + 0.5)} ${100 * (step.state[0] + 0.5)})" opacity="0.5">
${
(index === 0)
? `<!-- Start state -->
<line
x1="0"
y1="0"
x2="0"
y2="0"
marker-end="url(#circle)"
>`
: (!math.deepEqual(step.state, steps[index - 1].state))
? `<!-- Successful action -->
<line
x1="${sim.actions[step.action][1] * -(stateSize - stepSize)}"
y1="${sim.actions[step.action][0] * -(stateSize - stepSize)}"
x2="0"
y2="0"
stroke="${stepColor}"
stroke-width="4"
marker-end="url(#circle)"
>`
: `<!-- Hit-a-wall action -->
<line
x1="0"
y1="0"
x2="${
(sim.actions[step.action][1] * stateSize/2) + (
(sim.actions[step.action][1] < 0)
? 2
: (sim.actions[step.action][1] > 0)
? -2
: 0
)
}"
y2="${
(sim.actions[step.action][0] * stateSize/2) + (
(sim.actions[step.action][0] < 0)
? 2
: (sim.actions[step.action][0] > 0)
? -2
: 0
)
}"
stroke="${stepColor}"
stroke-width="4"
marker-start="url(#circle)"
marker-end="url(#bar)"
>
`
}
<title>State: [${step.state[0]}, ${step.state[1]}]\nStep: ${index}</title>
</line>
</g>
`;
})}
</svg>
`}
<table>
<thead>
<tr>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<!--
<tr>
<td>Reward</td>
<td></td>
<td>${ramp(rewardScale, "", 100, true)}</td>
</tr>
-->
<tr>
<td>Value</td>
<td>
<svg width = "30" height = "30">
<g>
<rect stroke="#000" fill="#000" x="7" y="6" width="15" height="15"/>
</g>
</svg>
</td>
<td>${ramp(valueScale, "", 100, true)}</td>
</tr>
<tr>
<td>Policy</td>
<td>
<svg width = "30" height = "30">
<g>
<rect stroke="#000" fill="#000" x="-10" y="10" width="20" height="20" transform="rotate(-45)"/>
<rect stroke="#fff" fill="#fff" x="7" y="6" width="15" height="15"/>
</g>
</svg>
</td>
<td>${ramp(policyScale, "", 100, true)}</td>
</tr>
</tbody>
</table>
`;
}
Insert cell
Insert cell
Insert cell
// ramp(rewardScale, "reward");
// ramp(valueScale, "value");
// ramp(policyScale, "policy");
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more