/**
 * Renders the grid world of an actor-critic simulation as an SVG, followed by
 * an HTML legend table for the value and policy colour scales.
 *
 * Every non-wall cell shows, from the outside in: the state border/fill, the
 * reward square, a diamond (a group rotated by -45 degrees) holding the agent
 * marker and the four policy quadrants, and the value square in the centre.
 * The steps recorded in `history.steps` are drawn on top of the grid.
 *
 * NOTE: the nested fragments are deliberately plain strings. They are spliced
 * into the enclosing `svg` template as markup, which relies on the template
 * tag concatenating interpolated strings rather than escaping them.
 *
 * @param {*} viz - Unused by this function; kept so existing callers still work.
 * @param {Object} sim - Simulation. Reads `columns`, `rows`, `states`,
 *   `STATES.BEGIN`/`END`/`WALL`, `rewards`, `state`, `policy`, `value` and
 *   `actions` (each action is indexed as `[rowDelta, columnDelta]`).
 * @param {Object} history - Reads `history.steps`; each step has a `state`
 *   (`[row, column]`) and, for every step but the first, an `action`.
 * @param {*} controls - Unused by this function; kept so existing callers still work.
 * @param {Object} [options]
 * @param {number} [options.cellSize=50] - Rendered size of one cell in pixels.
 * @returns {*} Whatever the `html` template tag returns for the SVG plus legend.
 */
actorCriticViz = function(viz, sim, history, controls, {cellSize = 50} = {}) {
  // Palette.
  const borderColor = colors.gray.brighter(0.25);
  const innerBorderColor = colors.gray.brighter(.5);
  const beginColor = colors.green;
  const endColor = colors.red;
  const wallColor = colors.brown;
  const agentColor = colors.gray.darker(2.5);
  const stepColor = colors.pink;

  // Clamped diverging colour scales.
  const divergingScale = (domain, range) =>
    d3.scaleDiverging(domain, d3.piecewise(d3.interpolateHcl, range)).clamp(true);
  const rewardScale = divergingScale([-50, 0, 50], [colors.yellow, colors.white, colors.cyan]);
  const valueScale = divergingScale([-50, 0, 50], [colors.blue, colors.white, colors.green]);
  const policyScale = divergingScale([-50, 0, 20], [colors.red, colors.gray, colors.yellow]);

  // Sizes in viewBox units; `stateSize` is also the pitch of the grid.
  const stateSize = 100;
  const rewardSize = 84;
  const agentSize = 68;
  const valueSize = 38;
  const policySize = 54;
  const stepSize = 8;

  // Attributes shared by both step markers (the box leaves a 1-unit margin).
  const markerExtent = stepSize * 2 + 2;
  const markerAttributes = `
    viewBox="${-stepSize - 1} ${-stepSize - 1} ${markerExtent} ${markerExtent}"
    markerUnits="userSpaceOnUse"
    markerWidth="${markerExtent}"
    markerHeight="${markerExtent}"
    orient="auto-start-reverse"`;

  // Policy quadrants, listed in the order of the action index they display.
  // Positions are in the rotated frame, so e.g. the top-left quadrant ends up
  // pointing left on screen.
  const policyQuadrants = [
    {label: "Left", x: -policySize / 2, y: -policySize / 2},
    {label: "Up", x: 0, y: -policySize / 2},
    {label: "Right", x: 0, y: 0},
    {label: "Down", x: -policySize / 2, y: 0},
  ];

  // Centre of the cell at [row, column], as an SVG translate().
  const cellTransform = (row, column) =>
    `translate(${stateSize * (column + 0.5)} ${stateSize * (row + 0.5)})`;

  // Fill colour and tooltip prefix for a state kind.
  const describeState = (state) => {
    if (state === sim.STATES.BEGIN) {
      return {fill: beginColor, label: `Begin!\n`};
    }
    if (state === sim.STATES.END) {
      return {fill: endColor, label: `End!\n`};
    }
    if (state === sim.STATES.WALL) {
      return {fill: wallColor, label: `Wall!\n`};
    }
    return {fill: colors.white, label: ``};
  };

  // Markup (string) for everything drawn inside a non-wall cell.
  const cellContents = (row, column) => {
    const where = `State: [${row}, ${column}]`;
    const reward = sim.rewards[row][column];
    const value = sim.value[row][column];
    const policy = sim.policy[row][column];

    const agent = math.deepEqual(sim.state, [row, column])
      ? `
        <rect
          width="${agentSize}"
          height="${agentSize}"
          x="${-agentSize / 2}"
          y="${-agentSize / 2}"
          stroke="none"
          fill="${agentColor}"
        ><title>Agent!\n${where}</title></rect>`
      : ``;

    const policyRects = policyQuadrants.map(({label, x, y}, action) => `
        <rect
          width="${policySize / 2}"
          height="${policySize / 2}"
          x="${x}"
          y="${y}"
          fill="${policyScale(policy[action])}"
          stroke="${innerBorderColor}"
          stroke-width="2"
        ><title>${where}\nAction: '${label}'\nPolicy: ${policy[action]}</title></rect>`).join("");

    return `
      <rect
        width="${rewardSize}"
        height="${rewardSize}"
        x="${-rewardSize / 2}"
        y="${-rewardSize / 2}"
        stroke="none"
        fill="${rewardScale(reward)}"
      ><title>${where}\nReward: ${reward}</title></rect>
      <g stroke="none" transform="rotate(-45)">${agent}${policyRects}
      </g>
      <rect
        width="${valueSize}"
        height="${valueSize}"
        x="${-valueSize / 2}"
        y="${-valueSize / 2}"
        stroke="${innerBorderColor}"
        stroke-width="2"
        fill="${valueScale(value)}"
      ><title>${where}\nValue: ${value}</title></rect>`;
  };

  // End of a hit-a-wall line along one axis: half a cell in the direction of
  // the action, pulled 2 units back towards the cell centre.
  const wallOffset = (direction) => {
    const inset = (direction < 0) ? 2 : (direction > 0) ? -2 : 0;
    return (direction * stateSize / 2) + inset;
  };

  // Attributes (string) of the <line> drawn for one history step, in
  // coordinates relative to the centre of the cell the step ended in.
  const stepLineAttributes = (step, index, steps) => {
    if (index === 0) {
      // Start state: only the circle marker. `step.action` is not read here.
      return `x1="0" y1="0" x2="0" y2="0" marker-end="url(#circle)"`;
    }
    const delta = sim.actions[step.action];
    if (!math.deepEqual(step.state, steps[index - 1].state)) {
      // Successful action: line coming from the direction of the previous cell.
      return `
        x1="${delta[1] * -(stateSize - stepSize)}"
        y1="${delta[0] * -(stateSize - stepSize)}"
        x2="0"
        y2="0"
        stroke="${stepColor}"
        stroke-width="4"
        marker-end="url(#circle)"`;
    }
    // Hit-a-wall action: the state did not change, so draw a stub towards the
    // wall that ends in a bar.
    return `
        x1="0"
        y1="0"
        x2="${wallOffset(delta[1])}"
        y2="${wallOffset(delta[0])}"
        stroke="${stepColor}"
        stroke-width="4"
        marker-start="url(#circle)"
        marker-end="url(#bar)"`;
  };

  const cells = math.map(sim.states, (state, [row, column]) => {
    const {fill, label} = describeState(state);
    return svg`
      <g transform="${cellTransform(row, column)}">
        <rect
          width="${stateSize}"
          height="${stateSize}"
          x="${-stateSize / 2}"
          y="${-stateSize / 2}"
          stroke="${borderColor}"
          stroke-width="2"
          fill="${fill}"
        ><title>${label}State: [${row}, ${column}]</title></rect>
        ${(state !== sim.STATES.WALL) ? cellContents(row, column) : ``}
      </g>
    `;
  }).flat();

  const steps = history.steps.map((step, index, allSteps) => {
    return svg`
      <g transform="${cellTransform(step.state[0], step.state[1])}" opacity="0.5">
        <line ${stepLineAttributes(step, index, allSteps)}
        ><title>State: [${step.state[0]}, ${step.state[1]}]\nStep: ${index}</title></line>
      </g>
    `;
  });

  // The legend used to contain a "Reward" row wrapped in an HTML comment.
  // Interpolations inside an HTML comment are still evaluated, so that row
  // kept calling ramp(rewardScale, ...) on every render and put its result
  // inside the comment; it has been removed rather than left as dead markup.
  return html`${svg`
    <svg
      width="${sim.columns * cellSize + 1}"
      height="${sim.rows * cellSize + 1}"
      viewBox="-0.5, -0.5, ${sim.columns * stateSize + 1}, ${sim.rows * stateSize + 1}"
      style="display: block;"
    >
      <defs>
        <marker id="circle" ${markerAttributes}>
          <circle r="${stepSize}" fill="${stepColor}"/>
        </marker>
        <marker id="bar" ${markerAttributes}>
          <rect x="-3" y="${-stepSize}" width="4" height="${stepSize * 2}" fill="${stepColor}"/>
        </marker>
      </defs>
      ${cells}
      ${steps}
    </svg>
  `}
  <table>
    <thead>
      <tr>
        <th></th>
        <th></th>
        <th></th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td>Value</td>
        <td>
          <svg width="30" height="30">
            <g>
              <rect stroke="#000" fill="#000" x="7" y="6" width="15" height="15"/>
            </g>
          </svg>
        </td>
        <td>${ramp(valueScale, "", 100, true)}</td>
      </tr>
      <tr>
        <td>Policy</td>
        <td>
          <svg width="30" height="30">
            <g>
              <rect stroke="#000" fill="#000" x="-10" y="10" width="20" height="20" transform="rotate(-45)"/>
              <rect stroke="#fff" fill="#fff" x="7" y="6" width="15" height="15"/>
            </g>
          </svg>
        </td>
        <td>${ramp(policyScale, "", 100, true)}</td>
      </tr>
    </tbody>
  </table>
  `;
}