Stabilizing the cart pole inverse pendulum using LQR / Patrick Mineault

Patrick Mineault

Workspace

Published

Edited

Sep 10, 2021

1 fork

12 stars

render = {

simulate;

let score = 0;

let canvas = d3.create("canvas").attr("width", width/2).attr("height", 200).attr("tabindex", 1);

canvas.node().focus();

let myCartPole = cartPole();

let state = [0, 0, Math.PI + .05, 0];

let u = 0.0;

canvas.on("keydown", (e) => {

mutable debug2 = e.code;

if(e.code == "ArrowLeft") {

u = -50;

} else if(e.code == "ArrowRight") {

u = 50;

}

}).on("keyup", (e) => {

u = 0;

});

let alive = true;

let i = 0;

while(1) {

state = advanceOneStep(state, u, myCartPole, 1 / 60);

// Magnify from simulation coordinates to screen coordinates.

myCartPole.x = state[0] * myCartPole.magX;

myCartPole.theta = state[2];

renderCartPole(myCartPole, canvas.node());

const context = canvas.node().getContext('2d');

context.font = "30px Arial";

context.fillStyle = '#000';

context.textAlign = "right";

context.fillText(`${score}`, width/2 - 20, 50);

yield canvas.node();

if (i == 0) {

// Hack to focus only once.

canvas.node().focus();

}

if(Math.cos(myCartPole.theta) > 0) {

alive = false;

}

if(alive) {

score++;

}

i++;

}

mutable debug2 = "";

function advanceOneStep(state, u, p, dt) {

let s = [...state];

// Use a backward Euler method to in

for(let i = 0; i < 3; i++) {

let timeDerivatives = calculateTimeDerivatives(s, u, p);

s = math.evaluate('state + timeDerivatives * dt', {state: state, timeDerivatives: timeDerivatives, dt: dt});

}

return s;

}

function calculateTimeDerivatives(state, u, p) {

// Here the state is [x, v, theta, omega], where v = dx/dt and

// omega = dtheta/dt

// Equations 8.67 in Steve's book.

let [x, v, theta, omega] = state;

let denom = (p.m * p.L * p.L * (p.M + p.m * (1 - Math.pow(Math.cos(theta), 2))));

let xdot = v;

let thetadot = omega;

let vdot = (-p.m * p.m * p.L * p.L * p.g * Math.cos(theta) * Math.sin(theta)

+ p.m * p.L * p.L * (p.m * p.L * omega * omega * Math.sin(theta) - p.delta * v)

+ p.m * p.L * p.L * u) / denom

// There's a sign error in eq. 8.67d. Code (8.2) is correct.

let omegadot = ((p.m + p.M) * p.m * p.g * p.L * Math.sin(theta)

- p.m * p.L * Math.cos(theta) * (p.m * p.L * omega * omega * Math.sin(theta) - p.delta * v) - p.m * p.L * Math.cos(theta) * u) / denom;

return [xdot, vdot, thetadot, omegadot];

}

md`Here *f* is the nonlinear function defined in \`calculateTimeDerivatives\`. We can linearize this function around a point *x<sub>0</sub>* to obtain:`

function linearizeA(b, p) {

// Linearize the system around a fixed point.

// If f = calculateTimeDerivatives, then here we are calculating the Jacobian, a 4x4 matrix:

// f = [df_1/dx_1, df_1/dx_2, ...

// [df_2/dx_1, df_2/dx_2, ...]

// [...]

// evaluated at either theta = 0 or theta = pi

// Following Brunton, b = 1 corresponds to theta = pi and b = -1 to theta = 0

// Equation 8.68

let A = [[0, 1, 0, 0],

[0, -p.delta / p.M, b * p.m * p.g / p.M, 0],

[0, 0, 0, 1],

[0, -b * p.delta / p.M / p.L, -b * (p.m + p.M) * p.g / p.M / p.L, 0]];

return A;

}

function getB(b, p) {

return [0, 1/p.M, 0, b / p.M / p.L];

}

proportional = {

restart;

let state = [0, 0, Math.PI + .1, 0];

let myCartPole = cartPole();

// Let's compute some eigenvalues!

let A = linearizeA(1, myCartPole);

let B = math.reshape(getB(1, myCartPole), [4, 1]);

let K = math.reshape([0, 0, gain, 0], [1, 4]);

let T = math.evaluate('A - B*K', {A:A, B:B, K:K});

let eigs = new matlib.EigenvalueDecomposition(new matlib.Matrix(T));

let biggestEig = d3.max(eigs.realEigenvalues).toFixed(2);

let vis = d3.create("div").text(`Largest eigenvalue: ${biggestEig}`);

let canvas = vis.append("canvas").attr("width", width/2).attr("height", 200);

while(1) {

// Policy: move left is element is pole is to the left, right if pole is to the right.

let u = math.evaluate('-K * (x - x0)', {x:state, K:K, x0: [0, 0, math.PI, 0]});

state = advanceOneStep(state, u, myCartPole, 1 / 60);

// Magnify from simulation coordinates to screen coordinates.

myCartPole.x = state[0] * myCartPole.magX;

myCartPole.theta = state[2];

renderCartPole(myCartPole, canvas.node());

yield vis.node();

}

//return canvas.node();

}

function solveRicatti(A, B, Q, R) {

let BRB = math.evaluate("-B*inv(R)*B'", {B:B, R:R});

let Z = math.concat(

math.concat(A, BRB, 1),

math.concat(math.evaluate('-Q', {Q:Q}), math.evaluate("-A'", {A:A}), 1)

, 0);

let e = new matlib.EigenvalueDecomposition(new matlib.Matrix(Z));

// Find the columns corresponding to negative eigenvalues.

let indexes = [];

let u1Indexes = [];

let u2Indexes = [];

for(let i = 0; i < e.n; i++) {

if(e.realEigenvalues[i] < 0) {

indexes.push(i);

}

if(i < e.n / 2) {

u1Indexes.push(i);

} else {

u2Indexes.push(i);

}

let U = e.eigenvectorMatrix.subMatrixColumn(indexes);

let U1 = U.subMatrixRow(u1Indexes);

let U2 = U.subMatrixRow(u2Indexes);

let P = U2.mmul(matlib.inverse(U1)).to2DArray();

return P;

}

ricattiHolds = {

let myCartPole = cartPole();

// Let's compute some eigenvalues!

let A = linearizeA(1, myCartPole);

let B = math.reshape(getB(1, myCartPole), [4, 1]);

let Q = math.diag([1, 1, 1, 1]);

let R = [[1]]

let X = solveRicatti(A, B, Q, R);

let zero = math.evaluate("A'*P + P*A - P*B*inv(R)*B'*P + Q", {A:A, B:B, Q:Q, R:R, P:X});

return (math.sum(math.square(zero)) < 1e-12);

}

lqr = {

restartlqr;

let states = [];

let state = [0, 0, Math.PI + .2, 0];

let myCartPole = cartPole();

// Let's compute an optimal control matrix!

let A = linearizeA(1, myCartPole);

let B = math.reshape(getB(1, myCartPole), [4, 1]);

let Q = math.diag([1, 1, 10, 10]);

let R = [[fuelPenalty]];

let X = solveRicatti(A, B, Q, R);

let K = math.evaluate("inv(R)*B'*X", {R:R, B:B, X:X});

let T = math.evaluate('A - B*K', {A:A, B:B, K:K});

let eigs = new matlib.EigenvalueDecomposition(new matlib.Matrix(T));

let biggestEig = d3.max(eigs.realEigenvalues).toFixed(2);

let vis = d3.create("div").text(`Largest eigenvalue: ${biggestEig}`);

let thediv = vis.append("div").node();//.appendChild(node);

console.log(thediv);

//thediv.appendChild(node);

let canvas = vis.append("canvas").attr("width", width/2).attr("height", 200);

let i = 0;

while(1) {

// Policy: move left is element is pole is to the left, right if pole is to the right.

let u = math.evaluate('-K * (x - x0)', {x:state, K:K, x0: [0, 0, math.PI, 0]});

state = advanceOneStep(state, u, myCartPole, 1 / 60);

// Magnify from simulation coordinates to screen coordinates.

myCartPole.x = state[0] * myCartPole.magX;

myCartPole.theta = state[2];

renderCartPole(myCartPole, canvas.node());

states.push({t: i, u: u});

yield vis.node();

i++;

}

//return canvas.node();

}

function renderCartPole(cartPole, canvas) {

/* From tf.js https://github.com/tensorflow/tfjs-examples/blob/master/cart-pole/ui.js

originally released under an Apache license.

if (!canvas.style.display) {

canvas.style.display = 'block';

}

const X_MIN = -cartPole.xThreshold;

const X_MAX = cartPole.xThreshold;

const xRange = X_MAX - X_MIN;

const scale = canvas.width / xRange;

const context = canvas.getContext('2d');

context.clearRect(0, 0, canvas.width, canvas.height);

const halfW = canvas.width / 2;

// Draw the cart.

const railY = canvas.height * 0.8;

const cartW = cartPole.cartWidth * scale;

const cartH = cartPole.cartHeight * scale;

const cartX = cartPole.x * scale + halfW;

context.beginPath();

context.strokeStyle = '#000000';

context.lineWidth = 2;

context.rect(cartX - cartW / 2, railY - cartH / 2, cartW, cartH);

context.stroke();

// Draw the wheels under the cart.

const wheelRadius = cartH / 4;

for (const offsetX of [-1, 1]) {

context.beginPath();

context.lineWidth = 2;

context.arc(

cartX - cartW / 4 * offsetX, railY + cartH / 2 + wheelRadius,

wheelRadius, 0, 2 * Math.PI);

context.stroke();

}

// Draw the pole.

const angle = cartPole.theta + 3*Math.PI / 2;

const poleTopX =

halfW + scale * (cartPole.x + Math.cos(angle) * cartPole.length);

const poleTopY = railY -

scale * (cartPole.cartHeight / 2 + Math.sin(angle) * cartPole.length);

context.beginPath();

context.strokeStyle = '#ffa500';

context.lineWidth = 6;

context.moveTo(cartX, railY - cartH / 2);

context.lineTo(poleTopX, poleTopY);

context.stroke();

// Draw the ground.

const groundY = railY + cartH / 2 + wheelRadius * 2;

context.beginPath();

context.strokeStyle = '#000000';

context.lineWidth = 1;

context.moveTo(0, groundY);

context.lineTo(canvas.width, groundY);

context.stroke();

const nDivisions = 40;

for (let i = 0; i < nDivisions; ++i) {

const x0 = canvas.width / nDivisions * i;

const x1 = x0 + canvas.width / nDivisions / 2;

const y0 = groundY + canvas.width / nDivisions / 2;

const y1 = groundY;

context.beginPath();

context.moveTo(x0, y0);

context.lineTo(x1, y1);

context.stroke();

}

// Draw the left and right limits.

const limitTopY = groundY - canvas.height / 2;

context.beginPath();

context.strokeStyle = '#ff0000';

context.lineWidth = 2;

context.moveTo(1, groundY);

context.lineTo(1, limitTopY);

context.stroke();

context.beginPath();

context.moveTo(canvas.width - 1, groundY);

context.lineTo(canvas.width - 1, limitTopY);

context.stroke();

}

cartPole = () => {

return {

xThreshold: 200,

cartWidth: 100,

cartHeight: 10,

theta: .01,

x: -200,

length: 70, // We have both the visual length and the length for the simulation.

L: 2,

m: 1,

M: 5,

delta: 3,

g: -10,

magX: 100

}};

matlib = require("https://www.unpkg.com/ml-matrix@6.8.0");

math = require("mathjs@7")

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more