Public
Edited
Jul 8, 2024
Insert cell
Insert cell
bigIntFirstParagraph = {
const totalChars = BigInt(firstParagraphChars.length);
const charToBigInt = new Map(firstParagraphChars.split("").map((c, i) => [c, BigInt(i)]))
// initiate our compressed "string" as a `BigInt`.
let sum = 0n, n = 0n, i = 0;
// loop over each character in the paragraph, multiplying
// the encoded string by the number of unique characters,
// then adding the integer the character maps to.
for (i = 0; i < firstParagraph.length; i++) {
n = charToBigInt.get(firstParagraph[i]);
sum = totalChars * sum + n;
}
return sum;
}
Insert cell
Insert cell
compressedFirstParagraph = {
let bigIntStr = bigIntFirstParagraph;
const bytes = [];
// repeatedly extract the lower eight bits
// of the big int until it is zero
while (bigIntStr) {
const byte = Number(bigIntStr & 255n);
bytes.push(byte);
bigIntStr = bigIntStr >> 8n;
}
return bytes
}
Insert cell
// The length of the regular string as reported by `.length`.
// NOTE(review): this equals the UTF-8 byte count only if every character
// in firstParagraph encodes to a single byte (plain ASCII) - confirm.
firstParagraph.length
Insert cell
// Size ratio: number of compressed bytes divided by the length of the original paragraph
compressedFirstParagraph.length / firstParagraph.length
Insert cell
Insert cell
function bytesToBigInt(bytes) {
  // Rebuild a BigInt from its bytes. bytes[0] is the least significant byte,
  // so walk from the end of the array towards the start, moving the
  // accumulated value up by one byte (x256) before adding the next one.
  let value = 0n;
  let index = bytes.length;
  while (index-- > 0) {
    value = value * 256n + BigInt(bytes[index]);
  }
  return value;
}
Insert cell
// Sanity check: rebuilding the integer from the compressed bytes must give
// back exactly the big integer we started from (strict BigInt comparison).
bytesToBigInt(compressedFirstParagraph) === bigIntFirstParagraph
Insert cell
// Reverse lookup used when decoding: digit value (BigInt) -> character.
bigIntToChar = new Map(Array.from(firstParagraphChars.split(""), (symbol, position) => [BigInt(position), symbol]))
Insert cell
// Decode a little-endian byte array (as produced for compressedFirstParagraph)
// back into text.
//
// bytes          - array-like of byte values, least significant byte first.
// expectedLength - optional number of characters the original text had.
//                  The integer encoding cannot represent leading characters
//                  whose digit value is 0 (they are "leading zeros"), so
//                  without this hint they are dropped. When it is given,
//                  the result is left-padded with the digit-0 character up
//                  to that length. Omit it to get the unpadded result.
//
// Returns the decoded string.
function decodeBytes(bytes, expectedLength) {
  let remaining = bytesToBigInt(bytes);
  const uniqueChars = BigInt(firstParagraphChars.length);
  const chars = [];
  while (remaining) {
    // Note: digits come out least-significant first, i.e. the text is
    // decoded back to front and reversed at the end.
    chars.push(bigIntToChar.get(remaining % uniqueChars));
    remaining /= uniqueChars;
  }
  if (expectedLength !== undefined) {
    // Restore leading digit-0 characters that the integer could not hold.
    const zeroChar = bigIntToChar.get(0n);
    while (chars.length < expectedLength) {
      chars.push(zeroChar);
    }
  }
  return chars.reverse().join("");
}
Insert cell
// Round trip: decode the compressed bytes back into text
decompressedParagraph = decodeBytes(compressedFirstParagraph)
Insert cell
// Round-trip check: is the decompressed string strictly equal to the original paragraph?
decompressedParagraph === firstParagraph
Insert cell
Insert cell
encodedUniqueChars = {
// initiate a byte-mapping, starting out one-to-one
const byteMap = [0];
for (let i = 1; i < 256; i++) byteMap[i] = i;

// convert the characters of our first paragraph to UTF-8 encoded bytes
const charBytes = (new TextEncoder()).encode(firstParagraphChars);
// before we would shift the sum, then add the new char. In this case
// we actually wish to shift the new character instead, because that
// way we can decode our number later.
let shiftPow = BigInt(256), shift = 1n, sum = 0n;
for (let i = 0; i < charBytes.length; i++) {
const char = charBytes[i];
const charIdx = byteMap.indexOf(char);
sum = sum + shift * BigInt(charIdx);

// update our byte map: replace the encoded byte
// with the last byte in the map, then shrink it.
byteMap[charIdx] = byteMap[byteMap.length - 1];
byteMap.pop();

// increase our shift value
shift *= shiftPow;
shiftPow--;
}

let encodedBits = [];
const encoded = sum;
while (sum > 0n) {
encodedBits.push(Number(sum & 1n));
sum >>= 1n;
}

return {encoded, encodedBits, originalBits: charBytes.length * 8, ratio: (encodedBits.length * 100 / (charBytes.length * 8)).toFixed(2) + "%"};
}
Insert cell
decodedUniqueChars = {
// reconstruct our big int number
const {encodedBits} = encodedUniqueChars;
let sum = 0n;
for (let i = encodedBits.length - 1; i >= 0; i--) {
sum = (sum << 1n) + BigInt(encodedBits[i]);
}

// re-create the byte-mapping
const byteMap = [0];
for (let i = 1; i < 256; i++) byteMap[i] = i;

const bytes = [];
let possibleCharacters = 256n;
while (sum) {
// mask out the current index
const idx = Number(sum % possibleCharacters);
bytes.push(byteMap[idx])

// update the byte map
byteMap[idx] = byteMap[byteMap.length - 1];
byteMap.pop();

// remove the decoded character from the sum
// and reduce the total possible characters
sum /= possibleCharacters;
possibleCharacters--;
}
return (new TextDecoder()).decode(Uint8Array.from(bytes))
}
Insert cell
Insert cell
encodedFullAlphabet = {
// initiate a byte-mapping, starting out one-to-one
const byteMap = [0];
for (let i = 1; i < 256; i++) byteMap[i] = i;

const charBytes = Uint8Array.from({length: 256}, (v, i) => i);
let shiftPow = BigInt(256), shift = 1n, sum = 0n;
for (let i = 0; i < charBytes.length; i++) {
const char = charBytes[i];
const charIdx = byteMap.indexOf(char);
sum = sum + shift * BigInt(charIdx);
byteMap[charIdx] = byteMap[byteMap.length - 1];
byteMap.pop();
shift *= shiftPow;
shiftPow--;
}

let encodedBits = [];
const encoded = sum;
while (sum > 0n) {
encodedBits.push(Number(sum & 1n));
sum >>= 1n;
}

return {encoded, encodedBits, originalBits: charBytes.length * 8, ratio: (encodedBits.length * 100 / (charBytes.length * 8)).toFixed(2) + "%"};
}
Insert cell
Insert cell
Insert cell
// Length of firstParagraphChars; this is the value the encoding cells use as their radix
firstParagraphChars.length
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more