Public
Edited
Jul 8, 2024
Insert cell
Insert cell
bigIntFirstParagraph = {
const totalChars = BigInt(firstParagraphChars.length);
const charToBigInt = new Map(firstParagraphChars.split("").map((c, i) => [c, BigInt(i)]))
// initiate our compressed "string" as a `BigInt`.
let sum = 0n, n = 0n, i = 0;
// loop over each character in the paragraph, multiplying
// the encoded string by the number of unique characters,
// then adding the integer the character maps to.
for (i = 0; i < firstParagraph.length; i++) {
n = charToBigInt.get(firstParagraph[i]);
sum = totalChars * sum + n;
}
return sum;
}
Insert cell
Insert cell
compressedFirstParagraph = {
let bigIntStr = bigIntFirstParagraph;
const bytes = [];
// repeatedly extract the lower eight bits
// of the big int until it is zero
while (bigIntStr) {
const byte = Number(bigIntStr & 255n);
bytes.push(byte);
bigIntStr = bigIntStr >> 8n;
}
return bytes
}
Insert cell
// The number of UTF-16 code units in the regular string. This equals the
// number of bytes in UTF-8 only when every character is ASCII
// (assumed for this paragraph — TODO confirm).
firstParagraph.length
Insert cell
// Ratio of sizes: number of packed bytes divided by the length of the
// original string (which stands in for the original byte count).
compressedFirstParagraph.length / firstParagraph.length
Insert cell
Insert cell
/**
 * Rebuilds a BigInt from a little-endian list of bytes.
 *
 * @param {ArrayLike<number>} bytes - byte values, least significant first.
 * @returns {bigint} the combined value; `0n` for an empty list.
 */
function bytesToBigInt(bytes) {
  let value = 0n;
  // Walk from the most significant byte (the last entry) down to the
  // first, scaling the running value by 256 before adding each byte.
  let position = bytes.length;
  while (position-- > 0) {
    value = value * 256n + BigInt(bytes[position]);
  }
  return value;
}
Insert cell
// Sanity check: do the bytes convert back to the same big integer?
// Both sides are BigInt values, so strict equality compares them by value.
bytesToBigInt(compressedFirstParagraph) === bigIntFirstParagraph
Insert cell
bigIntToChar = new Map(firstParagraphChars.split("").map((c, i) => [BigInt(i), c]))
Insert cell
/**
 * Decodes a little-endian byte list back into text.
 *
 * The bytes are combined into one BigInt with `bytesToBigInt`, which is then
 * read as a number whose radix is the length of `firstParagraphChars`; each
 * digit is translated to a symbol through `bigIntToChar`.
 *
 * @param {ArrayLike<number>} bytes - packed bytes, least significant first.
 * @returns {string} the decoded text.
 */
function decodeBytes(bytes) {
  let remaining = bytesToBigInt(bytes);
  const radix = BigInt(firstParagraphChars.length);
  const reversedSymbols = [];

  // Digits come out least significant first, i.e. last character first.
  // NOTE: the loop ends as soon as the remaining value is zero, so leading
  // symbols that map to digit 0 would not be recovered.
  for (; remaining; remaining /= radix) {
    reversedSymbols.push(bigIntToChar.get(remaining % radix));
  }

  // Undo the reversed extraction order before joining.
  return reversedSymbols.reverse().join("");
}
Insert cell
// Round trip: decode the packed bytes back into text.
decompressedParagraph = decodeBytes(compressedFirstParagraph)
Insert cell
// Is our decompressed string identical to the original paragraph?
decompressedParagraph === firstParagraph
Insert cell
Insert cell
encodedUniqueChars = {
// initiate a byte-mapping, starting out one-to-one
const byteMap = [0];
for (let i = 1; i < 256; i++) byteMap[i] = i;

// convert the characters of our first paragraph to UTF-8 encoded bytes
const charBytes = (new TextEncoder()).encode(firstParagraphChars);
// before we would shift the sum, then add the new char. In this case
// we actually wish to shift the new character instead, because that
// way we can decode our number later.
let shiftPow = BigInt(256), shift = 1n, sum = 0n;
for (let i = 0; i < charBytes.length; i++) {
const char = charBytes[i];
const charIdx = byteMap.indexOf(char);
sum = sum + shift * BigInt(charIdx);

// update our byte map: replace the encoded byte
// with the last byte in the map, then shrink it.
byteMap[charIdx] = byteMap[byteMap.length - 1];
byteMap.pop();

// increase our shift value
shift *= shiftPow;
shiftPow--;
}

let encodedBits = [];
const encoded = sum;
while (sum > 0n) {
encodedBits.push(Number(sum & 1n));
sum >>= 1n;
}

return {encoded, encodedBits, originalBits: charBytes.length * 8, ratio: (encodedBits.length * 100 / (charBytes.length * 8)).toFixed(2) + "%"};
}
Insert cell
decodedUniqueChars = {
// reconstruct our big int number
const {encodedBits} = encodedUniqueChars;
let sum = 0n;
for (let i = encodedBits.length - 1; i >= 0; i--) {
sum = (sum << 1n) + BigInt(encodedBits[i]);
}

// re-create the byte-mapping
const byteMap = [0];
for (let i = 1; i < 256; i++) byteMap[i] = i;

const bytes = [];
let possibleCharacters = 256n;
while (sum) {
// mask out the current index
const idx = Number(sum % possibleCharacters);
bytes.push(byteMap[idx])

// update the byte map
byteMap[idx] = byteMap[byteMap.length - 1];
byteMap.pop();

// remove the decoded character from the sum
// and reduce the total possible characters
sum /= possibleCharacters;
possibleCharacters--;
}
return (new TextDecoder()).decode(Uint8Array.from(bytes))
}
Insert cell
Insert cell
encodedFullAlphabet = {
// initiate a byte-mapping, starting out one-to-one
const byteMap = [0];
for (let i = 1; i < 256; i++) byteMap[i] = i;

const charBytes = Uint8Array.from({length: 256}, (v, i) => i);
let shiftPow = BigInt(256), shift = 1n, sum = 0n;
for (let i = 0; i < charBytes.length; i++) {
const char = charBytes[i];
const charIdx = byteMap.indexOf(char);
sum = sum + shift * BigInt(charIdx);
byteMap[charIdx] = byteMap[byteMap.length - 1];
byteMap.pop();
shift *= shiftPow;
shiftPow--;
}

let encodedBits = [];
const encoded = sum;
while (sum > 0n) {
encodedBits.push(Number(sum & 1n));
sum >>= 1n;
}

return {encoded, encodedBits, originalBits: charBytes.length * 8, ratio: (encodedBits.length * 100 / (charBytes.length * 8)).toFixed(2) + "%"};
}
Insert cell
Insert cell
Insert cell
// Length of `firstParagraphChars`, the value the encoder cells use as
// their radix.
firstParagraphChars.length
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more