Beating the built-in sort, sometimes / Job van der Zwan

viewof inputWidth = Inputs.range(

[8, 32],

{ label: html`<b>Bit-width of input:</b>`, value: 32, step: 1 }

)

arrayLength = 1 << 14;

totalArrays = 50;

time_sort = {

const shift = 32 - inputWidth;

const partialShuffle = (arr, n) => {

for(let i = 0; i < n; i++) {

const j = arr.length - 1 - (Math.random() * (arr.length/2) | 0);

const t = arr[j];

arr[j] = arr[arr.length - 1 - i];

arr[arr.length - 1 - i] = t;

}

};

const randomize = partial_checkbox ?

(n = 10) => { for (const input of testInputs) partialShuffle(input, n); } :

() => { for (const input of testInputs) randomValues(input, shift); };

randomize();

// start out sorted

for (let input of testInputs) input.sort((a, b) => a - b);

const timings = {radix: [], quicksort: [], quicksortNR: [], native: [], testInputs};

if (time_sort_checkbox){

let t0 = 0, t1 = 0, maxTiming = 0;

const runs = 50;

for (let i = 0; i < runs && maxTiming < 5000; i++) {

randomize();

if (arrayType === "Uint8Array") {

t0 = performance.now();

for (let input of testInputs) radixsortU8(input, 0, input.length);

t1 = performance.now();

} else {

t0 = performance.now();

for (let input of testInputs) radixsort(input);

t1 = performance.now();

}

t1 -= t0;

timings.radix.push(t1);

if (t1 > maxTiming) {

maxTiming = t1;

}

randomize()

if (Array.isArray(testInputs[0])) {

t0 = performance.now();

for (let input of testInputs) input.sort((a, b) => a - b);

t1 = performance.now();

} else {

t0 = performance.now();

for (let input of testInputs) input.sort();

t1 = performance.now();

}

t1 -= t0;

timings.native.push(t1);

if (t1 > maxTiming) {

maxTiming = t1;

}

// Not relevant

// for (const input of testInputs) randomValues(input);

// t0 = performance.now();

// for (const input of testInputs) quickSort(input, 0, input.length)

// t1 = performance.now();

// t1 -= t0;

// timings.quicksort.push(t1);

// if (t1 > maxTiming) {

// maxTiming = t1;

// }

// for (const input of testInputs) randomValues(input);

// t0 = performance.now();

// for (const input of testInputs) quickSortNR(input)

// t1 = performance.now();

// t1 -= t0;

// timings.quicksortNR.push(t1);

// if (t1 > maxTiming) {

// maxTiming = t1;

// }

yield timings;

}

yield timings;

}

testInputs = {

if (arrayType === "Array") {

return Array.from({ length: totalArrays }, _ => {

const t = [0];

for (let i = 0; i < arrayLength; i++) t[i] = 0;

return t;

});

}

const arrayConstructor = {

Uint8Array,

Uint16Array,

Uint32Array,

Int8Array,

Int16Array,

Int32Array,

Float32Array,

Float64Array

}[arrayType];

return Array.from({ length: totalArrays }, _ => new arrayConstructor(arrayLength));

}

/**

* Concept: flag sort, with a whole bunch of small optimizations:

*

* 1) We revert to insertion sort for small ranges (standard optimization, really)

*

* 2) Assume numbers fit in the smallest bit-width (8, 16, 24, 32, in that order)

* until proven otherwise. Using this we can skip doing multiple passes on arrays

* of (mostly) small numbers or sparse data. We also repeat this on sub-passes

* so in the unlikely case that we have values where only the upper and lower

* bits are set (that is, are like 0xFF0000FF), the middle passes are also skipped

*

* 3) The last pass is optimized by overwriting the last eight bits. Together with

* the previous point this makes sorting values < 256 much faster than native sorts

* for all arrays

*

* 4) With a bit of prep-work we can avoid swaps and directly write the values to

* the correct bucket, saving on precious memory bandwidth.

*

* 5) We check if a range is already sorted and bail out early - this should also

* benefit nearly sorted data, because it allows us to bail out early on buckets

* that were already sorted

*

* 6) If one bucket contains all values in a single pass, we immediately recurse

* to the next pass (speeds up cases where all values are large but only the last

* bits have different values)

*

* I won't bother with implementing a version that handles the floating point or 64 bit case,

* but in theory they could be radix sorted too (requires a copy to and from a typed array though)

*/

radixsort = {

// Make the backing array for counting a continguous piece of memory, for cache purposes

const backingArray8 = new Uint8Array(256 * 4 * (4 + 2 + 1));

const countingArrays8 = [0, 1, 2, 3].map(i =>

backingArray8.subarray(i * 256, (i + 1) * 256)

);

const backingArray16 = new Uint16Array(backingArray8.buffer);

const countingArrays16 = [0, 1, 2, 3].map(i =>

backingArray16.subarray(512 + i * 256, 512 + (i + 1) * 256)

);

const backingArray32 = new Uint32Array(backingArray8.buffer);

const countingArrays32 = [0, 1, 2, 3].map(i =>

backingArray32.subarray(512 + 256 + i * 256, 512 + 256 + (i + 1) * 256)

);

function radixSortL32(array, left = 0, right = array.length, pass = 0) {

// used to mask out the higher bits for later passes

// pass == 0 for the full 32 bits, pass == 3 for 8 bits

const mask = 0xFFFFFFFF >>> (8 * pass);

let val = (array[left] & mask) >>> 0;

let maxVal = val | 0;

let i = left;

// It's practically free to check if the array is already sorted,

// because we spend most time waiting for reading values of the

// input array and updating the counting arrays

let sorted = true,

prev = val;

if (maxVal < 0x100) {

// 8 bit values

const countingArray = countingArrays32[3];

countingArray.fill(0);

for (; i < right; i++) {

// We really only care about whether or not the max value

// requires 8, 16, 24 or 32 bit, so "maxVal" is a bit

// misleading here, but bitmasking should be faster than testing.

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | val) >>> 0;

// if maxVal is bigger than 8 bits, the current index has to

// fall through to the next appropriate bit width

if (maxVal > 0xFF) break;

// can only reach this point if val < 256

countingArray[val]++;

sorted = sorted && prev <= (prev = val);

}

if (maxVal < 0x10000 && i < right) {

// 16 bit values

const countingArray = countingArrays32[2];

countingArray.fill(0);

// if we already advanced a few points, we know that

// these will all go into the first bucket

countingArray[0] = i - left;

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | (val & mask)) >>> 0;

if (maxVal > 0xFFFF) break;

countingArray[(val >>> 8) & 0xFF]++;

sorted = sorted && prev <= (prev = val);

}

if (maxVal < 0x1000000 && i < right) {

// 24 bit values

const countingArray = countingArrays32[1];

countingArray.fill(0);

countingArray[0] = i - left;

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | (val & mask)) >>> 0;

if (maxVal > 0xFFFFFF) break;

countingArray[(val >>> 16) & 0xFF]++;

sorted = sorted && prev <= (prev = val);

}

if (i < right) {

// remaining 32 bit values

const countingArray = countingArrays32[0];

countingArray.fill(0);

countingArray[0] = i - left;

while (i < right) {

val = array[i++];

countingArray[val >>> 24]++;

sorted = sorted && prev <= (prev = val);

}

// Fast path for the sorted (sub)array case

if (sorted) return array;

// Now we do the actual sorting. Again, based on maxVal

// we determine which approach we need

if (maxVal < 0x100) {

// for the last byte we can do a special micro-optimization:

// instead of swapping, overwrite the lowest eight bits

const countingArray = countingArrays32[3];

let i = left,

j = left,

n = 0,

maskVal = array[i] - (array[i] & 0xFF);

while (n < 256 && i < right) {

j += countingArray[n++];

while (i < j) array[i++] = maskVal;

maskVal++;

}

return array;

} else if (maxVal < 0x10000) {

// regardless of what pass we originally had,

// we can adjust to the smallest max value

pass = 2;

} else if (maxVal < 0x1000000) {

pass = 1;

}

const countingArray = countingArrays32[pass];

const shift = 24 - pass * 8;

// Half-swap trick. We lift the first value of every non-empty bucket

// This leaves "holes" in every bucket to be filled. Then we move the

// first lifted value to the correct bucket, advance the idx of that

// bucket to its next value, then move that value, and repeat until

// we reach the end of a bucket. Then we repeat this with the next lifted

// until we run out of lifted values to move.

// This way we can avoid swaps: we can directly move values to the

// correct bucket instead.

const liftedValues = [];

// Re-use 8bit array for storing the starting indices each bucket,

// and convert the regular count array into marking the end-points

// of every bucket (which effectively are the starting indices of

// the next bucket).

const indexArray = countingArrays32[3];

const range = right - left;

const nextSort = range < 256 ? radixSort8 : range < 0xFFFF ? radixSort16 : radixSortL32;

// Create indices array + lift values

if (range === countingArray[0]) {

// If one bucket contains all values in the current range then

// this pass will effectively do nothing. So in that case we

// can immediately recurse to the next pass (or return from the

// range if pass === 3 because that means it is already sorted

return radixSortL32(array, left, right, ++pass);

} else if (countingArray[0] > 0) {

liftedValues.push(array[left]);

}

indexArray[0] = left;

for (let i = 1, prevCount = (countingArray[0] += left); i < 256; i++) {

const count = countingArray[i];

if (range === count) return radixSortL32(array, left, right, ++pass);

else if (count > 0) liftedValues.push(array[prevCount]);

indexArray[i] = prevCount;

prevCount += count;

countingArray[i] = prevCount;

}

// The part where we actually move the values to the correct bucket

for (let i = 0; i < liftedValues.length - 1; i++) {

// We put one of the values in the hole in the

// bucket it belongs, pick the next value from

// that bucket (leaving a hole), put that value

// into the hole in the bucket where it belongs,

// and repeat this process until we reach the end

// of a bucket. At that point we move onto the next

// value in the "hole" aray,

let val = liftedValues[i];

let bucket = (val >>> shift) & 0xFF;

let idx = indexArray[bucket]++;

let end = countingArray[bucket];

array[idx++] = val;

while (idx < end) {

val = array[idx];

bucket = (val >>> shift) & 0xFF;

idx = indexArray[bucket]++;

end = countingArray[bucket];

array[idx++] = val;

}

// Clean-up: plug the last value back into the last hole

// Here's a fun mini-optimization: if out of a total of N

// buckets N-1 are "sorted", then the last bucket can only

// contain values that should be there and therefore must

// already be sorted as well.

val = liftedValues[liftedValues.length - 1];

array[indexArray[(val >>> shift) & 0xFF]] = val;

// Move on to the next pass

pass++;

for (let i = 0, sum = left; i < 256; i++) {

const nextSum = countingArray[i];

if (sum < nextSum) {

// insertion sort fallback for small ranges

// TODO: maybe fall-back to quicksort at slightly

// bigger range is also worth it?

const range = nextSum - sum;

if (range <= 40) {

for (let i = sum + 1, j; i < nextSum; i++) {

const v = array[i];

for (j = i - 1; j >= sum && array[j] > v; j--) {

array[j + 1] = array[j];

}

if (++j < i) array[j] = v;

}

// else if (range < 256) radixSort8(array, sum, nextSum, pass);

// else if (range < 0xFFFF) radixSort16(array, sum, nextSum, pass);

else radixSortL32(array, sum, nextSum, pass);

}

sum = nextSum;

}

return array;

}

// Range length < 65536

function radixSort16(array, left = 0, right = array.length, pass = 0) {

const mask = 0xFFFFFFFF >>> (8 * pass);

let val = (array[left] & mask) >>> 0;

let maxVal = val | 0;

let i = left;

let sorted = true, prev = val;

if (maxVal < 0x100) {

const countingArray = countingArrays16[3];

countingArray.fill(0);

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | val) >>> 0;

if (maxVal > 0xFF) break;

countingArray[val]++;

sorted = sorted && prev <= (prev = val);

}

if (maxVal < 0x10000 && i < right) {

const countingArray = countingArrays16[2];

countingArray.fill(0);

countingArray[0] = i - left;

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | (val & mask)) >>> 0;

if (maxVal > 0xFFFF) break;

countingArray[(val >>> 8) & 0xFF]++;

sorted = sorted && prev <= (prev = val);

}

if (maxVal < 0x1000000 && i < right) {

const countingArray = countingArrays16[1];

countingArray.fill(0);

countingArray[0] = i - left;

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | (val & mask)) >>> 0;

if (maxVal > 0xFFFFFF) break;

countingArray[(val >>> 16) & 0xFF]++;

sorted = sorted && prev <= (prev = val);

}

if (i < right) {

const countingArray = countingArrays16[0];

countingArray.fill(0);

countingArray[0] = i - left;

while (i < right) {

val = array[i++];

countingArray[val >>> 24]++;

sorted = sorted && prev <= (prev = val);

}

if (sorted) return array;

if (maxVal < 0x100) {

const countingArray = countingArrays16[3];

let i = left,

j = left,

n = 0,

maskVal = array[i] - (array[i] & 0xFF);

while (n < 256 && i < right) {

j += countingArray[n++];

while (i < j) array[i++] = maskVal;

maskVal++;

}

return array;

} else if (maxVal < 0x10000) pass = 2;

else if (maxVal < 0x1000000) pass = 1;

const countingArray = countingArrays16[pass];

const shift = 24 - pass * 8;

const liftedValues = [];

const indexArray = countingArrays16[3];

const range = right - left;

const nextSort = range < 256 ? radixSort8 : radixSort16;

if (range === countingArray[0]) {

return radixSort16(array, left, right, ++pass);

} else if (countingArray[0] > 0) {

liftedValues.push(array[left]);

}

indexArray[0] = left;

for (let i = 1, prevCount = (countingArray[0] += left); i < 256; i++) {

const count = countingArray[i];

if (range === count) return radixSort16(array, left, right, ++pass);

else if (count > 0) liftedValues.push(array[prevCount]);

indexArray[i] = prevCount;

prevCount += count;

countingArray[i] = prevCount;

}

for (let i = 0; i < liftedValues.length - 1; i++) {

let val = liftedValues[i];

let bucket = (val >>> shift) & 0xFF;

let idx = indexArray[bucket]++;

let end = countingArray[bucket];

array[idx++] = val;

while (idx < end) {

val = array[idx];

bucket = (val >>> shift) & 0xFF;

idx = indexArray[bucket]++;

end = countingArray[bucket];

array[idx++] = val;

}

val = liftedValues[liftedValues.length - 1];

array[indexArray[(val >>> shift) & 0xFF]] = val;

pass++;

for (let i = 0, sum = left; i < 256; i++) {

const nextSum = countingArray[i];

if (sum < nextSum) {

const range = nextSum - sum;

if (range <= 40) {

for (let i = sum + 1, j; i < nextSum; i++) {

const v = array[i];

for (j = i - 1; j >= sum && array[j] > v; j--) {

array[j + 1] = array[j];

}

if (++j < i) array[j] = v;

}

// else if (range < 256) radixSort16(array, sum, nextSum, pass);

else radixSort16(array, sum, nextSum, pass);

}

sum = nextSum;

}

return array;

}

// Array length less than 256

function radixSort8(array, left = 0, right = array.length, pass = 0) {

const mask = 0xFFFFFFFF >>> (8 * pass);

let val = (array[left] & mask) >>> 0;

let maxVal = val | 0;

let i = left;

let sorted = true, prev = val;

if (maxVal < 0x100) {

const countingArray = countingArrays8[3];

countingArray.fill(0);

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | val) >>> 0;

if (maxVal > 0xFF) break;

countingArray[val]++;

sorted = sorted && prev <= (prev = val);

}

if (maxVal < 0x10000 && i < right) {

const countingArray = countingArrays8[2];

countingArray.fill(0);

countingArray[0] = i - left;

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | (val & mask)) >>> 0;

if (maxVal > 0xFFFF) break;

countingArray[(val >>> 8) & 0xFF]++;

sorted = sorted && prev <= (prev = val);

}

if (maxVal < 0x1000000 && i < right) {

const countingArray = countingArrays8[1];

countingArray.fill(0);

countingArray[0] = i - left;

for (; i < right; i++) {

val = (array[i] & mask) >>> 0;

maxVal = (maxVal | (val & mask)) >>> 0;

if (maxVal > 0xFFFFFF) break;

countingArray[(val >>> 16) & 0xFF]++;

sorted = sorted && prev <= (prev = val);

}

if (i < right) {

const countingArray = countingArrays8[0];

countingArray.fill(0);

countingArray[0] = i - left;

while (i < right) {

val = array[i++];

countingArray[val >>> 24]++;

sorted = sorted && prev <= (prev = val);

}

if (sorted) return array;

if (maxVal < 0x100) {

const countingArray = countingArrays8[3];

let i = left,

j = left,

n = 0,

maskVal = array[i] - (array[i] & 0xFF);

while (n < 256 && i < right) {

j += countingArray[n++];

while (i < j) array[i++] = maskVal;

maskVal++;

}

return array;

} else if (maxVal < 0x10000) pass = 2;

else if (maxVal < 0x1000000) pass = 1;

const countingArray = countingArrays8[pass];

const shift = 24 - pass * 8;

const liftedValues = [];

const indexArray = countingArrays8[3];

const range = right - left;

if (range === countingArray[0]) {

return radixSort8(array, left, right, ++pass);

} else if (countingArray[0] > 0) {

liftedValues.push(array[left]);

}

indexArray[0] = left;

for (let i = 1, prevCount = (countingArray[0] += left); i < 256; i++) {

const count = countingArray[i];

if (range === count) return radixSort8(array, left, right, ++pass);

else if (count > 0) liftedValues.push(array[prevCount]);

indexArray[i] = prevCount;

prevCount += count;

countingArray[i] = prevCount;

}

for (let i = 0; i < liftedValues.length - 1; i++) {

let val = liftedValues[i];

let bucket = (val >>> shift) & 0xFF;

let idx = indexArray[bucket]++;

let end = countingArray[bucket];

array[idx++] = val;

while (idx < end) {

val = array[idx];

bucket = (val >>> shift) & 0xFF;

idx = indexArray[bucket]++;

end = countingArray[bucket];

array[idx++] = val;

}

val = liftedValues[liftedValues.length - 1];

array[indexArray[(val >>> shift) & 0xFF]] = val;

pass++;

for (let i = 0, sum = left; i < 256; i++) {

const nextSum = countingArray[i];

if (sum < nextSum) {

const range = nextSum - sum;

if (range <= 40) {

for (let i = sum + 1, j; i < nextSum; i++) {

const v = array[i];

for (j = i - 1; j >= sum && array[j] > v; j--) {

array[j + 1] = array[j];

}

if (++j < i) array[j] = v;

}

} else radixSort8(array, sum, nextSum, pass);

}

sum = nextSum;

}

return array;

}

return function radixsort(array) {

// // All benchmarks so far suggest that for arrays

// // smaller than this the native sorts definitely win

// if (array.length <= 200) {

// if (Array.isArray(array)) array.sort((a, b) => a - b);

// else array.sort(); // native TypedArray sort

// return array;

// }

const sort = array.length < 256 ? radixSort8 : array.length < 0xFFFF ? radixSort16 : radixSortL32;

return sort(array, 0, array.length, 0);

};

}

test_sort = {

if (true) {

const testInput = new Uint32Array(0x10000);

const original = testInput.slice();

const verification = testInput.slice();

let error;

const samples = 1000;

for (let i = 0; i < samples; i++) {

randomValues(testInput);

original.set(testInput);

original.sort();

radixsort(testInput);

verification.set(testInput);

verification.sort();

for (let j = 0; j < testInput.length; j++) {

if (testInput[j] !== original[j]) {

error = {

idx: j,

o: original[j],

t: testInput[j],

v: verification[j],

original,

testInput,

verification

};

break;

}

if (error) break;

yield `"${i + 1}/${samples} - no errors yet!`;

}

yield error || "no errors!";

} else {

yield "enable debugging to test";

}

radixsortU8 = {

const countArray = new Uint32Array(256);

return function radixsortU8(u8array, left, right) {

// count occurrences of each byte

countArray.fill(0);

let sorted = true;

for(let i = left, prev = u8array[0]; i < right; i++) {

const val = u8array[i];

countArray[val]++;

sorted = sorted && prev <= val;

prev = val;

}

if (sorted) return;

// Since we know we're only dealing with bytes, we can just overwrite the values:

for(let i = 0, n = left, m = left; i < 256 && n < right; i++) {

m += u8array[i];

while(n < m) {

u8array[n++] = i;

}

/*

"Michael Herf has also discovered a good way to generalize this to floating point numbers: Reinterpret cast the float to a uint32, then flip every bit if the float was negative, but flip only the sign bit if the float was positive. The same trick works for doubles and uint64s. Michael Herf explains why this works in the linked piece, but the short version of it is this: Positive floating point numbers already sort correctly if we just reinterpret cast them to a uint32. The exponent comes before the mantissa, so we would sort by the exponent first, then by the mantissa. Everything works out. Negative floating point numbers however would sort the wrong way. Flipping all the bits on them fixes that. The final remaining problem is that positive floating point numbers need to sort as bigger than negative numbers, and the easiest way to do that is to flip the sign bit since it’s the most significant bit."

https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/

*/

function radixF32(F32) {

const U32 = new Uint32Array(F32.buffer);

for (let i = 0; i < U32.length; i++) {

const v = U32[i];

U32[i] = v & 0x80000000 ? v ^ 0xFFFFFFFF : v ^ 0x80000000;

}

radixsort(U32);

for (let i = 0; i < U32.length; i++) {

const v = U32[i];

U32[i] = v & 0x80000000 ? v ^ 0x80000000 : v ^ 0xFFFFFFFF;

}

return F32;

}

test_sortF32 = {

if (false) {

const testInput = new Float32Array(100000);

const original = testInput.slice();

const verification = testInput.slice();

let error;

const samples = 1000;

for (let i = 0; i < samples; i++) {

randomValuesF32(testInput);

original.set(testInput);

original.sort();

radixF32(testInput);

verification.set(testInput);

verification.sort();

for (let j = 0; j < testInput.length; j++) {

if (testInput[j] !== original[j]) {

error = {

idx: j,

o: original[j],

t: testInput[j],

v: verification[j],

original,

testInput,

verification

};

break;

}

if (error) break;

yield `"${i + 1}/${samples} - no errors yet!`;

}

yield error || "no errors!";

} else {

yield "enable debugging to test";

}

randomValues = {

// / * Observable seems to make crypto.getRandomValues() quite slow on Firefox */

// return function randomValues(typedArray) {

// const step = (1<<14)-1;

// if (typedArray.length > step) {

// for(let i = 0; i < typedArray.length; i += step) {

// crypto.getRandomValues(typedArray.subarray(i, Math.min(i + step, typedArray.length)))

// }

// } else {

// crypto.getRandomValues(typedArray);

// }

// return typedArray;

// }

// Use xorshift (good enough for this test)

let x = Math.random()*0x100000000 | 0;

return function randomValues(typedArray, shift = 0) {

let _x = x;

for(let i = 0; i < typedArray.length; i++){

_x ^= _x << 13;

_x ^= _x >> 17;

_x ^= _x << 5;

typedArray[i] = _x >>> shift;

}

x = _x;

return typedArray;

}

// Since we're probably want to have lots of random values in typed arrays,

// we might as well operate directly on said arrays

randomValuesF32 = {

let x = (Math.random() * 0x100000000) | 0;

return function xorShiftF32Array(f) {

const u = new Uint32Array(f.buffer);

let _x = x;

for (let i = 0; i < f.length; i++) {

_x ^= _x << 13;

_x ^= _x >> 17;

_x ^= _x << 5;

u[i] = (_x & 0x7fffff) | 0b111111100000000000000000000000;

f[i] = (f[i] - 1) * 255;

}

x = _x;

return f;

};

}

// time_select = {

// restart_time_select;

// const testInputs = Array.from({length: 10000}, _ => new Uint32Array(1000));

// for (const input of testInputs) randomValues(input);

// const timings = {radixselect: [], radixselectBranchless: [], radixsort: [], quickselect: []};

// let t0, t1;

// for (let i = 0; i < 50; i++) {

// const k = testInputs[0].lenght / i | 0;

// for (const input of testInputs) randomValues(input);

// t0 = performance.now();

// for (const input of testInputs) radixselect(input, k);

// t1 = performance.now();

// timings.radixselect.push(t1 - t0);

// for (const input of testInputs) randomValues(input);

// t0 = performance.now();

// for (const input of testInputs) radixselectBranchless(input, k);

// t1 = performance.now();

// timings.radixselectBranchless.push(t1 - t0);

// // for (const input of testInputs) randomValues(input);

// // t0 = performance.now();

// // for (const input of testInputs) radixsort(input);

// // t1 = performance.now();

// // timings.radixsort.push(t1 - t0);

// for (const input of testInputs) randomValues(input);

// t0 = performance.now();

// for (const input of testInputs) quickselect(input, k);

// t1 = performance.now();

// timings.quickselect.push(t1 - t0);

// yield timings;

// }

// yield timings;

// }

radixselect = {

const countArray = new Uint32Array(256);

// Assumes we pass a Uint32Array (or any other array that only has values

// that would fit in a Uint32Array)

function _radixselect(array, k, left, right, pass) {

// go from most significant byte to least significant byte

let shift = 24 - pass*8;

// Count occurrences of a particular masked/shifted byte value

let max = 0;

countArray.fill(0);

for(let i = left; i < right; i++) {

const val = array[i];

const bucket = (val >> shift) & 0xFF;

countArray[bucket]++;

if (val > max) max = val;

}

// If our values are small, the first few passes won't do anything

if (pass < 3 && max < 0x100) {

_radixselect(array, k, left, right, 3)

return;

} else if (pass < 2 && max < 0x10000) {

_radixselect(array, k, left, right, 2)

return;

} else if (pass < 1 && max < 0x1000000) {

_radixselect(array, k, left, right, 1)

return;

}

let pivotBucket = 0;

let sum = left;

// Note that this overshoots - will be corrected for below

do {

sum += countArray[pivotBucket++];

} while(sum <= k);

// We now know the "pivot" bucket in which our k value will

// be found. However, unlike a regular sort, in the case of

// n-selec we only care about whether all values on the left

// of the pivot bucket are smaller than the pivot bucket, and

// all values on the right are larger than the pivot bucket.

// In other words: instead of sorting 256 buckets, we

// can reduce that to a sort of Dutch Flag sort with the pivot

// bucket as our middle value.

// On top of that, because of our counting pass we also know

// how many values are smaller, equal or larger than our pivot

// bucket, so we can do a smarter-than average three-way sort

// with half-swaps compared to the regular Dutch Flag sort.

// Compared to a standard radix sort this three-way sort lets

// us get away with less work in two ways: it reduces the

// likelihood of having to swap values around from 255 time

// out of 256 buckets to 2 out of 3 buckets (and one of those

// is tiny, so it's closer to 1 out of 2). However, in the

// process we introduce a three-way branch with a worst-case

// branch prediction scenario, so it's not clear if this is

// a net benefit (but see the branchless version below).

// However, we also only have to recurse on the pivot bucket,

// which saves on sorting 255 out of 256 buckets on the next

// pass. When doing multiples passes this should lower total

// overhead significantly.

let pivotBucketStart = sum - countArray[--pivotBucket];

const pivotBucketEnd = sum;

// If our pivot bucket spans the entire array, we can skip

// the sorting step of this pass

if (pivotBucketStart === left && pivotBucketEnd === right) {

if (pass < 3) _radixselect(array, k, pivotBucketStart, pivotBucketEnd, pass+1);

return;

}

let lowBucketStart = left;

const lowBucketEnd = pivotBucketStart;

let highBucketStart = pivotBucketEnd;

const highBucketEnd = right;

// create gaps in buckets so we can do "half-swaps"

let holes = [];

if (lowBucketStart < lowBucketEnd) {

holes.push(array[lowBucketStart]);

}

if (pivotBucketStart < pivotBucketEnd) {

holes.push(array[pivotBucketStart]);

}

if (highBucketStart < highBucketEnd) {

holes.push(array[highBucketStart]);

}

// Here's a fun optimization: if two out of three

// buckets are "sorted", then by definition the

// third is as well. Hence the `> 1` comparison

while (holes.length > 1) {

// We put one of the values in the hole in the

// bucket it belongs, pick the next value from

// that bucket (leaving a hole), put that value

// into the hole in the bucket where it belongs,

// and repeat this process until we reach the end

// of a bucket. At that point we move onto the next

// value in the "hole" aray

let val = holes.pop();

for(;;) {

const bucket = (val >> shift) & 0xFF;

if (bucket < pivotBucket) {

array[lowBucketStart++] = val;

if (lowBucketStart < lowBucketEnd) { val = array[lowBucketStart]; }

else { break; }

} else if (bucket > pivotBucket) {

array[highBucketStart++] = val;

if (highBucketStart < highBucketEnd) { val = array[highBucketStart]; }

else { break; }

} else {

array[pivotBucketStart++] = val;

if (pivotBucketStart < pivotBucketEnd) { val = array[pivotBucketStart]; }

else { break; }

}

// clean-up: plug the last value back into the last hole

const val = holes[0];

const bucket = (val >> shift) & 0xFF;

if (bucket < pivotBucket) {

array[lowBucketStart] = val;

} else if (bucket > pivotBucket) {

array[highBucketStart] = val;

} else {

array[pivotBucketStart] = val;

}

if (pivotBucketEnd - lowBucketEnd > 1 && pass < 3) {

// insertion sort fallback for small ranges

if (pivotBucketEnd - lowBucketEnd <= 40) {

for(let i = lowBucketEnd + 1, j; i < pivotBucketEnd; i++) {

const v = array[i];

for (j = i - 1; j >= lowBucketEnd && array[j] > v; j--) {

array[j+1] = array[j]

}

if (++j < i) array[j] = v;

}

return;

} else {

_radixselect(array, k, lowBucketEnd, pivotBucketEnd, pass+1);

}

return function radixselect(array, k, left, right) {

const _left = left === undefined ? 0 : left;

const _right = right === undefined ? array.length : right;

if (_left >= _right) {

console.log(`\`left\` must be smaller than \`right\`! left: ${_left}, right: ${_right}`);

return array;

}

if (array.constructor === Uint8Array) {

return _radixselect(array, k, _left, _right, 3);

}

if (array.constructor === Uint16Array) {

return _radixselect(array, k, _left, _right, 2);

}

_radixselect(array, k, _left, _right, 0);

}

radixselectBranchless = {

const countArray = new Uint32Array(256);

// Assumes we pass a Uint32Array (or any other array that only has values

// that would fit in a Uint32Array)

function _radixselect(array, k, left, right, pass) {

// go from most significant byte to least significant byte

let shift = 24 - pass*8;

// Count occurrences of a particular masked/shifted byte value

let max = 0;

countArray.fill(0);

for(let i = left; i < right; i++) {

const val = array[i];

const bucket = (val >> shift) & 0xFF;

countArray[bucket]++;

if (val > max) max = val;

}

// If our values are small, the first few passes won't do anything

if (pass < 3 && max < 0x100) {

_radixselect(array, k, left, right, 3)

return;

} else if (pass < 2 && max < 0x10000) {

_radixselect(array, k, left, right, 2)

return;

} else if (pass < 1 && max < 0x1000000) {

_radixselect(array, k, left, right, 1)

return;

}

let pivotBucket = 0;

let sum = left;

// Note that this overshoots - will be corrected for below

do {

sum += countArray[pivotBucket++];

} while(sum <= k);

const pivotBucketStart = sum - countArray[--pivotBucket];

const pivotBucketEnd = sum;

// If our pivot bucket spans the entire array, we can skip

// the sorting step of this pass

if (pivotBucketStart === left && pivotBucketEnd === right) {

if (pass < 3) _radixselect(array, k, left, right, pass+1);

return;

}

// create gaps in buckets so we can do "half-swaps"

let holes = [];

if (left < pivotBucketStart) {

holes.push(array[left]);

}

if (pivotBucketStart < sum) {

holes.push(array[pivotBucketStart]);

}

if (sum < right) {

holes.push(array[sum]);

}

countArray[0] = left; // low bucket start

countArray[1] = pivotBucketStart; // pivot bucket start

countArray[2] = pivotBucketEnd // high bucket start

countArray[3] = pivotBucketStart; // low bucket end

countArray[4] = pivotBucketEnd // pivot bucket end

countArray[5] = right; // high bucket end

while (holes.length > 1) {

let val = holes.pop();

// Yep, this is how we can do a branchless integer comparison in JavaScript.

// Take a moment to marvel at this monstrosity. Came up with it myself.

// (pivotbucket - bucket) is either:

// a) negative if pivotbucket < bucket

// b) zero if pivotbucket === bucket

// c) positive if pivotbucket > bucket

// >> 31 converts any positive value to 0, and any negative value to -1

// Combining those two insights, the "equation" below produces:

// a) 0 if bucket < pivotbucket

// b) 1 if bucket === pivotbucket

// c) 2 if bucket > pivotbucket

// All of this involves bitwise operations, which combined with good old

// radix-sort based index arrays produces branchless swapping. It turns

// out that avoiding a three-way branch makes this code notably faster

// on Chrome, but not on Firefox (as of writing in Feb 2020).

let bucket = (val >> shift) & 0xFF;

bucket = -(((pivotBucket - bucket) >> 31) + ((pivotBucket - bucket - 1) >> 31));

let idx = countArray[bucket]++;

let end = countArray[bucket+3];

array[idx++] = val;

while(idx < end) {

val = array[idx];

bucket = (val >> shift) & 0xFF;

bucket = -(((pivotBucket - bucket) >> 31) + ((pivotBucket - bucket - 1) >> 31));

idx = countArray[bucket]++;

end = countArray[bucket+3];

array[idx++] = val;

}

// clean-up: plug the last value back into the last hole

const val = holes[0];

const bucket = (val >> shift) & 0xFF;

array[countArray[-(((pivotBucket - bucket) >> 31) + ((pivotBucket - bucket - 1) >> 31))]] = val;

if (pivotBucketEnd - pivotBucketStart > 1 && pass < 3) {

// insertion sort fallback for small ranges

if (pivotBucketEnd - pivotBucketStart <= 40) {

for(let i = pivotBucketStart + 1, j; i < pivotBucketEnd; i++) {

const v = array[i];

for (j = i - 1; j >= pivotBucketStart && array[j] > v; j--) {

array[j+1] = array[j]

}

if (++j < i) array[j] = v;

}

return;

} else {

_radixselect(array, k, pivotBucketStart, pivotBucketEnd, pass+1);

}

return function radixselectBranchless(array, k, left, right) {

const _left = left === undefined ? 0 : left;

const _right = right === undefined ? array.length : right;

if (_left >= _right) {

console.log(`\`left\` must be smaller than \`right\`! left: ${_left}, right: ${_right}`);

return array;

}

if (array.constructor === Uint8Array) {

return _radixselect(array, k, _left, _right, 3);

}

if (array.constructor === Uint16Array) {

return _radixselect(array, k, _left, _right, 2);

}

_radixselect(array, k, _left, _right, 0);

}

test_select = {

if (false) {

const testInput = new Uint32Array(100000);

const original = testInput.slice();

const verification = testInput.slice();

let error;

const samples = 1000;

for (let i = 0; i < samples; i++) {

const k = Math.random() * testInput.length | 0;

randomValues(testInput);

original.set(testInput);

original.sort();

radixselectBranchless(testInput, k);

verification.set(testInput);

verification.sort()

// make sure the values in the array are the same

for(let j = 0; j < testInput.length; j++) {

if (original[j] !== verification[j]) {

error = {idx: j, o: original[j], t: testInput[j], v: verification[j], original, testInput, verification};

break;

}

if (original[k] !== testInput[k]) {

error = {k: k, o: original[k], t: testInput[k], v: verification[k], original, testInput, verification};

break;

}

if (error) break;

yield `"${i+1}/${samples} - no errors yet!`

}

yield error || "no errors!"

} else { yield "enable debugging to test"; }

}

quickselect = require('quickselect@2.0.0/quickselect.js')

testArray = Uint32Array.from({length: 5000000}, _ => Math.random() * 0x100000000 | 0)

// walt = await require('walt-compiler')

// bucketIdx = {

// const source = `

// export function bucketIdx(val: i32, shift: i32, pivotBucket: i32) : i32 {

// const bucket : i32 = (val >> shift) & 0xFF;

// return -(((pivotBucket - bucket) >> 31) + ((pivotBucket - bucket - 1) >> 31));

// }

// `;

// const compiledBuffer = walt.compile(source).buffer();

// return WebAssembly.instantiate(compiledBuffer, {}).then(result => {

// return result.instance.exports.bucketIdx;

// });

// }

import { Toggle, Radio } from "@observablehq/inputs"

Purpose-built for displays of data