{
const { context, adapter, device } = await util.gpu(1, 1)
const module = device.createShaderModule({
label: 'compute module',
code: `
@group(0) @binding(0) var<storage, read_write> data: array<f32>;
@group(0) @binding(1) var<storage, read_write> bins: array<atomic<u32>>;
@compute @workgroup_size(1)
fn cs(
@builtin(global_invocation_id) id: vec3<u32>
) {
let i = id.x;
let v = data[i];
data[i] = data[i] * 2.0;
let numBins = f32(arrayLength(&bins));
let lastBinIndex = u32(numBins - 1);
let b = min(u32(v * numBins), lastBinIndex);
atomicAdd(&bins[b], 1u);
}
`,
})
const pipeline = device.createComputePipeline({
label: 'compute pipeline',
layout: 'auto',
compute: {
module,
entryPoint: 'cs',
},
})
const workBuffer = device.createBuffer({
label: 'work buffer',
size: input.byteLength,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
});
device.queue.writeBuffer(workBuffer, 0, input);
const binsBuffer = device.createBuffer({
size: numBins * 4,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
})
const resultBuffer = device.createBuffer({
label: 'result buffer',
size: input.byteLength,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
})
const binsResultBuffer = device.createBuffer({
label: 'bins result buffer',
size: binsBuffer.size,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
})
const bindGroup = device.createBindGroup({
label: 'bind group',
layout: pipeline.getBindGroupLayout(0),
entries: [
{ binding: 0, resource: { buffer: workBuffer } },
{ binding: 1, resource: { buffer: binsBuffer } },
],
})
const encoder = device.createCommandEncoder({ label: 'encoder' })
const pass = encoder.beginComputePass({ label: 'compute pass' })
pass.setPipeline(pipeline);
pass.setBindGroup(0, bindGroup);
pass.dispatchWorkgroups(input.length);
pass.end()
encoder.copyBufferToBuffer(workBuffer, 0, resultBuffer, 0, resultBuffer.size)
encoder.copyBufferToBuffer(binsBuffer, 0, binsResultBuffer, 0, binsResultBuffer.size);
device.queue.submit([encoder.finish()]);
await resultBuffer.mapAsync(GPUMapMode.READ)
const result = new Float32Array(resultBuffer.getMappedRange().slice())
resultBuffer.unmap()
await binsResultBuffer.mapAsync(GPUMapMode.READ)
const binsResult = new Uint32Array(binsResultBuffer.getMappedRange().slice())
binsResultBuffer.unmap()
return { inputs: input, doubled: result, binned: binsResult }
}