struct Params { filterDim : i32, blockDim : u32, } @group(0) @binding(0) var samp : sampler; @group(0) @binding(1) var params : Params; @group(1) @binding(1) var inputTex : texture_2d; @group(1) @binding(2) var outputTex : texture_storage_2d; struct Flip { value : u32, } @group(1) @binding(3) var flip : Flip; // This shader blurs the input texture in one direction, depending on whether // |flip.value| is 0 or 1. // It does so by running (128 / 4) threads per workgroup to load 128 // texels into 4 rows of shared memory. Each thread loads a // 4 x 4 block of texels to take advantage of the texture sampling // hardware. // Then, each thread computes the blur result by averaging the adjacent texel values // in shared memory. // Because we're operating on a subset of the texture, we cannot compute all of the // results since not all of the neighbors are available in shared memory. // Specifically, with 128 x 128 tiles, we can only compute and write out // square blocks of size 128 - (filterSize - 1). We compute the number of blocks // needed in Javascript and dispatch that amount. var tile : array, 4>; @compute @workgroup_size(32, 1, 1) fn main( @builtin(workgroup_id) WorkGroupID : vec3u, @builtin(local_invocation_id) LocalInvocationID : vec3u ) { let filterOffset = (params.filterDim - 1) / 2; let dims = vec2i(textureDimensions(inputTex, 0)); let baseIndex = vec2i(WorkGroupID.xy * vec2(params.blockDim, 4) + LocalInvocationID.xy * vec2(4, 1)) - vec2(filterOffset, 0); for (var r = 0; r < 4; r++) { for (var c = 0; c < 4; c++) { var loadIndex = baseIndex + vec2(c, r); if (flip.value != 0u) { loadIndex = loadIndex.yx; } tile[r][4 * LocalInvocationID.x + u32(c)] = textureSampleLevel( inputTex, samp, (vec2f(loadIndex) + vec2f(0.25, 0.25)) / vec2f(dims), 0.0 ).rgb; } } workgroupBarrier(); for (var r = 0; r < 4; r++) { for (var c = 0; c < 4; c++) { var writeIndex = baseIndex + vec2(c, r); if (flip.value != 0) { writeIndex = writeIndex.yx; } let center = i32(4 * LocalInvocationID.x) + c; if (center >= filterOffset && center < 128 - filterOffset && all(writeIndex < dims)) { var acc = vec3(0.0, 0.0, 0.0); for (var f = 0; f < params.filterDim; f++) { var i = center + f - filterOffset; acc = acc + (1.0 / f32(params.filterDim)) * tile[r][i]; } textureStore(outputTex, writeIndex, vec4(acc, 1.0)); } } } }