// Copyright 2022 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Draw-object reduction pass.
//
// Each workgroup folds the draw tags of a contiguous range of blocks
// (WG_SIZE draw objects per block) into a single draw monoid and writes
// that aggregate to `reduced[wg_id.x]`.

#import config
#import drawtag

// NOTE(review): `Config` and `DrawMonoid` are not declared in this file;
// they are expected to come from the `config` / `drawtag` imports. The
// `uniform` address space for `config` and the `u32` element type of
// `scene` are assumptions - confirm against the host-side bind group layout.
@group(0) @binding(0)
var<uniform> config: Config;

@group(0) @binding(1)
var<storage> scene: array<u32>;

// One aggregate per workgroup; written by invocation 0 of each workgroup.
@group(0) @binding(2)
var<storage, read_write> reduced: array<DrawMonoid>;

// Invocations per workgroup. Must stay in sync with the literal in
// `@workgroup_size` below, and must be a power of two for the tree
// reduction in `main` to cover every slot.
const WG_SIZE = 256u;

// Per-workgroup scratch space for the tree reduction, one slot per invocation.
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;

#import util

@compute @workgroup_size(256)
fn main(
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    let num_blocks_total = (config.n_drawobj + (WG_SIZE - 1u)) / WG_SIZE;
    // When the number of blocks exceeds the workgroup size, divide
    // the work evenly so each workgroup handles n_blocks / wg, with
    // the low workgroups doing one more each to handle the remainder.
    // NOTE(review): this split distributes the blocks over WG_SIZE
    // workgroups, so the dispatch is presumably at most WG_SIZE workgroups
    // wide - confirm on the host side.
    let n_blocks_base = num_blocks_total / WG_SIZE;
    let remainder = num_blocks_total % WG_SIZE;
    let first_block = n_blocks_base * wg_id.x + min(wg_id.x, remainder);
    let n_blocks = n_blocks_base + u32(wg_id.x < remainder);

    // Sequential phase: this invocation visits one draw object in each of
    // the workgroup's blocks, stepping by WG_SIZE, and folds the mapped
    // tags into a private aggregate.
    // NOTE(review): in the final block `block_index` can be >= n_drawobj;
    // `read_draw_tag_from_scene` is presumably responsible for handling
    // that - confirm in `util`.
    var block_index = first_block * WG_SIZE + local_id.x;
    var agg = draw_monoid_identity();
    for (var i = 0u; i < n_blocks; i += 1u) {
        let tag_word = read_draw_tag_from_scene(block_index);
        agg = combine_draw_monoid(agg, map_draw_tag(tag_word));
        block_index += WG_SIZE;
    }
    sh_scratch[local_id.x] = agg;

    // Tree phase: log2(WG_SIZE) rounds. In each round an invocation pulls in
    // the partial aggregate `stride` slots to its right, so invocation 0 ends
    // up holding the combination of every slot. Operands are always combined
    // as (left, right), so the original element order is preserved.
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        let stride = 1u << i;
        // Make the previous round's writes visible before anyone reads them.
        workgroupBarrier();
        if local_id.x + stride < WG_SIZE {
            let other = sh_scratch[local_id.x + stride];
            agg = combine_draw_monoid(agg, other);
        }
        // Every read of this round must finish before slots are overwritten.
        workgroupBarrier();
        sh_scratch[local_id.x] = agg;
    }

    // Invocation 0 now holds the aggregate for the whole workgroup.
    if local_id.x == 0u {
        reduced[wg_id.x] = agg;
    }
}