// Copyright 2022 the Vello Authors // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense #import bbox #import clip @group(0) @binding(0) var clip_inp: array; @group(0) @binding(1) var path_bboxes: array; @group(0) @binding(2) var reduced: array; @group(0) @binding(3) var clip_out: array; let WG_SIZE = 256u; var sh_bic: array; var sh_parent: array; var sh_path_ix: array; @compute @workgroup_size(256) fn main( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) wg_id: vec3, ) { let inp = clip_inp[global_id.x].path_ix; let is_push = inp >= 0; var bic = Bic(1u - u32(is_push), u32(is_push)); // reverse scan of bicyclic semigroup sh_bic[local_id.x] = bic; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); if local_id.x + (1u << i) < WG_SIZE { let other = sh_bic[local_id.x + (1u << i)]; bic = bic_combine(bic, other); } workgroupBarrier(); sh_bic[local_id.x] = bic; } if local_id.x == 0u { reduced[wg_id.x] = bic; } workgroupBarrier(); let size = sh_bic[0].b; bic = Bic(); if local_id.x + 1u < WG_SIZE { bic = sh_bic[local_id.x + 1u]; } if is_push && bic.a == 0u { let local_ix = size - bic.b - 1u; sh_parent[local_ix] = local_id.x; sh_path_ix[local_ix] = u32(inp); } workgroupBarrier(); // TODO: possibly do forward scan here if depth can exceed wg size if local_id.x < size { let path_ix = sh_path_ix[local_id.x]; let path_bbox = path_bboxes[path_ix]; let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE; let bbox = vec4(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1)); clip_out[global_id.x] = ClipEl(parent_ix, bbox); } }