// Copyright 2022 the Vello Authors // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense #import config #import bbox #import clip #import drawtag @group(0) @binding(0) var config: Config; @group(0) @binding(1) var clip_inp: array; @group(0) @binding(2) var path_bboxes: array; @group(0) @binding(3) var reduced: array; @group(0) @binding(4) var clip_els: array; @group(0) @binding(5) var draw_monoids: array; @group(0) @binding(6) var clip_bboxes: array>; let WG_SIZE = 256u; var sh_bic: array; var sh_stack: array; var sh_stack_bbox: array, WG_SIZE>; var sh_bbox: array, WG_SIZE>; var sh_link: array; fn search_link(bic: ptr, ix_in: u32) -> i32 { var ix = ix_in; var j = 0u; while j < firstTrailingBit(WG_SIZE) { let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); if ((ix >> j) & 1u) != 0u { let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); if test.b > 0u { break; } *bic = test; ix -= 1u << j; } j += 1u; } if ix > 0u { while j > 0u { j -= 1u; let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); if test.b == 0u { *bic = test; ix -= 1u << j; } } } if ix > 0u { return i32(ix) - 1; } else { return i32(~0u - (*bic).a); } } fn load_clip_path(ix: u32) -> i32 { if ix < config.n_clip { return clip_inp[ix].path_ix; } else { return -2147483648; // literal too large? // return 0x80000000; } } @compute @workgroup_size(256) fn main( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) wg_id: vec3, ) { var bic: Bic; if local_id.x < wg_id.x { bic = reduced[local_id.x]; } sh_bic[local_id.x] = bic; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); if local_id.x + (1u << i) < WG_SIZE { let other = sh_bic[local_id.x + (1u << i)]; bic = bic_combine(bic, other); } workgroupBarrier(); sh_bic[local_id.x] = bic; } workgroupBarrier(); let stack_size = sh_bic[0].b; // TODO: if stack depth > WG_SIZE desired, scan here // binary search in stack let sp = WG_SIZE - 1u - local_id.x; var ix = 0u; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { let probe = ix + ((WG_SIZE / 2u) >> i); if sp < sh_bic[probe].b { ix = probe; } } let b = sh_bic[ix].b; var bbox = vec4(-1e9, -1e9, 1e9, 1e9); if sp < b { let el = clip_els[ix * WG_SIZE + b - sp - 1u]; sh_stack[local_id.x] = el.parent_ix; bbox = el.bbox; } // forward scan of bbox values of prefix stack for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { sh_stack_bbox[local_id.x] = bbox; workgroupBarrier(); if local_id.x >= (1u << i) { bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox); } workgroupBarrier(); } sh_stack_bbox[local_id.x] = bbox; // Read input and compute Bic binary tree let inp = load_clip_path(global_id.x); let is_push = inp >= 0; bic = Bic(1u - u32(is_push), u32(is_push)); sh_bic[local_id.x] = bic; if is_push { let path_bbox = path_bboxes[inp]; bbox = vec4(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1)); } else { bbox = vec4(-1e9, -1e9, 1e9, 1e9); } var inbase = 0u; for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) { let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i)); workgroupBarrier(); if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) { let in_off = inbase + local_id.x * 2u; sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]); } inbase = outbase; } workgroupBarrier(); // search for predecessor node bic = Bic(); var link = search_link(&bic, local_id.x); sh_link[local_id.x] = link; workgroupBarrier(); let grandparent = select(link - 1, sh_link[link], link >= 0); var parent: i32; if link >= 0 { parent = i32(wg_id.x * WG_SIZE) + link; } else if link + i32(stack_size) >= 0 { parent = i32(sh_stack[i32(WG_SIZE) + link]); } else { parent = -1; } // bbox scan (intersect) across parent links for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { if i != 0u { sh_link[local_id.x] = link; } sh_bbox[local_id.x] = bbox; workgroupBarrier(); if link >= 0 { bbox = bbox_intersect(sh_bbox[link], bbox); link = sh_link[link]; } workgroupBarrier(); } if link + i32(stack_size) >= 0 { bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox); } // At this point, bbox is the intersection of bboxes on the path to the root sh_bbox[local_id.x] = bbox; workgroupBarrier(); if !is_push && global_id.x < config.n_clip { // Fix up drawmonoid so path_ix of EndClip matches BeginClip let parent_clip = clip_inp[parent]; let path_ix = parent_clip.path_ix; let parent_ix = parent_clip.ix; let ix = ~inp; draw_monoids[ix].path_ix = u32(path_ix); // Make EndClip point to the same draw data as BeginClip draw_monoids[ix].scene_offset = draw_monoids[parent_ix].scene_offset; if grandparent >= 0 { bbox = sh_bbox[grandparent]; } else if grandparent + i32(stack_size) >= 0 { bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent]; } else { bbox = vec4(-1e9, -1e9, 1e9, 1e9); } } if global_id.x < config.n_clip { clip_bboxes[global_id.x] = bbox; } }