#include #include "lib.hpp" extern "C" void sta_levelize_calc_outs_cpu( usize num_pins, const usize *__restrict__ arcs_st, const STAGraphArc *__restrict__ arcs, usize *__restrict__ num_outs) { // parallel for(usize i = 0; i < num_pins; ++i) { usize l = arcs_st[i], r = arcs_st[i + 1]; for(usize j = l; j < r; ++j) { // atomic ++num_outs[arcs[j].prev]; } } } extern "C" void sta_levelize_get_first_layer_cpu( usize num_pins, const usize *__restrict__ arcs_st, const usize *__restrict__ num_outs, usize *__restrict__ levels_nd, usize *__restrict__ tmp) { // parallel for(usize i = 0; i < num_pins; ++i) { if(num_outs[i] != 0) continue; if(arcs_st[i] == arcs_st[i + 1]) { ++tmp[1]; // num_dead_pins } else { // atomic usize p = tmp[0]++; // next_level levels_nd[p] = i; } } } extern "C" void sta_levelize_one_level_cpu( const usize *__restrict__ arcs_st, const STAGraphArc *__restrict__ arcs, usize *__restrict__ num_outs, usize l0, usize l1, usize *__restrict__ levels_nd, usize *__restrict__ tmp) { // parallel for(usize p = l0; p < l1; ++p) { usize i = levels_nd[p]; usize l = arcs_st[i], r = arcs_st[i + 1]; for(usize j = l; j < r; ++j) { usize prev = arcs[j].prev; // atomic usize ro = --num_outs[prev]; if(ro == 0) { // atomic usize p = tmp[0]++; levels_nd[p] = prev; } } } }