// Multiply-accumulate processing element (PE).
//
// One invocation runs two groups in sequence (see `control`):
//   1. do_mul: latch the product of `top` and `left` into `mul_reg`.
//   2. do_add: add `mul_reg` to the running total in `acc` and write the
//      sum back into `acc`.
//
// The `down` and `right` outputs are driven by continuous assignments outside
// of any group, so `top` is forwarded to `down` and `left` to `right`
// independently of which group is currently executing.
component mac_pe(top: 32, left: 32) -> (down: 32, right: 32) {
  cells {
    // State: running accumulator and the latched product.
    acc = prim std_reg(32);
    mul_reg = prim std_reg(32);
    // Arithmetic units.
    add = prim std_add(32);
    mul = prim std_mult(32);
  }

  wires {
    // Step 1: mul_reg <- top * left. Finishes when the register write is done.
    group do_mul {
      mul_reg.write_en = 1'd1;
      mul_reg.in = mul.out;
      mul.left = top;
      mul.right = left;
      do_mul[done] = mul_reg.done;
    }

    // Step 2: acc <- acc + mul_reg. Finishes when the register write is done.
    group do_add {
      acc.write_en = 1'd1;
      acc.in = add.out;
      add.left = acc.out;
      add.right = mul_reg.out;
      do_add[done] = acc.done;
    }

    // Pass-through of the inputs to the neighbouring outputs.
    down = top;
    right = left;
  }

  control {
    seq {
      do_mul;
      do_add;
    }
  }
}

// Top-level driver: a chain of two `mac_pe` instances (pe00 -> pe01).
//
// pe00 receives its `top` operand from memory t0 and its `left` operand from
// memory l0. pe01 receives its `top` operand from memory t1 and its `left`
// operand from the value pe00 forwarded on its `right` output.
//
// NOTE(review): an earlier comment here described a ready/valid interface to
// "the other context". Nothing in this component uses ready/valid signals; the
// PEs are driven through their go/done ports. Confirm whether that remark
// referred to code outside this file.
component main() -> () {
  cells {
    // Data memories.
    // NOTE(review): parameters are presumably (width, size, index width),
    // i.e. 2 entries of 32 bits addressed with 3 bits -- confirm against the
    // primitive library in use.
    // Top memories.
    t0 = prim std_mem_d1(32,2,3);
    t1 = prim std_mem_d1(32,2,3);
    // Left memory.
    l0 = prim std_mem_d1(32,2,3);

    // Transfer registers: memory -> PE.
    r_00_top_read = prim std_reg(32);
    r_01_top_read = prim std_reg(32);
    r_00_left_read = prim std_reg(32);

    // Transfer registers: PE -> PE (pe00's right output to pe01's left input).
    r_00_right_write = prim std_reg(32);
    r_01_left_read = prim std_reg(32);

    // Processing elements.
    pe00 = mac_pe;
    pe01 = mac_pe;

    // Current read index for each memory.
    t0_cur_idx = prim std_reg(3);
    t1_cur_idx = prim std_reg(3);
    l0_cur_idx = prim std_reg(3);

    // Adders used to increment the read indices.
    t0_add = prim std_add(3);
    t1_add = prim std_add(3);
    l0_add = prim std_add(3);
  }

  wires {
    // Reset all three read indices to zero. The done condition is built from
    // the done signals of all three index registers.
    group init_indices {
      t0_cur_idx.write_en = 1'd1;
      t0_cur_idx.in = 3'd0;
      t1_cur_idx.write_en = 1'd1;
      t1_cur_idx.in = 3'd0;
      l0_cur_idx.write_en = 1'd1;
      l0_cur_idx.in = 3'd0;
      init_indices[done] = t0_cur_idx.done & t1_cur_idx.done & l0_cur_idx.done ? 1'd1;
    }

    // t0[t0_cur_idx] -> pe00's top transfer register.
    group t0_move {
      t0.addr0 = t0_cur_idx.out;
      r_00_top_read.write_en = 1'd1;
      r_00_top_read.in = t0.read_data;
      t0_move[done] = r_00_top_read.done;
    }

    // t0_cur_idx <- t0_cur_idx + 1.
    group t0_upd {
      t0_cur_idx.write_en = 1'd1;
      t0_cur_idx.in = t0_add.out;
      t0_add.left = 3'd1;
      t0_add.right = t0_cur_idx.out;
      t0_upd[done] = t0_cur_idx.done;
    }

    // l0[l0_cur_idx] -> pe00's left transfer register.
    group l0_move {
      l0.addr0 = l0_cur_idx.out;
      r_00_left_read.write_en = 1'd1;
      r_00_left_read.in = l0.read_data;
      l0_move[done] = r_00_left_read.done;
    }

    // l0_cur_idx <- l0_cur_idx + 1.
    group l0_upd {
      l0_cur_idx.write_en = 1'd1;
      l0_cur_idx.in = l0_add.out;
      l0_add.left = 3'd1;
      l0_add.right = l0_cur_idx.out;
      l0_upd[done] = l0_cur_idx.done;
    }

    // t1[t1_cur_idx] -> pe01's top transfer register.
    group t1_move {
      t1.addr0 = t1_cur_idx.out;
      r_01_top_read.write_en = 1'd1;
      r_01_top_read.in = t1.read_data;
      t1_move[done] = r_01_top_read.done;
    }

    // t1_cur_idx <- t1_cur_idx + 1.
    group t1_upd {
      t1_cur_idx.write_en = 1'd1;
      t1_cur_idx.in = t1_add.out;
      t1_add.left = 3'd1;
      t1_add.right = t1_cur_idx.out;
      t1_upd[done] = t1_cur_idx.done;
    }

    // Hand the value captured from pe00's right output over to pe01's left
    // transfer register.
    group pe00_move {
      r_01_left_read.write_en = 1'd1;
      r_01_left_read.in = r_00_right_write.out;
      pe00_move[done] = r_01_left_read.done;
    }

    // Run pe00 on its transfer registers. `go` is held high until pe00
    // reports done; at that point pe00's `right` output is captured into
    // r_00_right_write, and the group finishes once that write completes.
    group pe00_compute {
      pe00.top = r_00_top_read.out;
      pe00.left = r_00_left_read.out;
      pe00.go = !pe00.done ? 1'd1;
      r_00_right_write.write_en = pe00.done ? 1'd1;
      r_00_right_write.in = pe00.done ? pe00.right;
      pe00_compute[done] = r_00_right_write.done;
    }

    // Run pe01 on its transfer registers. Its outputs are not captured, so
    // the group finishes as soon as pe01 reports done.
    group pe01_compute {
      pe01.top = r_01_top_read.out;
      pe01.left = r_01_left_read.out;
      pe01.go = !pe01.done ? 1'd1;
      pe01_compute[done] = pe01.done;
    }
  }

  control {
    seq {
      // Step 0: zero the read indices.
      init_indices;

      // Step 1: load pe00's first pair of operands.
      par {
        t0_move;
        l0_move;
      }

      // Step 2: advance the t0/l0 indices while pe00 computes.
      par {
        t0_upd;
        l0_upd;
        pe00_compute;
      }

      // Step 3: load pe00's second pair of operands, load pe01's first top
      // operand, and pass pe00's forwarded value on to pe01.
      par {
        t0_move;
        l0_move;
        t1_move;
        pe00_move;
      }

      // Step 4: advance the t1 index while both PEs compute.
      par {
        t1_upd;
        pe00_compute;
        pe01_compute;
      }

      // Step 5: load pe01's second top operand and pass pe00's second
      // forwarded value on to pe01.
      par {
        t1_move;
        pe00_move;
      }

      // Step 6: final computation on pe01.
      par {
        pe01_compute;
      }
    }
  }
}