#include <stdio.h>

/* Number of 64-bit operands in the test tables. */
#define N 256

/* Table of deterministic pseudo-random 64-bit operands used by every test. */
unsigned long long reg_val_double[N];

/*
 * Fill reg_val_double[] with a fixed, reproducible sequence.
 *
 * Element 0 is the seed 19650218 masked to 32 bits; each following element
 * is derived from its predecessor with a multiply / xor-shift recurrence, so
 * the table content is identical on every run.
 */
void init_reg_val_double(void)
{
   unsigned long c = 19650218UL;
   int i;

   reg_val_double[0] = c & 0xffffffffUL;
   for (i = 1; i < N; i++) {
      reg_val_double[i] = (1812433253UL * (reg_val_double[i - 1] ^
                          (reg_val_double[i - 1] >> 30)) + i);
   }
}

/* Make a copy of original array to prevent the unexpected changes by Atomic
   Add Instructions. */
unsigned long long reg_val_double_copy[N];

/* Refresh reg_val_double_copy[] from reg_val_double[], element by element. */
void copy_reg_val_double(void)
{
   int i;

   for (i = 0; i < N; i++) {
      reg_val_double_copy[i] = reg_val_double[i];
   }
}

/*
 * Register naming used by all macros below: the assembly text refers to
 * $t0..$t3 while the clobber lists name registers by number.  The code uses
 * 64-bit instructions (daddu/ld), and under the n32/n64 ABIs $t0..$t3 are
 * $12..$15, so every temporary an asm block writes is listed by that number.
 * NOTE(review): this mapping assumes an n32/n64 build - confirm if the file
 * is ever built for another ABI.
 */

/* TEST1_32/64 macro is used in load atomic increment/decrement/set/clear
   instructions. After executing each instruction we must check both memory
   location and register value.

   1: Move arguments (offset and base address) to registers
   2: Add offset and base address to make absolute address
   3: Execute instruction
   4: Move result from register ($t3)
   5: Load memory data ('lw' for 32bit instruction and 'ld' for 64bit
      addresses)

   instruction - string literal with the mnemonic; also printed verbatim.
   offset      - byte offset added to the base address.
   mem         - base address of the memory under test.
*/
#define TEST1_32(instruction, offset,mem)                                  \
do {                                                                       \
   unsigned long out = 0;                                                  \
   unsigned long res_mem = 0;                                              \
   __asm__ volatile(                                                       \
      "move $t0, %2" "\n\t"                                                \
      "move $t1, %3" "\n\t"                                                \
      "daddu $t0, $t1, $t0" "\n\t"                                         \
      instruction " $t3, ($t0)" "\n\t"                                     \
      "move %0, $t3" "\n\t"                                                \
      "lw %1, 0($t0)" "\n\t"                                               \
      : "=&r" (out), "=&r"(res_mem)                                        \
      : "r" (mem) , "r" (offset)                                           \
      : "$12", "$13", "$15", "cc", "memory"                                \
   );                                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",                \
          instruction, (unsigned int) (offset), out, res_mem);             \
} while (0)

#define TEST1_64(instruction, offset,mem)                                  \
do {                                                                       \
   unsigned long out = 0;                                                  \
   unsigned long res_mem = 0;                                              \
   __asm__ volatile(                                                       \
      "move $t0, %2" "\n\t"                                                \
      "move $t1, %3" "\n\t"                                                \
      "daddu $t0, $t1, $t0" "\n\t"                                         \
      instruction " $t3, ($t0)" "\n\t"                                     \
      "move %0, $t3" "\n\t"                                                \
      "ld %1, 0($t0)" "\n\t"                                               \
      : "=&r" (out), "=&r"(res_mem)                                        \
      : "r" (mem) , "r" (offset)                                           \
      : "$12", "$13", "$15", "cc", "memory"                                \
   );                                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",               \
          instruction, (unsigned int) (offset), out, res_mem);             \
} while (0)

/* Test 2 macro is used for pop/dpop/baddu instructions. After executing each
   instructions the macro performs following operations:

   1: Move arguments to registers ($t1 and $t2)
   2: Execute instruction
   3: Move result from register ($t3)

   instruction - string literal with the complete instruction, operands
                 included; also printed verbatim.
   RSVal/RTVal - source operand values.
*/
#define TEST2(instruction, RSVal, RTVal)                                   \
do {                                                                       \
   unsigned long out;                                                      \
   __asm__ volatile(                                                       \
      "move $t1, %1" "\n\t"                                                \
      "move $t2, %2" "\n\t"                                                \
      instruction "\n\t"                                                   \
      "move %0, $t3" "\n\t"                                                \
      : "=&r" (out)                                                        \
      : "r" (RSVal), "r" (RTVal)                                           \
      : "$13", "$14", "$15", "cc", "memory"                                \
   );                                                                      \
   printf("%s :: rd 0x%lx, rs 0x%llx, rt 0x%llx\n",                        \
          instruction, out, (unsigned long long) (RSVal),                  \
          (unsigned long long) (RTVal));                                   \
} while (0)

/* TEST3 macro is used for store atomic add and store atomic add doubleword
   instructions. Following operations are performed by the test macro:

   1: Move arguments to the register
   2: Add offset and base address to make absolute address
   3: Load memory data before the operation
   4: Execute instruction
   5: Load memory data after the operation

   Both loads are 'ld', so a doubleword is read even for the word-sized
   "saa".
   NOTE(review): main() drives "saa" with offsets that are multiples of 4, so
   some of these 'ld' accesses are not 8-byte aligned - confirm this is
   intended.
*/
#define TEST3(instruction, offset, mem, value)                             \
do {                                                                       \
   unsigned long out = 0;                                                  \
   unsigned long outPre = 0;                                               \
   __asm__ volatile(                                                       \
      "move $t0, %2" "\n\t"                                                \
      "move $t1, %3" "\n\t"                                                \
      "daddu $t0, $t1, $t0" "\n\t"                                         \
      "ld %1, 0($t0)" "\n\t"                                               \
      "move $t2, %4" "\n\t"                                                \
      instruction " $t2, ($t0)" "\n\t"                                     \
      "ld %0, 0($t0)" "\n\t"                                               \
      : "=&r" (out), "=&r" (outPre)                                        \
      : "r" (mem) , "r" (offset), "r" (value)                              \
      : "$12", "$13", "$14", "cc", "memory"                                \
   );                                                                      \
   printf("%s :: value: 0x%llx, memPre: 0x%lx, mem: 0x%lx\n",              \
          instruction, (unsigned long long) (value), outPre, out);         \
} while (0)

/* TEST4_32/64 is used for load atomic add/swap instructions. Following
   operations are performed by macro after execution of each instruction:

   1: Move arguments to register.
   2: Add offset and base address to make absolute address.
   3: Execute instruction.
   4: Move result from register ($t3).
   5: Load memory data ('lw' for 32bit instruction and 'ld' for 64bit).

   The offset held in $t1 is also passed as the instruction's third operand.
*/
#define TEST4_32(instruction, offset, mem)                                 \
do {                                                                       \
   unsigned long out = 0;                                                  \
   unsigned long res_mem = 0;                                              \
   __asm__ volatile(                                                       \
      "move $t0, %2" "\n\t"                                                \
      "move $t1, %3" "\n\t"                                                \
      "daddu $t0, $t0, $t1" "\n\t"                                         \
      instruction " $t3, ($t0), $t1" "\n\t"                                \
      "move %0, $t3" "\n\t"                                                \
      "lw %1, 0($t0)" "\n\t"                                               \
      : "=&r" (out), "=&r"(res_mem)                                        \
      : "r" (mem) , "r" (offset)                                           \
      : "$12", "$13", "$15", "cc", "memory"                                \
   );                                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",                \
          instruction, (unsigned int) (offset), out, res_mem);             \
} while (0)

#define TEST4_64(instruction, offset, mem)                                 \
do {                                                                       \
   unsigned long out = 0;                                                  \
   unsigned long res_mem = 0;                                              \
   __asm__ volatile(                                                       \
      "move $t0, %2" "\n\t"                                                \
      "move $t1, %3" "\n\t"                                                \
      "daddu $t0, $t0, $t1" "\n\t"                                         \
      instruction " $t3, ($t0), $t1" "\n\t"                                \
      "move %0, $t3" "\n\t"                                                \
      "ld %1, 0($t0)" "\n\t"                                               \
      : "=&r" (out), "=&r"(res_mem)                                        \
      : "r" (mem) , "r" (offset)                                           \
      : "$12", "$13", "$15", "cc", "memory"                                \
   );                                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",               \
          instruction, (unsigned int) (offset), out, res_mem);             \
} while (0)

/* Instructions exercised by main(), in execution order. */
typedef enum {
   BADDU, POP, DPOP, SAA, SAAD, LAA, LAAD, LAW, LAWD, LAI, LAID, LAD, LADD,
   LAS, LASD, LAC, LACD
} cvm_op;

/*
 * Run every instruction test in cvm_op order and print one line per case.
 *
 * The whole body is compiled only when _MIPS_ARCH_OCTEON2 is non-zero;
 * otherwise the program prints nothing.  Word-sized tests step through the
 * table in units of 4, doubleword tests in units of 8.  Each atomic test
 * starts from a fresh copy of the operand table because the instructions
 * modify memory.  Always returns 0.
 */
int main(void)
{
#if (_MIPS_ARCH_OCTEON2)
   init_reg_val_double();
   int i,j;
   cvm_op op;
   for (op = BADDU; op <= LACD; op++) {
      switch(op){
         /* Unsigned Byte Add - BADDU rd, rs, rt; Cavium OCTEON */
         case BADDU: {
            for(i = 4; i < N; i += 4) {
               for(j = 4; j < N; j += 4) {
                  TEST2("baddu $t3, $t1, $t2", reg_val_double[i],
                                               reg_val_double[j]);
               }
            }
            break;
         }
         case POP: {  /* Count Ones in a Word - POP */
            for(j = 4; j < N; j += 4) {
               TEST2("pop $t3, $t1", reg_val_double[j], 0);
            }
            break;
         }
         case DPOP: {  /* Count Ones in a Doubleword - DPOP */
            for(j = 8; j < N; j += 8) {
               TEST2("dpop $t3, $t1", reg_val_double[j], 0);
            }
            break;
         }
         case SAA: {  /* Atomic Add Word - saa rt, (base). */
            copy_reg_val_double();
            for(j = 4; j < N; j += 4) {
               TEST3("saa", j, reg_val_double_copy, reg_val_double[j]);
            }
            break;
         }
         case SAAD: {  /* Atomic Add Double - saad rt, (base). */
            copy_reg_val_double();
            for(j = 8; j < N; j += 8) {
               TEST3("saad", j, reg_val_double_copy, reg_val_double[j]);
            }
            break;
         }
         case LAA: {  /* Load Atomic Add Word - laa rd, (base), rt. */
            copy_reg_val_double();
            for(j = 4; j < N; j += 4) {
               TEST4_32("laa", j, reg_val_double_copy);
            }
            break;
         }
         case LAAD: {  /* Load Atomic Add Double - laad rd, (base), rt */
            copy_reg_val_double();
            for(j = 8; j < N; j += 8) {
               TEST4_64("laad ", j, reg_val_double_copy);
            }
            break;
         }
         case LAW: {  /* Load Atomic Swap Word - law rd, (base), rt */
            copy_reg_val_double();
            for(j = 4; j < N; j += 4) {
               TEST4_32("law", j, reg_val_double_copy);
            }
            break;
         }
         case LAWD: {  /* Load Atomic Swap Double - lawd rd, (base), rt */
            copy_reg_val_double();
            for(j = 8; j < N; j += 8) {
               TEST4_64("lawd", j, reg_val_double_copy);
            }
            break;
         }
         case LAI: {  /* Load Atomic Increment Word - lai rd, (base) */
            copy_reg_val_double();
            for(i = 4; i < N; i += 4) {
               TEST1_32("lai", i, reg_val_double_copy);
            }
            break;
         }
         case LAID: {  /* Load Atomic Increment Double - laid rd, (base) */
            copy_reg_val_double();
            for(i = 8; i < N; i += 8) {
               TEST1_64("laid ", i, reg_val_double_copy);
            }
            break;
         }
         case LAD: {  /* Load Atomic Decrement Word - lad rd, (base) */
            copy_reg_val_double();
            for(i = 4; i < N; i += 4) {
               TEST1_32("lad", i, reg_val_double_copy);
            }
            break;
         }
         case LADD: {  /* Load Atomic Decrement Double - ladd rd, (base) */
            copy_reg_val_double();
            for(i = 8; i < N; i += 8) {
               TEST1_64("ladd",i, reg_val_double_copy);
            }
            break;
         }
         case LAS:{   /* Load Atomic Set Word - las rd, (base) */
            copy_reg_val_double();
            for(i = 4; i < N; i += 4) {
               TEST1_32("las",i, reg_val_double_copy);
            }
            break;
         }
         case LASD:{  /* Load Atomic Set Double - lasd rd, (base) */
            copy_reg_val_double();
            for(i = 8; i < N; i += 8) {
               TEST1_64("lasd",i, reg_val_double_copy);
            }
            break;
         }
         case LAC: {  /* Load Atomic Clear Word - lac rd, (base) */
            copy_reg_val_double();
            for(i = 4; i < N; i += 4) {
               TEST1_32("lac",i, reg_val_double_copy);
            }
            break;
         }
         case LACD: {  /* Load Atomic Clear Double - lacd rd, (base) */
            copy_reg_val_double();
            for(i = 8; i < N; i += 8) {
               TEST1_64("lacd",i, reg_val_double_copy);
            }
            break;
         }
         default:
            printf("Nothing to be executed \n");
            break;
      }
   }
#endif
   return 0;
}