//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-33281558
// Cuda compilation tools, release 12.3, V12.3.52
// Based on NVVM 7.0.1
//

.version 8.3
.target sm_89
.address_size 64

	// .globl	_Z10add_simplePfS_S_

//-----------------------------------------------------------------------
// Kernel: _Z10add_simplePfS_S_
//         (Itanium-mangled name of add_simple(float*, float*, float*))
//
// Each thread computes a single element:
//     param_2[tid.x] = param_0[tid.x] + param_1[tid.x]
//
// In:   param_0  u64 generic address, read as f32
//       param_1  u64 generic address, read as f32
// Out:  param_2  u64 generic address, written as f32
//
// Notes:
//   * The element index is %tid.x only; no other special register is read.
//   * There is no bounds guard and no length parameter: every launched
//     thread performs both loads and the store.
//-----------------------------------------------------------------------
.visible .entry _Z10add_simplePfS_S_(
	.param .u64 _Z10add_simplePfS_S__param_0,
	.param .u64 _Z10add_simplePfS_S__param_1,
	.param .u64 _Z10add_simplePfS_S__param_2
)
{
	.reg .f32 	%val0, %val1, %sum;
	.reg .b32 	%tx;
	.reg .b64 	%gen0, %gen1, %gen2;
	.reg .b64 	%glob0, %glob1, %glob2;
	.reg .b64 	%byte_off;
	.reg .b64 	%addr0, %addr1, %addr2;

	// Fetch the three pointer arguments (generic addresses).
	ld.param.u64 	%gen0, [_Z10add_simplePfS_S__param_0];
	ld.param.u64 	%gen1, [_Z10add_simplePfS_S__param_1];
	ld.param.u64 	%gen2, [_Z10add_simplePfS_S__param_2];

	// Convert generic addresses to global-space addresses so that the
	// ld.global / st.global forms below can be used.
	cvta.to.global.u64 	%glob0, %gen0;
	cvta.to.global.u64 	%glob1, %gen1;
	cvta.to.global.u64 	%glob2, %gen2;

	// byte_off = (u64)tid.x * 4; 4 is the size in bytes of one f32 element.
	// mul.wide widens the 32-bit operands to a 64-bit product.
	mov.u32 	%tx, %tid.x;
	mul.wide.u32 	%byte_off, %tx, 4;

	// The same byte offset indexes all three arrays.
	add.s64 	%addr0, %glob0, %byte_off;
	add.s64 	%addr1, %glob1, %byte_off;
	add.s64 	%addr2, %glob2, %byte_off;

	// Load both operands, add, and store the result.
	ld.global.f32 	%val0, [%addr0];
	ld.global.f32 	%val1, [%addr1];
	add.f32 	%sum, %val0, %val1;
	st.global.f32 	[%addr2], %sum;

	ret;
}