//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-29745058
// Cuda compilation tools, release 11.3, V11.3.58
// Based on NVVM 7.0.1
//

.version 7.3
.target sm_52
.address_size 64

    // .globl sin_kernel

// 24-byte constant table, read below as six consecutive 32-bit words.
// NOTE(review): the name suggests these are the bits of 2/pi used for
// large-argument reduction - confirm against the CUDA math library.
.global .align 4 .b8 __cudart_i2opi_f[24] = {65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162};

//-----------------------------------------------------------------------
// sin_kernel(param_0, param_1, param_2)
//
// One element per thread:  i = ctaid.x * ntid.x + tid.x
//     if (i < param_2)  param_0[i] = f(param_1[i])
// where f is a single-precision quadrant-based polynomial evaluation
// (the kernel name indicates sine).
//
// In:    sin_kernel_param_0  u64  address the f32 result is stored to
//        sin_kernel_param_1  u64  address the f32 input is loaded from
//        sin_kernel_param_2  u32  element count (compared as signed 32-bit)
// Local: __local_depot0, 28 bytes = seven 32-bit words of scratch used
//        only by the large-argument path.
//
// Key registers:
//     %f1   input value x
//     %f35  reduced argument r
//     %r52  quadrant index q (bit 0 selects the polynomial,
//           bit 1 selects the final sign flip)
//     %f37  result
//-----------------------------------------------------------------------
.visible .entry sin_kernel(
    .param .u64 sin_kernel_param_0,
    .param .u64 sin_kernel_param_1,
    .param .u32 sin_kernel_param_2
)
{
    .local .align 4 .b8     __local_depot0[28];
    .reg .b64   %SP;
    .reg .b64   %SPL;
    .reg .pred  %p<12>;
    .reg .f32   %f<38>;
    .reg .b32   %r<53>;
    .reg .f64   %fd<3>;
    .reg .b64   %rd<33>;

    // ---- Load parameters and compute the global element index ---------
    mov.u64         %SPL, __local_depot0;
    ld.param.u64    %rd10, [sin_kernel_param_0];
    ld.param.u64    %rd11, [sin_kernel_param_1];
    ld.param.u32    %r19, [sin_kernel_param_2];
    add.u64         %rd1, %SPL, 0;              // rd1 = base of local scratch
    mov.u32         %r20, %ntid.x;
    mov.u32         %r21, %ctaid.x;
    mov.u32         %r22, %tid.x;
    mad.lo.s32      %r1, %r21, %r20, %r22;      // i = ctaid.x * ntid.x + tid.x
    setp.ge.s32     %p1, %r1, %r19;
    @%p1 bra        $L__BB0_14;                 // i >= count: nothing to do

    // ---- x = param_1[i] ------------------------------------------------
    cvta.to.global.u64  %rd13, %rd11;
    cvt.s64.s32     %rd2, %r1;                  // sign-extended i, reused for the store
    mul.wide.s32    %rd14, %r1, 4;              // byte offset = i * 4
    add.s64         %rd15, %rd13, %rd14;
    ld.global.f32   %f1, [%rd15];

    // ---- Fast reduction: q = rint(x * 2/pi), r = x - q * pi/2 ----------
    // pi/2 is subtracted as three FMA terms of decreasing magnitude so the
    // low-order bits of the product are not lost.
    mul.f32         %f14, %f1, 0f3F22F983;      // x * ~0.63662 (2/pi)
    cvt.rni.s32.f32 %r52, %f14;                 // q, round to nearest integer
    cvt.rn.f32.s32  %f15, %r52;
    mov.f32         %f16, 0fBFC90FDA;           // ~ -1.5707963 (leading part of -pi/2)
    fma.rn.f32      %f17, %f15, %f16, %f1;
    mov.f32         %f18, 0fB3A22168;           // second correction term
    fma.rn.f32      %f19, %f15, %f18, %f17;
    mov.f32         %f20, 0fA7C234C5;           // third correction term
    fma.rn.f32      %f35, %f15, %f20, %f19;     // r
    abs.f32         %f3, %f1;
    // leu is also true for unordered operands, so NaN takes the fast path.
    setp.leu.f32    %p2, %f3, 0f47CE4780;       // |x| <= 105615.0 ?
    @%p2 bra        $L__BB0_9;

    setp.eq.f32     %p3, %f3, 0f7F800000;       // |x| == +inf ?
    @%p3 bra        $L__BB0_8;
    bra.uni         $L__BB0_3;

$L__BB0_8:
    // Infinite input: r = x * 0.0, which is NaN and propagates to the result.
    mov.f32         %f23, 0f00000000;
    mul.rn.f32      %f35, %f1, %f23;
    bra.uni         $L__BB0_9;

$L__BB0_3:
    // ---- Slow reduction for large finite |x| ---------------------------
    // NOTE(review): this has the shape of a Payne-Hanek style reduction
    // (mantissa times a multi-word 2/pi table) - confirm against the CUDA
    // math library source.
    mov.b32         %r3, %f1;                   // raw bits of x
    bfe.u32         %r24, %r3, 23, 8;           // biased exponent field
    add.s32         %r4, %r24, -128;
    shl.b32         %r25, %r3, 8;
    or.b32          %r5, %r25, -2147483648;     // mantissa with explicit leading 1 at bit 31
    shr.u32         %r6, %r4, 5;                // whole-word part of the shift
    mov.u64         %rd32, 0;                   // carry between words
    mov.u32         %r49, 0;                    // loop counter
    mov.u64         %rd30, __cudart_i2opi_f;    // table read pointer
    mov.u64         %rd31, %rd1;                // scratch write pointer

$L__BB0_4:
    // Six iterations: scratch[k] = low32(table[k] * mantissa + carry),
    // carry = high 32 bits of that 64-bit sum.
    .pragma "nounroll";
    ld.global.nc.u32    %r26, [%rd30];
    mad.wide.u32    %rd18, %r26, %r5, %rd32;
    shr.u64         %rd32, %rd18, 32;
    st.local.u32    [%rd31], %rd18;
    add.s64         %rd31, %rd31, 4;
    add.s64         %rd30, %rd30, 4;
    add.s32         %r49, %r49, 1;
    setp.ne.s32     %p4, %r49, 6;
    @%p4 bra        $L__BB0_4;

    st.local.u32    [%rd1+24], %rd32;           // seventh word = final carry

    // Pick the two product words addressed by the exponent:
    // hi = scratch[6 - r6], lo = scratch[5 - r6].
    cvt.u64.u32     %rd19, %r6;
    mov.u64         %rd20, 2;
    sub.s64         %rd21, %rd20, %rd19;
    shl.b64         %rd22, %rd21, 2;
    add.s64         %rd23, %rd1, %rd22;
    add.s64         %rd9, %rd23, 16;
    ld.local.u32    %r50, [%rd23+16];           // hi
    ld.local.u32    %r51, [%rd23+12];           // lo
    and.b32         %r11, %r4, 31;              // remaining bit shift
    setp.eq.s32     %p5, %r11, 0;
    @%p5 bra        $L__BB0_7;                  // word aligned: no funnel shift needed

    // Shift hi:lo left by r11 bits, pulling the vacated bits from the
    // next lower word (scratch[4 - r6]).
    mov.u32         %r27, 32;
    sub.s32         %r28, %r27, %r11;
    shr.u32         %r29, %r51, %r28;
    shl.b32         %r30, %r50, %r11;
    add.s32         %r50, %r29, %r30;
    ld.local.u32    %r31, [%rd9+-8];
    shr.u32         %r32, %r31, %r28;
    shl.b32         %r33, %r51, %r11;
    add.s32         %r51, %r32, %r33;

$L__BB0_7:
    // The top two bits of hi are the integer part; the rest is a fraction.
    // If the fraction's top bit is set the integer part is rounded up and
    // the fraction bits are inverted, with the sign flag toggled to match.
    and.b32         %r34, %r3, -2147483648;     // sign bit of x
    shr.u32         %r35, %r51, 30;
    shl.b32         %r36, %r50, 2;
    or.b32          %r37, %r35, %r36;           // upper 32 fraction bits
    shr.u32         %r38, %r37, 31;             // 1 if the fraction's top bit is set
    shr.u32         %r39, %r50, 30;             // two integer bits
    add.s32         %r40, %r38, %r39;
    neg.s32         %r41, %r40;
    setp.eq.s32     %p6, %r34, 0;
    selp.b32        %r52, %r40, %r41, %p6;      // q, negated when x is negative
    setp.ne.s32     %p7, %r38, 0;
    xor.b32         %r42, %r34, -2147483648;
    selp.b32        %r43, %r42, %r34, %p7;      // sign flag for r
    selp.b32        %r44, -1, 0, %p7;           // all-ones mask when rounding up
    xor.b32         %r45, %r37, %r44;
    shl.b32         %r46, %r51, 2;
    xor.b32         %r47, %r46, %r44;
    cvt.u64.u32     %rd24, %r45;
    cvt.u64.u32     %rd25, %r47;
    bfi.b64         %rd26, %rd24, %rd25, 32, 32;    // (r45 << 32) | r47
    cvt.rn.f64.s64  %fd1, %rd26;
    mul.f64         %fd2, %fd1, 0d3BF921FB54442D19; // scale by (pi/2) * 2^-64
    cvt.rn.f32.f64  %f21, %fd2;
    setp.eq.s32     %p8, %r43, 0;
    neg.f32         %f22, %f21;
    selp.f32        %f35, %f21, %f22, %p8;      // r with its sign applied

$L__BB0_9:
    // ---- Polynomial evaluation selected by the quadrant ----------------
    // q even: result = r   + (r*r * r)   * P(r*r)
    // q odd : result = 1.0 + (r*r * 1.0) * Q(r*r)
    // NOTE(review): the coefficients look like the usual sine (P) and
    // cosine (Q) approximations - confirm.
    and.b32         %r18, %r52, 1;
    setp.eq.s32     %p9, %r18, 0;               // p9 = q is even
    selp.f32        %f7, %f35, 0f3F800000, %p9; // r or 1.0
    mul.rn.f32      %f8, %f35, %f35;            // r*r
    mov.f32         %f36, 0fB94D4153;           // leading coefficient, even case
    @%p9 bra        $L__BB0_11;

    // Odd case has one extra term.
    mov.f32         %f25, 0fBAB607ED;
    mov.f32         %f26, 0f37CBAC00;
    fma.rn.f32      %f36, %f26, %f8, %f25;

$L__BB0_11:
    selp.f32        %f27, 0f3C0885E4, 0f3D2AAABB, %p9;
    fma.rn.f32      %f28, %f36, %f8, %f27;
    selp.f32        %f29, 0fBE2AAAA8, 0fBEFFFFFF, %p9;
    fma.rn.f32      %f30, %f28, %f8, %f29;
    mov.f32         %f31, 0f00000000;
    fma.rn.f32      %f32, %f8, %f7, %f31;       // r*r * (r or 1.0)
    fma.rn.f32      %f37, %f30, %f32, %f7;
    and.b32         %r48, %r52, 2;
    setp.eq.s32     %p11, %r48, 0;
    @%p11 bra       $L__BB0_13;

    // Bit 1 of q set: negate the result.
    mov.f32         %f34, 0fBF800000;           // -1.0
    fma.rn.f32      %f37, %f37, %f34, %f31;

$L__BB0_13:
    // ---- param_0[i] = result --------------------------------------------
    cvta.to.global.u64  %rd27, %rd10;
    shl.b64         %rd28, %rd2, 2;             // byte offset = i * 4
    add.s64         %rd29, %rd27, %rd28;
    st.global.f32   [%rd29], %f37;

$L__BB0_14:
    ret;

}