V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py", 0]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_inductor/test_case.py", 1]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/test_case.py", 2]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/testing/_internal/common_utils.py", 3]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch-env/lib/python3.10/unittest/main.py", 4]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch-env/lib/python3.10/unittest/runner.py", 5]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch-env/lib/python3.10/unittest/suite.py", 6]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch-env/lib/python3.10/unittest/case.py", 7]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch-env/lib/python3.10/contextlib.py", 8]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/nn/modules/module.py", 9]}
V0401 08:54:21.881000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/eval_frame.py", 10]}
V0401 08:54:21.882000 140424060892160 torch/_dynamo/convert_frame.py:672] {"dynamo_start": {"stack": [{"line": 10031, "name": "<module>", "filename": 0}, {"line": 14, "name": "run_tests", "filename": 1}, {"line": 41, "name": "run_tests", "filename": 2}, {"line": 1165, "name": "run_tests", "filename": 3}, {"line": 101, "name": "__init__", "filename": 4}, {"line": 271, "name": "runTests", "filename": 4}, {"line": 184, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 650, "name": "__call__", "filename": 7}, {"line": 2866, "name": "run", "filename": 3}, {"line": 2838, "name": "_run_custom", "filename": 3}, {"line": 591, "name": "run", "filename": 7}, {"line": 549, "name": "_callTestMethod", "filename": 7}, {"line": 2739, "name": "wrapper", "filename": 3}, {"line": 9214, "name": "new_test", "filename": 0}, {"line": 79, "name": "inner", "filename": 8}, {"line": 8845, "name": "test_custom_op_fixed_layout_channels_last", "filename": 0}, {"line": 1527, "name": "_wrapped_call_impl", "filename": 9}, {"line": 1536, "name": "_call_impl", "filename": 9}, {"line": 450, "name": "_fn", "filename": 10}, {"line": 1527, "name": "_wrapped_call_impl", "filename": 9}, {"line": 1536, "name": "_call_impl", "filename": 9}]}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V0401 08:54:21.895000 140424060892160 torch/_dynamo/output_graph.py:1189] {"dynamo_output_graph": {"sizes": {"l_x_": [1, 320, 128, 128], "out": [1, 320, 128, 128], "out_2": [1, 320, 128, 128], "out_3": [1, 320, 128, 128]}}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "74cabbbada68afbad8a921c47aa2b317"}
class GraphModule(torch.nn.Module):
    def forward(self, L_x_ : torch.Tensor):
        l_x_ = L_x_

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8809 in helper, code: out = F.gelu(x)
        out = torch._C._nn.gelu(l_x_);  l_x_ = None

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8810 in helper, code: out = self.in_layers(out)
        out_2 = self.L__self___in_layers_0(out);  out = None

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8815 in forward, code: out = torch.ops.test.baz(out)
        out_3 = torch.ops.test.baz(out_2);  out_2 = None
        return (out_3,)

V0401 08:54:21.914000 140424060892160 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:112] {"aot_forward_graph": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9d0885575d7f020cb3a1996185650901"}
class <lambda>(torch.nn.Module):
    def forward(self, arg0_1: "f32[1, 320, 128, 128]"):
        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8809 in helper, code: out = F.gelu(x)
        mul: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(arg0_1, 0.5)
        mul_1: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(arg0_1, 0.7071067811865476);  arg0_1 = None
        erf: "f32[1, 320, 128, 128]" = torch.ops.aten.erf.default(mul_1);  mul_1 = None
        add: "f32[1, 320, 128, 128]" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
        mul_2: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(mul, add);  mul = add = None

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8810 in helper, code: out = self.in_layers(out)
        rand: "f32[1, 320, 128, 128]" = torch.ops.aten.rand.default([1, 320, 128, 128], dtype = torch.float32, device = device(type='cuda', index=0), pin_memory = False)
        convert_element_type: "f32[1, 320, 128, 128]" = torch.ops.prims.convert_element_type.default(rand, torch.float32);  rand = None
        clone: "f32[1, 320, 128, 128]" = torch.ops.aten.clone.default(convert_element_type, memory_format = torch.channels_last);  convert_element_type = None
        gt: "b8[1, 320, 128, 128]" = torch.ops.aten.gt.Scalar(clone, 0.1);  clone = None
        mul_3: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(gt, mul_2);  gt = mul_2 = None
        mul_4: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(mul_3, 1.1111111111111112);  mul_3 = None

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8815 in forward, code: out = torch.ops.test.baz(out)
        baz: "f32[1, 320, 128, 128]" = torch.ops.test.baz.default(mul_4);  mul_4 = None
        return (baz,)

V0401 08:54:22.062000 140424060892160 torch/_inductor/compile_fx.py:650] {"inductor_post_grad_graph": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "cf729e565df6880d92548bb46eff9ab5"}
class <lambda>(torch.nn.Module):
    def forward(self, arg0_1: "f32[1, 320, 128, 128]"):
        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8809 in helper, code: out = F.gelu(x)
        mul: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(arg0_1, 0.5)
        mul_1: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(arg0_1, 0.7071067811865476);  arg0_1 = None
        erf: "f32[1, 320, 128, 128]" = torch.ops.aten.erf.default(mul_1);  mul_1 = None
        add: "f32[1, 320, 128, 128]" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
        mul_2: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(mul, add);  mul = add = None

        # No stacktrace found for following nodes
        inductor_seeds_default: "i64[1]" = torch.ops.prims.inductor_seeds.default(1, device(type='cuda', index=0))
        inductor_lookup_seed_default: "i64[]" = torch.ops.prims.inductor_lookup_seed.default(inductor_seeds_default, 0);  inductor_seeds_default = None
        inductor_random_default: "f32[1, 320, 128, 128]" = torch.ops.prims.inductor_random.default([1, 320, 128, 128], inductor_lookup_seed_default, 'rand');  inductor_lookup_seed_default = None

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8810 in helper, code: out = self.in_layers(out)
        clone: "f32[1, 320, 128, 128]" = torch.ops.aten.clone.default(inductor_random_default, memory_format = torch.channels_last);  inductor_random_default = None
        gt: "b8[1, 320, 128, 128]" = torch.ops.aten.gt.Scalar(clone, 0.1);  clone = None
        mul_3: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(gt, mul_2);  gt = mul_2 = None
        mul_4: "f32[1, 320, 128, 128]" = torch.ops.aten.mul.Tensor(mul_3, 1.1111111111111112);  mul_3 = None

        # File: /data/users/jjwu/a/pytorch/test/inductor/test_torchinductor.py:8815 in forward, code: out = torch.ops.test.baz(out)
        baz: "f32[1, 320, 128, 128]" = torch.ops.test.baz.default(mul_4);  mul_4 = None
        return (baz,)

V0401 08:54:22.975000 140424060892160 torch/_inductor/graph.py:1268] {"inductor_output_code": {"filename": "/tmp/torchinductor_jjwu/pz/cpzf3cxhhnoarj4kjhg2wxhmski6yge4zox6h43vsrrdubhj7qnb.py"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5dd8b94f459b07b5314187f78575d118"}
from ctypes import c_void_p, c_long
import torch
import math
import random
import os
import tempfile
from math import inf, nan
from torch._inductor.hooks import run_intermediate_hooks
from torch._inductor.utils import maybe_profile
from torch._inductor.codegen.memory_planning import _align as align

from torch import device, empty_strided
from torch._inductor.codecache import AsyncCompile
from torch._inductor.select_algorithm import extern_kernels
from torch._inductor.codegen.multi_kernel import MultiKernelCall

aten = torch.ops.aten
inductor_ops = torch.ops.inductor
assert_size_stride = torch._C._dynamo.guards.assert_size_stride
empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
alloc_from_pool = torch.ops.inductor._alloc_from_pool
reinterpret_tensor = torch.ops.inductor._reinterpret_tensor
async_compile = AsyncCompile()


# kernel path: /tmp/torchinductor_jjwu/6p/c6pjjivx4yr7a6rhwljusernnkewuf53hvlnc2sy63rcnj764rhn.py
# Source Nodes: [], Original ATen: []
triton_poi_fused_0 = async_compile.triton('triton_', '''
import triton
import triton.language as tl
from triton.compiler.compiler import AttrsDescriptor

from torch._inductor import triton_helpers, triton_heuristics
from torch._inductor.ir import ReductionHint, TileHint
from torch._inductor.triton_helpers import libdevice, math as tl_math
from torch._inductor.triton_heuristics import AutotuneHint
from torch._inductor.utils import instance_descriptor

@triton_heuristics.pointwise(
    size_hints=[8388608],
    filename=__file__,
    triton_meta={'signature': {0: '*i64', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': 0, 'device_type': 'cuda', 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 3), equal_to_1=(), divisible_by_8=(3,))]},
    inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_0', 'mutated_arg_names': [], 'no_x_dim': False, 'backend_hash': 'e24e28e8c74b85ff7b61b41fc9160c05d25c32556bda76a915743727cec50966'},
    min_elem_per_thread=0
)
@triton.jit
def triton_(in_ptr0, out_ptr0, load_seed_offset, xnumel, XBLOCK : tl.constexpr):
    xnumel = 5242880
    xoffset = tl.program_id(0) * XBLOCK
    xindex = xoffset + tl.arange(0, XBLOCK)[:]
    xmask = xindex < xnumel
    x0 = xindex
    tmp0 = tl.load(in_ptr0 + load_seed_offset)
    tmp1 = x0
    tmp2 = tl.rand(tmp0, (tmp1).to(tl.uint32))
    tl.store(out_ptr0 + (x0), tmp2, None)
''', device_str='cuda')

import triton
import triton.language as tl
from torch._inductor.triton_heuristics import grid, split_scan_grid, start_graph, end_graph
from torch._C import _cuda_getCurrentRawStream as get_raw_stream


# kernel path: /tmp/torchinductor_jjwu/2g/c2gsdapii4jxiorppiwpmvslqswzpbmms26vhuqoh7nm27gxhdnf.py
# Source Nodes: [out, out_2, out_3], Original ATen: [aten.gelu, aten.native_dropout, test.baz]
# out => add, erf, mul, mul_1, mul_2
# out_2 => clone, gt, mul_3, mul_4
# out_3 => baz
triton_poi_fused_baz_gelu_native_dropout_1 = async_compile.triton('triton_', '''
import triton
import triton.language as tl
from triton.compiler.compiler import AttrsDescriptor

from torch._inductor import triton_helpers, triton_heuristics
from torch._inductor.ir import ReductionHint, TileHint
from torch._inductor.triton_helpers import libdevice, math as tl_math
from torch._inductor.triton_heuristics import AutotuneHint
from torch._inductor.utils import instance_descriptor

@triton_heuristics.pointwise(
    size_hints=[16384, 512], tile_hint=TileHint.DEFAULT,
    filename=__file__,
    triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: 'i32', 4: 'i32'}, 'device': 0, 'device_type': 'cuda', 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=(), divisible_by_8=(3, 4))]},
    inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_baz_gelu_native_dropout_1', 'mutated_arg_names': [], 'no_x_dim': False, 'backend_hash': 'e24e28e8c74b85ff7b61b41fc9160c05d25c32556bda76a915743727cec50966'},
    min_elem_per_thread=0
)
@triton.jit
def triton_(in_ptr0, in_ptr1, out_ptr0, ynumel, xnumel, YBLOCK : tl.constexpr, XBLOCK : tl.constexpr):
    ynumel = 16384
    xnumel = 320
    yoffset = tl.program_id(1) * YBLOCK
    yindex = yoffset + tl.arange(0, YBLOCK)[None, :]
    ymask = yindex < ynumel
    xoffset = tl.program_id(0) * XBLOCK
    xindex = xoffset + tl.arange(0, XBLOCK)[:, None]
    xmask = xindex < xnumel
    x1 = xindex
    y0 = yindex
    tmp0 = tl.load(in_ptr0 + (y0 + (16384*x1)), xmask, eviction_policy='evict_last')
    tmp4 = tl.load(in_ptr1 + (x1 + (320*y0)), xmask, eviction_policy='evict_last')
    tmp1 = 0.1
    tmp2 = tmp0 > tmp1
    tmp3 = tmp2.to(tl.float32)
    tmp5 = 0.5
    tmp6 = tmp4 * tmp5
    tmp7 = 0.7071067811865476
    tmp8 = tmp4 * tmp7
    tmp9 = libdevice.erf(tmp8)
    tmp10 = 1.0
    tmp11 = tmp9 + tmp10
    tmp12 = tmp6 * tmp11
    tmp13 = tmp3 * tmp12
    tmp14 = 1.1111111111111112
    tmp15 = tmp13 * tmp14
    tl.store(out_ptr0 + (x1 + (320*y0)), tmp15, xmask)
''', device_str='cuda')

async_compile.wait(globals())
del async_compile


def call(args):
    arg0_1, = args
    args.clear()
    assert_size_stride(arg0_1, (1, 320, 128, 128), (5242880, 1, 40960, 320))
    with torch.cuda._DeviceGuard(0):
        torch.cuda.set_device(0)
        buf0 = empty_strided_cuda((1, ), (1, ), torch.int64)
        # Source Nodes: [], Original ATen: []
        aten.randint.low_out(-9223372036854775808, 9223372036854775807, [1], out=buf0)
        buf1 = empty_strided_cuda((1, 320, 128, 128), (5242880, 16384, 128, 1), torch.float32)
        # Source Nodes: [], Original ATen: []
        stream0 = get_raw_stream(0)
        triton_poi_fused_0.run(buf0, buf1, 0, 5242880, grid=grid(5242880), stream=stream0)
        run_intermediate_hooks('inductor_random_default', buf1)
        del buf0
        buf2 = empty_strided_cuda((1, 320, 128, 128), (5242880, 1, 40960, 320), torch.float32)
        # Source Nodes: [out, out_2, out_3], Original ATen: [aten.gelu, aten.native_dropout, test.baz]
        triton_poi_fused_baz_gelu_native_dropout_1.run(buf1, arg0_1, buf2, 16384, 320, grid=grid(16384, 320), stream=stream0)
        run_intermediate_hooks('mul_4', buf2)
        del arg0_1
        del buf1
        # Source Nodes: [out, out_2, out_3], Original ATen: [aten.gelu, aten.native_dropout, test.baz]
        buf3 = torch.ops.test.baz.default(buf2)
        run_intermediate_hooks('baz', buf3)
        del buf2
        buf4 = buf3
        del buf3
    return (buf4, )


def benchmark_compiled_module(times=10, repeat=10):
    from torch._dynamo.testing import rand_strided
    from torch._inductor.utils import print_performance
    arg0_1 = rand_strided((1, 320, 128, 128), (5242880, 1, 40960, 320), device='cuda:0', dtype=torch.float32)
    fn = lambda: call([arg0_1])
    return print_performance(fn, times=times, repeat=repeat)


if __name__ == "__main__":
    from torch._inductor.wrapper_benchmark import compiled_module_main
    compiled_module_main('None', benchmark_compiled_module)

V0401 08:54:22.980000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/convert_frame.py", 11]}
V0401 08:54:22.980000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/utils.py", 12]}
V0401 08:54:22.980000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/bytecode_transformation.py", 13]}
V0401 08:54:22.980000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/symbolic_convert.py", 14]}
V0401 08:54:22.980000 140424060892160 torch/_logging/structured.py:19] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/output_graph.py", 15]}
V0401 08:54:22.980000 140424060892160 torch/_dynamo/guards.py:1194] {"dynamo_guards": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2fd53e7b434e73e7bbdc496799a81b21"}
[
  {"code": "hasattr(L['x'], '_dynamo_dynamic_indices') == False", "stack": null, "user_stack": null},
  {"code": "___check_obj_id(L['self'], 140423721840032)", "stack": null, "user_stack": null},
  {"code": "___check_obj_id(L['self'].training, 7665376)", "stack": null, "user_stack": null},
  {"code": "utils_device.CURRENT_DEVICE == None", "stack": [{"line": 10031, "name": "<module>", "filename": 0}, {"line": 14, "name": "run_tests", "filename": 1}, {"line": 41, "name": "run_tests", "filename": 2}, {"line": 1165, "name": "run_tests", "filename": 3}, {"line": 101, "name": "__init__", "filename": 4}, {"line": 271, "name": "runTests", "filename": 4}, {"line": 184, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 650, "name": "__call__", "filename": 7}, {"line": 2866, "name": "run", "filename": 3}, {"line": 2838, "name": "_run_custom", "filename": 3}, {"line": 591, "name": "run", "filename": 7}, {"line": 549, "name": "_callTestMethod", "filename": 7}, {"line": 2739, "name": "wrapper", "filename": 3}, {"line": 9214, "name": "new_test", "filename": 0}, {"line": 79, "name": "inner", "filename": 8}, {"line": 8845, "name": "test_custom_op_fixed_layout_channels_last", "filename": 0}, {"line": 1527, "name": "_wrapped_call_impl", "filename": 9}, {"line": 1536, "name": "_call_impl", "filename": 9}, {"line": 450, "name": "_fn", "filename": 10}, {"line": 1527, "name": "_wrapped_call_impl", "filename": 9}, {"line": 1536, "name": "_call_impl", "filename": 9}, {"line": 939, "name": "catch_errors", "filename": 11}, {"line": 802, "name": "_convert_frame", "filename": 11}, {"line": 400, "name": "_convert_frame_assert", "filename": 11}, {"line": 79, "name": "inner", "filename": 8}, {"line": 686, "name": "_compile", "filename": 11}, {"line": 262, "name": "time_wrapper", "filename": 12}, {"line": 541, "name": "compile_inner", "filename": 11}, {"line": 1036, "name": "transform_code_object", "filename": 13}, {"line": 165, "name": "_fn", "filename": 11}, {"line": 485, "name": "transform", "filename": 11}, {"line": 2105, "name": "__init__", "filename": 14}, {"line": 344, "name": "__init__", "filename": 15}, {"line": 467, "name": "init_ambient_guards", "filename": 15}], "user_stack": null},
  {"code": "___check_current_backend(140423721840560)", "stack": [{"line": 10031, "name": "<module>", "filename": 0}, {"line": 14, "name": "run_tests", "filename": 1}, {"line": 41, "name": "run_tests", "filename": 2}, {"line": 1165, "name": "run_tests", "filename": 3}, {"line": 101, "name": "__init__", "filename": 4}, {"line": 271, "name": "runTests", "filename": 4}, {"line": 184, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 650, "name": "__call__", "filename": 7}, {"line": 2866, "name": "run", "filename": 3}, {"line": 2838, "name": "_run_custom", "filename": 3}, {"line": 591, "name": "run", "filename": 7}, {"line": 549, "name": "_callTestMethod", "filename": 7}, {"line": 2739, "name": "wrapper", "filename": 3}, {"line": 9214, "name": "new_test", "filename": 0}, {"line": 79, "name": "inner", "filename": 8}, {"line": 8845, "name": "test_custom_op_fixed_layout_channels_last", "filename": 0}, {"line": 1527, "name": "_wrapped_call_impl", "filename": 9}, {"line": 1536, "name": "_call_impl", "filename": 9}, {"line": 450, "name": "_fn", "filename": 10}, {"line": 1527, "name": "_wrapped_call_impl", "filename": 9}, {"line": 1536, "name": "_call_impl", "filename": 9}, {"line": 939, "name": "catch_errors", "filename": 11}, {"line": 802, "name": "_convert_frame", "filename": 11}, {"line": 400, "name": "_convert_frame_assert", "filename": 11}, {"line": 79, "name": "inner", "filename": 8}, {"line": 686, "name": "_compile", "filename": 11}, {"line": 262, "name": "time_wrapper", "filename": 12}, {"line": 541, "name": "compile_inner", "filename": 11}, {"line": 1036, "name": "transform_code_object", "filename": 13}, {"line": 165, "name": "_fn", "filename": 11}, {"line": 485, "name": "transform", "filename": 11}, {"line": 2105, "name": "__init__", "filename": 14}, {"line": 344, "name": "__init__", "filename": 15}, {"line": 473, "name": "init_ambient_guards", "filename": 15}], "user_stack": null},
  {"code": "check_tensor(L['x'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 320, 128, 128], stride=[5242880, 1, 40960, 320])", "stack": null, "user_stack": null}
]
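
For reference, the source lines quoted in the graph dumps above (test_torchinductor.py:8809, 8810, and 8815) suggest a module shaped roughly like the following. This is a hypothetical reconstruction, not the actual test source: the class name, the Dropout-only in_layers, and the baz stand-in body are assumptions. Only the quoted statements, the dropout probability (aten.gt.Scalar(_, 0.1) plus the 1/0.9 rescale) and the channels_last input (stride [5242880, 1, 40960, 320] in the check_tensor guard) are taken from the log.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.library import Library, impl

# Hypothetical registration for torch.ops.test.baz; the real test registers a
# custom op that requires a fixed (channels_last) input layout.
lib = Library("test", "FRAGMENT")
lib.define("baz(Tensor x) -> Tensor")

@impl(lib, "baz", "CompositeExplicitAutograd")
def baz(x):
    return x.clone()  # stand-in body, assumption

class Block(nn.Module):  # hypothetical name
    def __init__(self):
        super().__init__()
        # Dropout(p=0.1) matches gt(_, 0.1) and the 1.1111111111111112 rescale
        self.in_layers = nn.Sequential(nn.Dropout(p=0.1))

    def helper(self, x):
        out = F.gelu(x)            # decomposed into mul/erf/add/mul in the AOT graph
        out = self.in_layers(out)  # decomposed into rand/gt/mul/mul
        return out

    def forward(self, x):
        out = self.helper(x)
        out = torch.ops.test.baz(out)  # stays a single opaque node through Inductor
        return out

# Requires a CUDA device, matching the trace above.
m = torch.compile(Block().cuda())
x = torch.randn(1, 320, 128, 128, device="cuda").to(memory_format=torch.channels_last)
m(x)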
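
Records in this format come from PyTorch's structured-trace machinery rather than the TORCH_LOGS text logs. A minimal sketch of capturing and rendering such a trace, assuming a recent PyTorch (2.2+) and the separately installed tlparse CLI (pip install tlparse); the paths and the toy function below are arbitrary placeholders:

# Run as:   TORCH_TRACE=/tmp/my_trace python repro.py
# Then:     tlparse /tmp/my_trace/<generated log file>
# tlparse renders an HTML report with one page per dump kind seen above
# (dynamo_output_graph, aot_forward_graph, inductor_post_grad_graph,
# inductor_output_code, dynamo_guards, ...). TORCH_TRACE must be set in the
# environment before the process starts, hence the shell invocation.
import torch

def f(x):
    return torch.nn.functional.gelu(x)

compiled = torch.compile(f)
compiled(torch.randn(1, 320, 128, 128))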