Operator: aten._log_softmax.default
cnt: 1, ((T([128, 1000], f16), 1, False), {})
Operator: aten._log_softmax_backward_data.default
cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {})
Operator: aten._softmax.default
cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(602304, 8, 192, 1)), 2, False), {})
cnt: 2, ((T([128, 8, 785, 16], f16, stride=(301440, 16, 384, 1)), 2, False), {})
cnt: 2, ((T([128, 8, 197, 40], f16, stride=(189120, 40, 960, 1)), 2, False), {})
cnt: 2, ((T([128, 8, 50, 64], f16, stride=(76800, 64, 1536, 1)), 2, False), {})
Operator: aten._softmax_backward_data.default
cnt: 2, ((T([128, 8, 50, 64], f16, stride=(25600, 3200, 1, 50)), T([128, 8, 50, 64], f16), 2, f16), {})
cnt: 2, ((T([128, 8, 197, 40], f16, stride=(63040, 7880, 1, 197)), T([128, 8, 197, 40], f16), 2, f16), {})
cnt: 2, ((T([128, 8, 785, 16], f16, stride=(100480, 12560, 1, 785)), T([128, 8, 785, 16], f16), 2, f16), {})
cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(200768, 25096, 1, 3137)), T([128, 8, 3137, 8], f16), 2, f16), {})
Operator: aten._unsafe_view.default
cnt: 6, ((T([128, 8, 3137, 8], f16), [1024, 3137, 8]), {})
cnt: 2, ((T([1024, 8, 8], f16), [128, 8, 8, 8]), {})
cnt: 2, ((T([1024, 3137, 8], f16), [128, 8, 3137, 8]), {})
cnt: 2, ((T([128, 3137, 8, 8], f16), [128, 3137, 64]), {})
cnt: 6, ((T([128, 8, 785, 16], f16), [1024, 785, 16]), {})
cnt: 2, ((T([1024, 16, 16], f16), [128, 8, 16, 16]), {})
cnt: 2, ((T([1024, 785, 16], f16), [128, 8, 785, 16]), {})
cnt: 2, ((T([128, 785, 8, 16], f16), [128, 785, 128]), {})
cnt: 6, ((T([128, 8, 197, 40], f16), [1024, 197, 40]), {})
cnt: 2, ((T([1024, 40, 40], f16), [128, 8, 40, 40]), {})
cnt: 2, ((T([1024, 197, 40], f16), [128, 8, 197, 40]), {})
cnt: 2, ((T([128, 197, 8, 40], f16), [128, 197, 320]), {})
cnt: 6, ((T([128, 8, 50, 64], f16), [1024, 50, 64]), {})
cnt: 2, ((T([1024, 64, 64], f16), [128, 8, 64, 64]), {})
cnt: 2, ((T([1024, 50, 64], f16), [128, 8, 50, 64]), {})
cnt: 2, ((T([128, 50, 8, 64], f16), [128, 50, 512]), {})
cnt: 2, ((T([128, 50, 3, 8, 64], f16), [128, 50, 1536]), {})
cnt: 2, ((T([128, 197, 3, 8, 40], f16), [128, 197, 960]), {})
cnt: 2, ((T([128, 785, 3, 8, 16], f16), [128, 785, 384]), {})
cnt: 2, ((T([128, 3137, 3, 8, 8], f16), [128, 3137, 192]), {})
Operator: aten.add.Tensor
cnt: 2, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64))), {})
cnt: 6, ((T([128, 8, 3137, 8], f16), T([128, 8, 3137, 8], f16)), {})
cnt: 10, ((T([128, 3137, 64], f16), T([128, 3137, 64], f16)), {})
cnt: 2, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128))), {})
cnt: 6, ((T([128, 8, 785, 16], f16), T([128, 8, 785, 16], f16)), {})
cnt: 10, ((T([128, 785, 128], f16), T([128, 785, 128], f16)), {})
cnt: 2, ((T([128, 320, 14, 14], f16), T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320))), {})
cnt: 6, ((T([128, 8, 197, 40], f16), T([128, 8, 197, 40], f16)), {})
cnt: 10, ((T([128, 197, 320], f16), T([128, 197, 320], f16)), {})
cnt: 2, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512))), {})
cnt: 6, ((T([128, 8, 50, 64], f16), T([128, 8, 50, 64], f16)), {})
cnt: 10, ((T([128, 50, 512], f16), T([128, 50, 512], f16)), {})
cnt: 4, ((T([3, 128, 8, 50, 64], f16), T([3, 128, 8, 50, 64], f16)), {})
cnt: 2, ((T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([128, 512, 7, 7], f16, stride=(25088, 1, 3584, 512))), {})
cnt: 1, ((T([192, 1, 7, 7], f16), T([192, 1, 7, 7], f16)), {})
cnt: 2, ((T([192], f16), T([192], f16)), {})
cnt: 1, ((T([192, 1, 5, 5], f16), T([192, 1, 5, 5], f16)), {})
cnt: 2, ((T([128, 1, 3, 3], f16), T([128, 1, 3, 3], f16)), {})
cnt: 2, ((T([128], f16), T([128], f16)), {})
cnt: 1, ((T([512, 1, 3, 3], f16), T([512, 1, 3, 3], f16)), {})
cnt: 1, ((T([512], f16), T([512], f16)), {})
cnt: 4, ((T([3, 128, 8, 197, 40], f16), T([3, 128, 8, 197, 40], f16)), {})
cnt: 2, ((T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([128, 320, 14, 14], f16, stride=(62720, 1, 4480, 320))), {})
cnt: 1, ((T([120, 1, 7, 7], f16), T([120, 1, 7, 7], f16)), {})
cnt: 2, ((T([120], f16), T([120], f16)), {})
cnt: 1, ((T([120, 1, 5, 5], f16), T([120, 1, 5, 5], f16)), {})
cnt: 1, ((T([80, 1, 3, 3], f16), T([80, 1, 3, 3], f16)), {})
cnt: 1, ((T([80], f16), T([80], f16)), {})
cnt: 1, ((T([320, 1, 3, 3], f16), T([320, 1, 3, 3], f16)), {})
cnt: 1, ((T([320], f16), T([320], f16)), {})
cnt: 4, ((T([3, 128, 8, 785, 16], f16), T([3, 128, 8, 785, 16], f16)), {})
cnt: 2, ((T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 128, 28, 28], f16, stride=(100352, 1, 3584, 128))), {})
cnt: 1, ((T([48, 1, 7, 7], f16), T([48, 1, 7, 7], f16)), {})
cnt: 2, ((T([48], f16), T([48], f16)), {})
cnt: 1, ((T([48, 1, 5, 5], f16), T([48, 1, 5, 5], f16)), {})
cnt: 1, ((T([32, 1, 3, 3], f16), T([32, 1, 3, 3], f16)), {})
cnt: 1, ((T([32], f16), T([32], f16)), {})
cnt: 4, ((T([3, 128, 8, 3137, 8], f16), T([3, 128, 8, 3137, 8], f16)), {})
cnt: 2, ((T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([128, 64, 56, 56], f16, stride=(200704, 1, 3584, 64))), {})
cnt: 1, ((T([24, 1, 7, 7], f16), T([24, 1, 7, 7], f16)), {})
cnt: 2, ((T([24], f16), T([24], f16)), {})
cnt: 1, ((T([24, 1, 5, 5], f16), T([24, 1, 5, 5], f16)), {})
cnt: 1, ((T([16, 1, 3, 3], f16), T([16, 1, 3, 3], f16)), {})
cnt: 1, ((T([16], f16), T([16], f16)), {})
cnt: 1, ((T([64, 1, 3, 3], f16), T([64, 1, 3, 3], f16)), {})
cnt: 1, ((T([64], f16), T([64], f16)), {})
Operator: aten.addmm.default
cnt: 2, ((T([192], f16), T([401536, 64], f16), T([64, 192], f16, stride=(1, 64))), {})
cnt: 2, ((T([64], f16), T([401536, 64], f16), T([64, 64], f16, stride=(1, 64))), {})
cnt: 2, ((T([512], f16), T([401536, 64], f16), T([64, 512], f16, stride=(1, 64))), {})
cnt: 2, ((T([64], f16), T([401536, 512], f16), T([512, 64], f16, stride=(1, 512))), {})
cnt: 2, ((T([384], f16), T([100480, 128], f16), T([128, 384], f16, stride=(1, 128))), {})
cnt: 2, ((T([128], f16), T([100480, 128], f16), T([128, 128], f16, stride=(1, 128))), {})
cnt: 2, ((T([1024], f16), T([100480, 128], f16), T([128, 1024], f16, stride=(1, 128))), {})
cnt: 2, ((T([128], f16), T([100480, 1024], f16), T([1024, 128], f16, stride=(1, 1024))), {})
cnt: 2, ((T([960], f16), T([25216, 320], f16), T([320, 960], f16, stride=(1, 320))), {})
cnt: 2, ((T([320], f16), T([25216, 320], f16), T([320, 320], f16, stride=(1, 320))), {})
cnt: 2, ((T([1280], f16), T([25216, 320], f16), T([320, 1280], f16, stride=(1, 320))), {})
cnt: 2, ((T([320], f16), T([25216, 1280], f16), T([1280, 320], f16, stride=(1, 1280))), {})
cnt: 2, ((T([1536], f16), T([6400, 512], f16), T([512, 1536], f16, stride=(1, 512))), {})
cnt: 2, ((T([512], f16), T([6400, 512], f16), T([512, 512], f16, stride=(1, 512))), {})
cnt: 2, ((T([2048], f16), T([6400, 512], f16), T([512, 2048], f16, stride=(1, 512))), {})
cnt: 2, ((T([512], f16), T([6400, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {})
cnt: 1, ((T([1000], f16), T([128, 512], f16, stride=(25600, 1)), T([512, 1000], f16, stride=(1, 512))), {})
Operator: aten.bmm.default
cnt: 4, ((T([1024, 8, 3137], f16, stride=(25096, 1, 8)), T([1024, 3137, 8], f16)), {})
cnt: 4, ((T([1024, 3137, 8], f16), T([1024, 8, 8], f16)), {})
cnt: 4, ((T([1024, 16, 785], f16, stride=(12560, 1, 16)), T([1024, 785, 16], f16)), {})
cnt: 4, ((T([1024, 785, 16], f16), T([1024, 16, 16], f16)), {})
cnt: 4, ((T([1024, 40, 197], f16, stride=(7880, 1, 40)), T([1024, 197, 40], f16)), {})
cnt: 4, ((T([1024, 197, 40], f16), T([1024, 40, 40], f16)), {})
cnt: 4, ((T([1024, 64, 50], f16, stride=(3200, 1, 64)), T([1024, 50, 64], f16)), {})
cnt: 4, ((T([1024, 50, 64], f16), T([1024, 64, 64], f16)), {})
cnt: 2, ((T([1024, 50, 64], f16), T([1024, 64, 64], f16, stride=(4096, 1, 64))), {})
cnt: 2, ((T([1024, 64, 64], f16), T([1024, 64, 50], f16, stride=(3200, 1, 64))), {})
cnt: 2, ((T([1024, 197, 40], f16), T([1024, 40, 40], f16, stride=(1600, 1, 40))), {})
cnt: 2, ((T([1024, 40, 40], f16), T([1024, 40, 197], f16, stride=(7880, 1, 40))), {})
cnt: 2, ((T([1024, 785, 16], f16), T([1024, 16, 16], f16, stride=(256, 1, 16))), {})
cnt: 2, ((T([1024, 16, 16], f16), T([1024, 16, 785], f16, stride=(12560, 1, 16))), {})
cnt: 2, ((T([1024, 3137, 8], f16), T([1024, 8, 8], f16, stride=(64, 1, 8))), {})
cnt: 2, ((T([1024, 8, 8], f16), T([1024, 8, 3137], f16, stride=(25096, 1, 8))), {})
Operator: aten.cat.default
cnt: 1, (([T([128, 1, 64], f16, stride=(0, 64, 1)), T([128, 3136, 64], f16)], 1), {})
cnt: 2, (([T([128, 1, 64], f16, stride=(200768, 64, 1)), T([128, 3136, 64], f16, stride=(200704, 1, 3136))], 1), {})
cnt: 2, (([T([128, 16, 56, 56], f16), T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)], 1), {})
cnt: 1, (([T([128, 1, 128], f16, stride=(0, 128, 1)), T([128, 784, 128], f16)], 1), {})
cnt: 2, (([T([128, 1, 128], f16, stride=(100480, 128, 1)), T([128, 784, 128], f16, stride=(100352, 1, 784))], 1), {})
cnt: 2, (([T([128, 32, 28, 28], f16), T([128, 48, 28, 28], f16), T([128, 48, 28, 28], f16)], 1), {})
cnt: 1, (([T([128, 1, 320], f16, stride=(0, 320, 1)), T([128, 196, 320], f16)], 1), {})
cnt: 2, (([T([128, 1, 320], f16, stride=(63040, 320, 1)), T([128, 196, 320], f16, stride=(62720, 1, 196))], 1), {})
cnt: 2, (([T([128, 80, 14, 14], f16), T([128, 120, 14, 14], f16), T([128, 120, 14, 14], f16)], 1), {})
cnt: 1, (([T([128, 1, 512], f16, stride=(0, 512, 1)), T([128, 49, 512], f16)], 1), {})
cnt: 2, (([T([128, 1, 512], f16, stride=(25600, 512, 1)), T([128, 49, 512], f16, stride=(25088, 1, 49))], 1), {})
cnt: 2, (([T([128, 128, 7, 7], f16), T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16)], 1), {})
cnt: 2, (([T([128, 128, 7, 7], f16, stride=(6272, 1, 896, 128)), T([128, 192, 7, 7], f16, stride=(9408, 1, 1344, 192)), T([128, 192, 7, 7], f16, stride=(9408, 1, 1344, 192))], 1), {})
cnt: 2, (([T([128, 80, 14, 14], f16, stride=(15680, 1, 1120, 80)), T([128, 120, 14, 14], f16, stride=(23520, 1, 1680, 120)), T([128, 120, 14, 14], f16, stride=(23520, 1, 1680, 120))], 1), {})
cnt: 2, (([T([128, 32, 28, 28], f16, stride=(25088, 1, 896, 32)), T([128, 48, 28, 28], f16, stride=(37632, 1, 1344, 48)), T([128, 48, 28, 28], f16, stride=(37632, 1, 1344, 48))], 1), {})
cnt: 2, (([T([128, 16, 56, 56], f16, stride=(50176, 1, 896, 16)), T([128, 24, 56, 56], f16, stride=(75264, 1, 1344, 24)), T([128, 24, 56, 56], f16, stride=(75264, 1, 1344, 24))], 1), {})
Operator: aten.clone.default
cnt: 1, ((T([128, 3, 224, 224], f16),), {})
Operator: aten.constant_pad_nd.default
cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), [0, 0, 1, 0, 0, 0], 0.0), {})
cnt: 2, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), [0, 0, 1, 0, 0, 0], 0.0), {})
cnt: 2, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), [0, 0, 1, 0, 0, 0], 0.0), {})
cnt: 2, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), [0, 0, 1, 0, 0, 0], 0.0), {})
cnt: 2, ((T([128, 8, 50, 64], f16, stride=(25600, 64, 512, 1)), [0, 0, -1, 0, 0, 0]), {})
cnt: 2, ((T([128, 8, 197, 40], f16, stride=(63040, 40, 320, 1)), [0, 0, -1, 0, 0, 0]), {})
cnt: 2, ((T([128, 8, 785, 16], f16, stride=(100480, 16, 128, 1)), [0, 0, -1, 0, 0, 0]), {})
cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(200768, 8, 64, 1)), [0, 0, -1, 0, 0, 0]), {})
Operator: aten.convolution.default
cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 4, 4], f16), T([64], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {})
cnt: 2, ((T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([64, 1, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {})
cnt: 2, ((T([128, 16, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([16, 1, 3, 3], f16), T([16], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {})
cnt: 2, ((T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 5, 5], f16), T([24], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 24), {})
cnt: 2, ((T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 7, 7], f16), T([24], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 24), {})
cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 2, 2], f16), T([128], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {})
cnt: 2, ((T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 1, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {})
cnt: 2, ((T([128, 32, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([32, 1, 3, 3], f16), T([32], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {})
cnt: 2, ((T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 5, 5], f16), T([48], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 48), {})
cnt: 2, ((T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 7, 7], f16), T([48], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 48), {})
cnt: 1, ((T([128, 128, 28, 28], f16), T([320, 128, 2, 2], f16), T([320], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {})
cnt: 2, ((T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([320, 1, 3, 3], f16), T([320], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 320), {})
cnt: 2, ((T([128, 80, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([80, 1, 3, 3], f16), T([80], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 80), {})
cnt: 2, ((T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 5, 5], f16), T([120], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {})
cnt: 2, ((T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 7, 7], f16), T([120], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 120), {})
cnt: 1, ((T([128, 320, 14, 14], f16), T([512, 320, 2, 2], f16), T([512], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {})
cnt: 2, ((T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([512, 1, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 512), {})
cnt: 2, ((T([128, 128, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([128, 1, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {})
cnt: 2, ((T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 5, 5], f16), T([192], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 192), {})
cnt: 2, ((T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 7, 7], f16), T([192], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 192), {})
Operator: aten.convolution_backward.default
cnt: 2, ((T([128, 192, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 7, 7], f16), [192], [1, 1], [3, 3], [1, 1], False, [0, 0], 192, [True, True, True]), {})
cnt: 2, ((T([128, 192, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 5, 5], f16), [192], [1, 1], [2, 2], [1, 1], False, [0, 0], 192, [True, True, True]), {})
cnt: 2, ((T([128, 128, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 128, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([128, 1, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, True]), {})
cnt: 2, ((T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([512, 1, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 512, [True, True, True]), {})
cnt: 1, ((T([128, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 320, 14, 14], f16), T([512, 320, 2, 2], f16), [512], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {})
cnt: 2, ((T([128, 120, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 7, 7], f16), [120], [1, 1], [3, 3], [1, 1], False, [0, 0], 120, [True, True, True]), {})
cnt: 2, ((T([128, 120, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 5, 5], f16), [120], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, True]), {})
cnt: 2, ((T([128, 80, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 80, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([80, 1, 3, 3], f16), [80], [1, 1], [1, 1], [1, 1], False, [0, 0], 80, [True, True, True]), {})
cnt: 2, ((T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([320, 1, 3, 3], f16), [320], [1, 1], [1, 1], [1, 1], False, [0, 0], 320, [True, True, True]), {})
cnt: 1, ((T([128, 320, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 128, 28, 28], f16), T([320, 128, 2, 2], f16), [320], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {})
cnt: 2, ((T([128, 48, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 7, 7], f16), [48], [1, 1], [3, 3], [1, 1], False, [0, 0], 48, [True, True, True]), {})
cnt: 2, ((T([128, 48, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 5, 5], f16), [48], [1, 1], [2, 2], [1, 1], False, [0, 0], 48, [True, True, True]), {})
cnt: 2, ((T([128, 32, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 32, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([32, 1, 3, 3], f16), [32], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, True]), {})
cnt: 2, ((T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 1, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, True]), {})
cnt: 1, ((T([128, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 64, 56, 56], f16), T([128, 64, 2, 2], f16), [128], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {})
cnt: 2, ((T([128, 24, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 7, 7], f16), [24], [1, 1], [3, 3], [1, 1], False, [0, 0], 24, [True, True, True]), {})
cnt: 2, ((T([128, 24, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 5, 5], f16), [24], [1, 1], [2, 2], [1, 1], False, [0, 0], 24, [True, True, True]), {})
cnt: 2, ((T([128, 16, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 16, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([16, 1, 3, 3], f16), [16], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, True]), {})
cnt: 2, ((T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([64, 1, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, True]), {})
cnt: 1, ((T([128, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 3, 224, 224], f16), T([64, 3, 4, 4], f16), [64], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {})
Operator: aten.copy_.default
cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {})
Operator: aten.gelu.default
cnt: 2, ((T([128, 3137, 512], f16),), {})
cnt: 2, ((T([128, 785, 1024], f16),), {})
cnt: 2, ((T([128, 197, 1280], f16),), {})
cnt: 2, ((T([128, 50, 2048], f16),), {})
Operator: aten.gelu_backward.default
cnt: 2, ((T([128, 50, 2048], f16), T([128, 50, 2048], f16)), {})
cnt: 2, ((T([128, 197, 1280], f16), T([128, 197, 1280], f16)), {})
cnt: 2, ((T([128, 785, 1024], f16), T([128, 785, 1024], f16)), {})
cnt: 2, ((T([128, 3137, 512], f16), T([128, 3137, 512], f16)), {})
Operator: aten.lift_fresh_copy.default
cnt: 1, ((T([128], i64),), {})
Operator: aten.mm.default
cnt: 1, ((T([128, 1000], f16), T([1000, 512], f16)), {})
cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 512], f16, stride=(25600, 1))), {})
cnt: 2, ((T([6400, 512], f16), T([512, 2048], f16)), {})
cnt: 2, ((T([512, 6400], f16, stride=(1, 512)), T([6400, 2048], f16)), {})
cnt: 2, ((T([6400, 2048], f16), T([2048, 512], f16)), {})
cnt: 2, ((T([2048, 6400], f16, stride=(1, 2048)), T([6400, 512], f16)), {})
cnt: 2, ((T([6400, 512], f16), T([512, 512], f16)), {})
cnt: 2, ((T([512, 6400], f16, stride=(1, 512)), T([6400, 512], f16)), {})
cnt: 2, ((T([6400, 1536], f16), T([1536, 512], f16)), {})
cnt: 2, ((T([1536, 6400], f16, stride=(1, 1536)), T([6400, 512], f16)), {})
cnt: 2, ((T([25216, 320], f16), T([320, 1280], f16)), {})
cnt: 2, ((T([320, 25216], f16, stride=(1, 320)), T([25216, 1280], f16)), {})
cnt: 2, ((T([25216, 1280], f16), T([1280, 320], f16)), {})
cnt: 2, ((T([1280, 25216], f16, stride=(1, 1280)), T([25216, 320], f16)), {})
cnt: 2, ((T([25216, 320], f16), T([320, 320], f16)), {})
cnt: 2, ((T([320, 25216], f16, stride=(1, 320)), T([25216, 320], f16)), {})
cnt: 2, ((T([25216, 960], f16), T([960, 320], f16)), {})
cnt: 2, ((T([960, 25216], f16, stride=(1, 960)), T([25216, 320], f16)), {})
cnt: 2, ((T([100480, 128], f16), T([128, 1024], f16)), {})
cnt: 2, ((T([128, 100480], f16, stride=(1, 128)), T([100480, 1024], f16)), {})
cnt: 2, ((T([100480, 1024], f16), T([1024, 128], f16)), {})
cnt: 2, ((T([1024, 100480], f16, stride=(1, 1024)), T([100480, 128], f16)), {})
cnt: 2, ((T([100480, 128], f16), T([128, 128], f16)), {})
cnt: 2, ((T([128, 100480], f16, stride=(1, 128)), T([100480, 128], f16)), {})
cnt: 2, ((T([100480, 384], f16), T([384, 128], f16)), {})
cnt: 2, ((T([384, 100480], f16, stride=(1, 384)), T([100480, 128], f16)), {})
cnt: 2, ((T([401536, 64], f16), T([64, 512], f16)), {})
cnt: 2, ((T([64, 401536], f16, stride=(1, 64)), T([401536, 512], f16)), {})
cnt: 2, ((T([401536, 512], f16), T([512, 64], f16)), {})
cnt: 2, ((T([512, 401536], f16, stride=(1, 512)), T([401536, 64], f16)), {})
cnt: 2, ((T([401536, 64], f16), T([64, 64], f16)), {})
cnt: 2, ((T([64, 401536], f16, stride=(1, 64)), T([401536, 64], f16)), {})
cnt: 2, ((T([401536, 192], f16), T([192, 64], f16)), {})
cnt: 2, ((T([192, 401536], f16, stride=(1, 192)), T([401536, 64], f16)), {})
Operator: aten.mul.Tensor
cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(602304, 8, 192, 1)), T([128, 8, 3136, 8], f16, stride=(200704, 25088, 1, 3136))), {})
cnt: 2, ((T([128, 8, 3137, 8], f16), 0.3535533905932738), {})
cnt: 2, ((T([128, 8, 784, 16], f16, stride=(301440, 16, 384, 1)), T([128, 8, 784, 16], f16, stride=(100352, 12544, 1, 784))), {})
cnt: 2, ((T([128, 8, 785, 16], f16), 0.25), {})
cnt: 2, ((T([128, 8, 196, 40], f16, stride=(189120, 40, 960, 1)), T([128, 8, 196, 40], f16, stride=(62720, 7840, 1, 196))), {})
cnt: 2, ((T([128, 8, 197, 40], f16), 0.15811388300841897), {})
cnt: 2, ((T([128, 8, 49, 64], f16, stride=(76800, 64, 1536, 1)), T([128, 8, 49, 64], f16, stride=(25088, 3136, 1, 49))), {})
cnt: 2, ((T([128, 8, 50, 64], f16), 0.125), {})
cnt: 2, ((T([128, 8, 50, 64], f16, stride=(25600, 64, 512, 1)), 0.125), {})
cnt: 2, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), T([128, 8, 49, 64], f16, stride=(76800, 64, 1536, 1))), {})
cnt: 2, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), T([128, 8, 49, 64], f16, stride=(25088, 3136, 1, 49))), {})
cnt: 2, ((T([128, 8, 197, 40], f16, stride=(63040, 40, 320, 1)), 0.15811388300841897), {})
cnt: 2, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), T([128, 8, 196, 40], f16, stride=(189120, 40, 960, 1))), {})
cnt: 2, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), T([128, 8, 196, 40], f16, stride=(62720, 7840, 1, 196))), {})
cnt: 2, ((T([128, 8, 785, 16], f16, stride=(100480, 16, 128, 1)), 0.25), {})
cnt: 2, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), T([128, 8, 784, 16], f16, stride=(301440, 16, 384, 1))), {})
cnt: 2, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), T([128, 8, 784, 16], f16, stride=(100352, 12544, 1, 784))), {})
cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(200768, 8, 64, 1)), 0.3535533905932738), {})
cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), T([128, 8, 3136, 8], f16, stride=(602304, 8, 192, 1))), {})
cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), T([128, 8, 3136, 8], f16, stride=(200704, 25088, 1, 3136))), {})
Operator: aten.native_layer_norm.default
cnt: 1, ((T([128, 3136, 64], f16, stride=(200704, 1, 3136)), [64], T([64], f16), T([64], f16), 1e-05), {})
cnt: 4, ((T([128, 3137, 64], f16), [64], T([64], f16), T([64], f16), 1e-06), {})
cnt: 1, ((T([128, 784, 128], f16, stride=(100352, 1, 784)), [128], T([128], f16), T([128], f16), 1e-05), {})
cnt: 4, ((T([128, 785, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {})
cnt: 1, ((T([128, 196, 320], f16, stride=(62720, 1, 196)), [320], T([320], f16), T([320], f16), 1e-05), {})
cnt: 4, ((T([128, 197, 320], f16), [320], T([320], f16), T([320], f16), 1e-06), {})
cnt: 1, ((T([128, 49, 512], f16, stride=(25088, 1, 49)), [512], T([512], f16), T([512], f16), 1e-05), {})
cnt: 5, ((T([128, 50, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {})
Operator: aten.native_layer_norm_backward.default
cnt: 5, ((T([128, 50, 512], f16), T([128, 50, 512], f16), [512], T([128, 50, 1], f32), T([128, 50, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 1, ((T([128, 49, 512], f16, stride=(25600, 512, 1)), T([128, 49, 512], f16, stride=(25088, 1, 49)), [512], T([128, 49, 1], f32), T([128, 49, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 4, ((T([128, 197, 320], f16), T([128, 197, 320], f16), [320], T([128, 197, 1], f32), T([128, 197, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {})
cnt: 1, ((T([128, 196, 320], f16, stride=(63040, 320, 1)), T([128, 196, 320], f16, stride=(62720, 1, 196)), [320], T([128, 196, 1], f32), T([128, 196, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {})
cnt: 4, ((T([128, 785, 128], f16), T([128, 785, 128], f16), [128], T([128, 785, 1], f32), T([128, 785, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {})
cnt: 1, ((T([128, 784, 128], f16, stride=(100480, 128, 1)), T([128, 784, 128], f16, stride=(100352, 1, 784)), [128], T([128, 784, 1], f32), T([128, 784, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {})
cnt: 4, ((T([128, 3137, 64], f16), T([128, 3137, 64], f16), [64], T([128, 3137, 1], f32), T([128, 3137, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {})
cnt: 1, ((T([128, 3136, 64], f16, stride=(200768, 64, 1)), T([128, 3136, 64], f16, stride=(200704, 1, 3136)), [64], T([128, 3136, 1], f32), T([128, 3136, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {})
Operator: aten.nll_loss_backward.default
cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {})
Operator: aten.nll_loss_forward.default
cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {})
Operator: aten.select_backward.default
cnt: 1, ((T([128, 512], f16), [128, 50, 512], 1, 0), {})
cnt: 2, ((T([128, 8, 50, 64], f16), [3, 128, 8, 50, 64], 0, 2), {})
cnt: 2, ((T([128, 8, 50, 64], f16), [3, 128, 8, 50, 64], 0, 1), {})
cnt: 2, ((T([128, 8, 50, 64], f16), [3, 128, 8, 50, 64], 0, 0), {})
cnt: 2, ((T([128, 8, 197, 40], f16), [3, 128, 8, 197, 40], 0, 2), {})
cnt: 2, ((T([128, 8, 197, 40], f16), [3, 128, 8, 197, 40], 0, 1), {})
cnt: 2, ((T([128, 8, 197, 40], f16), [3, 128, 8, 197, 40], 0, 0), {})
cnt: 2, ((T([128, 8, 785, 16], f16), [3, 128, 8, 785, 16], 0, 2), {})
cnt: 2, ((T([128, 8, 785, 16], f16), [3, 128, 8, 785, 16], 0, 1), {})
cnt: 2, ((T([128, 8, 785, 16], f16), [3, 128, 8, 785, 16], 0, 0), {})
cnt: 2, ((T([128, 8, 3137, 8], f16), [3, 128, 8, 3137, 8], 0, 2), {})
cnt: 2, ((T([128, 8, 3137, 8], f16), [3, 128, 8, 3137, 8], 0, 1), {})
cnt: 2, ((T([128, 8, 3137, 8], f16), [3, 128, 8, 3137, 8], 0, 0), {})
Operator: aten.slice_backward.default
cnt: 5, ((T([128, 50, 512], f16), [128, 50, 512], 0, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), [128, 8, 49, 64], 3, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 49, 64], f16), [128, 8, 50, 64], 2, 1, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 50, 64], f16), [128, 8, 50, 64], 1, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 50, 64], f16), [128, 8, 50, 64], 0, 0, 9223372036854775807, 1), {})
cnt: 2, ((T([128, 49, 512], f16), [128, 50, 512], 1, 1, 9223372036854775807, 1), {})
cnt: 2, ((T([128, 1, 512], f16, stride=(25600, 512, 1)), [128, 50, 512], 1, 0, 1, 1), {})
cnt: 1, ((T([128, 196, 320], f16, stride=(62720, 1, 196)), [128, 196, 320], 2, 0, 9223372036854775807, 1), {})
cnt: 3, ((T([128, 196, 320], f16), [128, 197, 320], 1, 1, 9223372036854775807, 1), {})
cnt: 5, ((T([128, 197, 320], f16), [128, 197, 320], 0, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), [128, 8, 196, 40], 3, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 196, 40], f16), [128, 8, 197, 40], 2, 1, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 197, 40], f16), [128, 8, 197, 40], 1, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 197, 40], f16), [128, 8, 197, 40], 0, 0, 9223372036854775807, 1), {})
cnt: 2, ((T([128, 1, 320], f16, stride=(63040, 320, 1)), [128, 197, 320], 1, 0, 1, 1), {})
cnt: 1, ((T([128, 784, 128], f16, stride=(100352, 1, 784)), [128, 784, 128], 2, 0, 9223372036854775807, 1), {})
cnt: 3, ((T([128, 784, 128], f16), [128, 785, 128], 1, 1, 9223372036854775807, 1), {})
cnt: 5, ((T([128, 785, 128], f16), [128, 785, 128], 0, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), [128, 8, 784, 16], 3, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 784, 16], f16), [128, 8, 785, 16], 2, 1, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 785, 16], f16), [128, 8, 785, 16], 1, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 785, 16], f16), [128, 8, 785, 16], 0, 0, 9223372036854775807, 1), {})
cnt: 2, ((T([128, 1, 128], f16, stride=(100480, 128, 1)), [128, 785, 128], 1, 0, 1, 1), {})
cnt: 1, ((T([128, 3136, 64], f16, stride=(200704, 1, 3136)), [128, 3136, 64], 2, 0, 9223372036854775807, 1), {})
cnt: 3, ((T([128, 3136, 64], f16), [128, 3137, 64], 1, 1, 9223372036854775807, 1), {})
cnt: 5, ((T([128, 3137, 64], f16), [128, 3137, 64], 0, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), [128, 8, 3136, 8], 3, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 3136, 8], f16), [128, 8, 3137, 8], 2, 1, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 3137, 8], f16), [128, 8, 3137, 8], 1, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([128, 8, 3137, 8], f16), [128, 8, 3137, 8], 0, 0, 9223372036854775807, 1), {})
cnt: 2, ((T([128, 1, 64], f16, stride=(200768, 64, 1)), [128, 3137, 64], 1, 0, 1, 1), {})
Operator: aten.split_with_sizes.default
cnt: 2, ((T([128, 64, 56, 56], f16, stride=(602304, 1, 10752, 192)), [16, 24, 24], 1), {})
cnt: 2, ((T([128, 128, 28, 28], f16, stride=(301440, 1, 10752, 384)), [32, 48, 48], 1), {})
cnt: 2, ((T([128, 320, 14, 14], f16, stride=(189120, 1, 13440, 960)), [80, 120, 120], 1), {})
cnt: 2, ((T([128, 512, 7, 7], f16, stride=(76800, 1, 10752, 1536)), [128, 192, 192], 1), {})
Operator: aten.sum.SymInt
cnt: 1, ((T([128, 1000], f16), [0], True), {})
cnt: 4, ((T([6400, 512], f16), [0], True), {})
cnt: 2, ((T([6400, 2048], f16), [0], True), {})
cnt: 2, ((T([6400, 1536], f16), [0], True), {})
cnt: 1, ((T([128, 1, 512], f16, stride=(25600, 512, 1)), [0], True), {})
cnt: 4, ((T([25216, 320], f16), [0], True), {})
cnt: 2, ((T([25216, 1280], f16), [0], True), {})
cnt: 2, ((T([25216, 960], f16), [0], True), {})
cnt: 1, ((T([128, 1, 320], f16, stride=(63040, 320, 1)), [0], True), {})
cnt: 4, ((T([100480, 128], f16), [0], True), {})
cnt: 2, ((T([100480, 1024], f16), [0], True), {})
cnt: 2, ((T([100480, 384], f16), [0], True), {})
cnt: 1, ((T([128, 1, 128], f16, stride=(100480, 128, 1)), [0], True), {})
cnt: 4, ((T([401536, 64], f16), [0], True), {})
cnt: 2, ((T([401536, 512], f16), [0], True), {})
cnt: 2, ((T([401536, 192], f16), [0], True), {})
cnt: 1, ((T([128, 1, 64], f16, stride=(200768, 64, 1)), [0], True), {})
