# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s

# perf stat reports a throughput of 0.60 IPC for this code snippet.
# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the
# imul. However, the folded load can start immediately.
# The last lzcnt has a false dependency on %cx. However, even in this case, the
# folded load can start immediately.
#
# NOTE(review): the simulated IPC verified below is 0.37, which is lower than
# the measured 0.60 quoted above - presumably a known modelling gap; confirm.
#
# Horizontal whitespace inside the patterns below is not significant: FileCheck
# is invoked without --strict-whitespace, so runs of spaces are canonicalized.

imul %edx, %ecx
lzcnt (%rsp), %cx
lzcnt 2(%rsp), %cx

# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 12003
# CHECK-NEXT: Total uOps: 7500

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.62
# CHECK-NEXT: IPC: 0.37
# CHECK-NEXT: Block RThroughput: 4.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 2.00 imull %edx, %ecx
# CHECK-NEXT: 2 6 2.00 * lzcntw (%rsp), %cx
# CHECK-NEXT: 2 6 2.00 * lzcntw 2(%rsp), %cx

# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK-NEXT: [0.1] - PdAGLU01
# CHECK-NEXT: [1] - PdBranch
# CHECK-NEXT: [2] - PdCount
# CHECK-NEXT: [3] - PdDiv
# CHECK-NEXT: [4] - PdEX0
# CHECK-NEXT: [5] - PdEX1
# CHECK-NEXT: [6] - PdFPCVT
# CHECK-NEXT: [7.0] - PdFPFMA
# CHECK-NEXT: [7.1] - PdFPFMA
# CHECK-NEXT: [8.0] - PdFPMAL
# CHECK-NEXT: [8.1] - PdFPMAL
# CHECK-NEXT: [9] - PdFPMMA
# CHECK-NEXT: [10] - PdFPSTO
# CHECK-NEXT: [11] - PdFPU0
# CHECK-NEXT: [12] - PdFPU1
# CHECK-NEXT: [13] - PdFPU2
# CHECK-NEXT: [14] - PdFPU3
# CHECK-NEXT: [15] - PdFPXBR
# CHECK-NEXT: [16.0] - PdLoad
# CHECK-NEXT: [16.1] - PdLoad
# CHECK-NEXT: [17] - PdMul
# CHECK-NEXT: [18] - PdStore

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: 3.00 3.00 - - - 4.00 1.00 - - - - - - - - - - - - 3.00 3.00 2.00 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - 2.00 - imull %edx, %ecx
# CHECK-NEXT: - 3.00 - - - 2.00 - - - - - - - - - - - - - - 3.00 - - lzcntw (%rsp), %cx
# CHECK-NEXT: 3.00 - - - - 2.00 - - - - - - - - - - - - - 3.00 - - - lzcntw 2(%rsp), %cx

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456

# CHECK: [0,0] DeeeeER . . . .. imull %edx, %ecx
# CHECK-NEXT: [0,1] DeeeeeeER . . . .. lzcntw (%rsp), %cx
# CHECK-NEXT: [0,2] .D=eeeeeeER . . .. lzcntw 2(%rsp), %cx
# CHECK-NEXT: [1,0] .D=======eeeeER. . .. imull %edx, %ecx
# CHECK-NEXT: [1,1] . D======eeeeeeER . .. lzcntw (%rsp), %cx
# CHECK-NEXT: [1,2] . D========eeeeeeER . .. lzcntw 2(%rsp), %cx
# CHECK-NEXT: [2,0] . D=============eeeeER .. imull %edx, %ecx
# CHECK-NEXT: [2,1] . D=============eeeeeeER.. lzcntw (%rsp), %cx
# CHECK-NEXT: [2,2] . D==============eeeeeeER lzcntw 2(%rsp), %cx

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 7.7 0.3 0.0 imull %edx, %ecx
# CHECK-NEXT: 1. 3 7.3 0.0 0.0 lzcntw (%rsp), %cx
# CHECK-NEXT: 2. 3 8.7 1.0 0.0 lzcntw 2(%rsp), %cx
# CHECK-NEXT: 3 7.9 0.4 0.0 <total>