Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

FMLSL (vector, 4S)

Test 1: uops

Code:

  fmlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01)cycle (02)03mmu table walk data (08)1e1f3f4e51schedule uop (52)schedule simd uop (54)dispatch simd uop (57)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map simd uop (7e)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd alu (9a)l1d cache writeback (a8)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? simd retires (ee)f5f6f7f8fd
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731000613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731090613407251000100010005319081401840374037326033895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300088613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01)cycle (02)031e3a3f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)accfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? simd retires (ee)? int retires (ef)f5f6f7f8fd
10204400372990061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071003162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
102044003729900726394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239551100001004003840038400384003840038
10204400372999061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
102044003729900726394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071013162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400372990066394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000071013162239553100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01)cycle (02)031e3f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)acbranch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? simd retires (ee)? int retires (ef)f5f6f7f8fd
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010030640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003729906139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003729906139407251001010100001010000505706908140065040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400372990131039407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216323947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010001640216223947310000104003840038400384003840038
1002440037299216139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmlsl v0.4s, v0.4h, v1.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01)cycle (02)031e1f3f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)acbranch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? simd retires (ee)? int retires (ef)f5f6f7f8fd
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
102044003730000103394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01)cycle (02)0309l2 tlb miss data (0b)1e3f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)accdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? simd retires (ee)? int retires (ef)f5f6f7f8fd
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000101006402162239473010000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000010339407251001010100001010000505708304140053400374013238135338767101602010000203000040037401312110021109101010000100006402162239473010000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000101006402162239473010000104003840038400384003840038
100244003730010010339407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037299000726394072510010101000010100005057069081400184003740037381302638767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006462162239473010000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmlsl v0.4s, v1.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)18191e3f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d tlb access (a0)ld unit uop (a6)l1d cache writeback (a8)a9acc2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? simd retires (ee)? int retires (ef)f5f6f7f8fd
10204400372990000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
1020440037300000041461394072510100120100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840086
102044003730000001261394072510100100100001001000066957069080400180400374003738108338819101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384022840038
10204400373000000061394072510100100100001301000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000002071011611394790100001004003840038400384003840038
10204400373000000061394072510100100100001001000050057069080400180400374003738122338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000061394072510100100100001001000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000030071011611394790100001004003840038400384003840038
102044003729900100145394072510100100100001001000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000001061394072510100100100001001000050057069081400180400374003738108338745101002081000020030000400374003711102011009910010010000100001030071011611394790100001004003840038400384003840038
10204400372990000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071014811394790100001004003840038400384003840038
10204400373000000061393982510100100100001001041450057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss data (0b)1e1f3f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)acc2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? simd retires (ee)? int retires (ef)f5f6f7f8fd
1002440037299000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010110634064021621039473010000104003840038400384003840038
1002440037300101006139407251001212100001210000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000047706139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006404162239473010000104003840038400384003840038
10024400373000000018939389251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400372990001206139407251001010100001210000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402161239473010000104003840038400384003840038
1002440037300100006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400372990006010339407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040078400371110021109101010000100006402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmlsl v0.4s, v8.4h, v9.4h
  movi v1.16b, 0
  fmlsl v1.4s, v8.4h, v9.4h
  movi v2.16b, 0
  fmlsl v2.4s, v8.4h, v9.4h
  movi v3.16b, 0
  fmlsl v3.4s, v8.4h, v9.4h
  movi v4.16b, 0
  fmlsl v4.4s, v8.4h, v9.4h
  movi v5.16b, 0
  fmlsl v5.4s, v8.4h, v9.4h
  movi v6.16b, 0
  fmlsl v6.4s, v8.4h, v9.4h
  movi v7.16b, 0
  fmlsl v7.4s, v8.4h, v9.4h
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01)cycle (02)031e3f51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d tlb access (a0)l1d tlb miss (a1)st unit uop (a7)l1d cache writeback (a8)a9accfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? simd retires (ee)? int retires (ef)f5f6f7f8fd
1602042009115004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000020010113316332006201600001002006620066200662006620066
1602042006515154940258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001510113316232006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000010113316332006201600001002006620066200662006620066
160204200651500108625801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000610114316342006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000010010113316332006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010113316332006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000010010113316332006201600001002006620066200662006620066
1602042006515004025801001008000010080000530640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000010113316332006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000002004620065200653238010020080000200240000200652006511160201100991001001600001000002015610113216442006201600001002006620066200662006620066
1602042006515104025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000910113316332006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire uop (01)cycle (02)033a3f51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)5f60696b6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)accfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)ea? simd retires (ee)? int retires (ef)f5f6f7f8fd
1600242006715004625800121280000128000062640000112002802005120047323800122080000202400002004720047111600211091010160000100010032311202421181720044215160000102005220048202832005020048
1600242004715004625800121280000128000062640000112002802004720047323800122080000202400002004720047111600211091010160000101010033311142021114820044215160000102004820048202422005020048
160024200511500462580012128000012800006264000011200280200472004732380012208000020240000200472004711160021109101016000010101003131182021191520044215160000102004820048202472004820048
16002420047150188258001212800001280330626400001120028020047200473238001220800002024000020047200471116002110910101600001000100393111520211141420044215160000102004820048202602004820048
16002420047151046258001212800001280000626400001120028020047200473238001220800002024000020047200471116002110910101600001000100373111420211141420044215160000102004820048202442005220052
1600242004715004625800121280000128000062640000112002802004720047323800122080000202400002004720047111600211091010160000101310038311152021115920044215160000102004820048202392004820048
160024200471500462580012128000012800006264000011200280200472004732380012208000020240000200472004711160021109101016000010403100373111420211161420044215160000102004820048202362004820048
1600242004715004625800121280000128000062640000112002802004720047323800122080000202400002004720047111600211091010160000101010036311122021191520044215160000102004820048202382004820048
160024200471500462580012128000012800006264000011200280200472004732380012208000020240000200472004711160021109101016000010283100393111620211151420044215160000102004820048202452004820052
1600242004715005225800121280000128000062640000012002802004720051323800122080000202400002005120047111600211091010160000101010040312142041215920044230160000102005220048202422004820048

Test 6: throughput

Count: 12

Code:

  fmlsl v0.4s, v12.4h, v13.4h
  fmlsl v1.4s, v12.4h, v13.4h
  fmlsl v2.4s, v12.4h, v13.4h
  fmlsl v3.4s, v12.4h, v13.4h
  fmlsl v4.4s, v12.4h, v13.4h
  fmlsl v5.4s, v12.4h, v13.4h
  fmlsl v6.4s, v12.4h, v13.4h
  fmlsl v7.4s, v12.4h, v13.4h
  fmlsl v8.4s, v12.4h, v13.4h
  fmlsl v9.4s, v12.4h, v13.4h
  fmlsl v10.4s, v12.4h, v13.4h
  fmlsl v11.4s, v12.4h, v13.4h
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire uop (01)cycle (02)03mmu table walk data (08)1e373f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)acbranch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? simd retires (ee)? int retires (ef)f5f6f7f8fd
120204408863100006137199251201001001200001001200005005630640040020040039400392493232664512010020012000020036000040039400391112020110099100100120000100100761041654400301200001004004040040416924004041689
120204400393000007269961251201011001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100100761041645400301200001004004040040400404004040040
12020440039300000619961251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100100761041645400301200001004004040040416924170240040
12020440039300000619961251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100200761041645416771200001004004040040400404004040040
12020440039312000619961251204261001200001001200005005630640040060040039400392493232499712010020012000020036000040039400391112020110099100100120000100160761051634400301200001004004040040416924004040040
120204400393000006135689251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100200761041644416831200001004004040040416924168740040
120204400393130006136348251201001001200001001200005005630640040020341691400392493232499712010020012000020036000040039400391112020110099100100120000100200761041644400301200001004004040040400404004040040
120204400393000006135689251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100500761041644400301200001004004040040416764004040040
120204400392990006137966251201001001200001001200005005630640040020040039400392493232499712010020012000020036000041691416911112020110099100100120000100200761051654400301200001004004040040416924004040040
12020440039300000619961471202031001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100500761041655400301200001004004040040416894004041688

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire uop (01)cycle (02)030918191e373f4e51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)5f60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d tlb miss (a1)l1d cache writeback (a8)a9accdcfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? simd retires (ee)? int retires (ef)f5f6f7f8fd
1200244003930000000673554446120010101200001012000050563064011400204003940039249553250191200102012000020360000400394003911120021109101012000010000007522311221621187400301594120000104169240040400404004040040
1200244003929900000673796625120010101200001012000050585199311400204003940039249553250191200102012000020360000400394003911120021109101012000010000027524622916422654003030177120000104004040040400404004040040
120024400393000001206799612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000752231161621198400301594120000104004040040400404004040040
12002440039299000006799612512001010120000101200005056306400140020424704003924955325019120010201200002036000040039400391112002110910101200001000000752231171621177400301594120000104004040040400404004040040
12002440039300000006799612512001010120000101200005056306401140020416864003924955325019120010201200002036000040039400391112002110910101200001000000752231171621177400301594120000104004040040400404004040040
1200244003930000000653299612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000752231171621196424601590120000104169240040400404004040040
12002440039300000006799612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000752231171621185400301594120000104004040040400404004040040
12002440039300000603299961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000100000075246228164221164003630177120000104004040040400404004040040
1200244003930000000734027425120010101200001012000050563064001400204003940039256333250191200102012000020360000400394003911120021109101012000010000007524622716422984003030177120000104168740040400404168740040
1200244003930000000739961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000100000075246226164226144003030177120000104169240040400404004040040