Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

FMLSL (vector, 2S)

Test 1: uops

Code:

  fmlsl v0.2s, v1.2h, v2.2h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 19 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
100440373000982340725100010001000531908140184037403732583389510001000300040374073111001100000073216223473100040384038403840384038
1004403730000613407251000100010005319081401840374037325833895100010003000403740371110011000003373216223473100040384038403840384038
10044037300006134072510001000100053190814018403740373258338951000100030004037403711100110000016273216223473100040384038403840384038
100440373000361340725100010001000531908040184037403732583389510001000300040374037111001100000073216223473100040384038403840384038
100440373000061340725100010001000531908040184037403732583389510001000300040374037111001100000373216223473100040384038403840384038
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000024073216223473100040384038403840384038
1004403730000128340725100010001000531908140184037403732583389510001000300040374037111001100000373216223473100040384038403840384038
1004403731001861340725100010001000531908040184037403732583389510001000300040374037111001100000073216223473100040384038403840384038
1004403730005461340725100010001000531908140184037403732583389510001000300040374037111001100000073216223473100040384038403840384038
100440373000061340725100010001000531908040184037403732583389510001000300040374037111001100000373216223473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmlsl v0.2s, v1.2h, v2.2h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000001200613940702510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710061622394790100001004003840038400854003840038
1020440037300000000613940702510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300000600613940702510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
102044003730000427009503940702510100100100001001000050057069080400534003740037381083387451010020010000200300004003740037111020110099100100100001000000710131622394790100001004003840038400384003840038
1020440037300000001613940702510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300000000613940702510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121623394790100001004003840038400384003840038
1020440037300000000613940702510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
10204400372990000006139407400212510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300040000613940702510100100100001001000050057069081400184003740037381083387451010020010000200300004008440037111020110099100100100001000000710121622394790100001004003840038400384003840038
10204400783000005400823939802510100104100001001000050057083040400184003740037381083387451010021410000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840086

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640316233947310000104003840038400384003840038
1002440037299000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000000156394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100100640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmlsl v0.2s, v0.2h, v1.2h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102044003730006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
1020440037300061394072510100100100001001000050057074390400184003740037381083387451010020010000200300004003740037111020110099100100100001004907101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003729906139407251010010010000100100005005708268040018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
1020440037300072639407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102054003730006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003729906139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100037101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 09 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100244003730000061394072510010101000010100005057069080400184003740037381300338767100102010000203000040037400371110021109101010000100016402162239473010000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104022640038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000061394072510010101000010100005057069080400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000061394072510010101000010100005057069080400184003740037381300338767100102010000223000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003729900061394072510010101000010100005057069080400184003740037381307338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000061394072510010101000010100005057069080400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000061394072510010101000010100005057069080400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003729900061394072510010101000010100005057069080400184003740037381300338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmlsl v0.2s, v1.2h, v0.2h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102044003730000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037300000251393782510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000001071011611394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384022840038
102044003730000061394072510100100100001001000062157069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611395830100001004003840038400384003840038
10204400373000003300394072510100100100001001000050057069081400184018040037381083387451010020010000200300004003740037111020110099100100100001000020071011611394790100001004003840038400384003840038
102044003729900061394072510100100100001001000050057069080400184003740037381083387451010020810000200300004003740037111020110099100100100001000000371011611394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071034611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000001271011611394790100001004003840038400384022840038
102044003730000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037299000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000710116113947915100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000007263940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384008540038
10024400373000006139407251001010100001010000505706908140018400374003738130213876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400372990009213940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037299000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmlsl v0.2s, v8.2h, v9.2h
  movi v1.16b, 0
  fmlsl v1.2s, v8.2h, v9.2h
  movi v2.16b, 0
  fmlsl v2.2s, v8.2h, v9.2h
  movi v3.16b, 0
  fmlsl v3.2s, v8.2h, v9.2h
  movi v4.16b, 0
  fmlsl v4.2s, v8.2h, v9.2h
  movi v5.16b, 0
  fmlsl v5.2s, v8.2h, v9.2h
  movi v6.16b, 0
  fmlsl v6.2s, v8.2h, v9.2h
  movi v7.16b, 0
  fmlsl v7.2s, v8.2h, v9.2h
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020420091150086258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331653200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200651223801002008000020024000020065200651116020110099100100160000100001011331633200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331622200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331653200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331653200621600001002006620066200662006620066
160204200651500325258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331653200621600001002006620150200662006620066
16020420065150040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011321653200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331653200621600001002006620066200662006620066
16020420065150063258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011331633200621600001002006620066200662006620066
16020420065150061258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100001011431623200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024200621502101152780012128000012800006264000011200330200542005232380012208000020240000200542005411160021109101016000010001004131122251112118200512201160000102005320064200552005520053
16002420054150010522780012128000012800006264000011200350200522005232380012208000020240000200542005411160021109101016000010001003931119272112220200512201160000102005520053200532027820053
16002420052151010582780012128000012800006264000011200330200542005232380012208000020240336200542005421160021109101016000010001004031116252112116200492201160000102005320053200532005320053
160024200521512205227800121280000128000062640000112003302005220052323800122080000202400002005220052111600211091010160000100571003831118251112117200492201160000102005320055200552005520053
16002420054150110732780012128000012800006264000011200330200522005232380012208000020240000200522005211160021109101016000010001004031117252111418200492201160000102005320053200552005520053
16002420054150110582780012128000012800006264000011200330200522005232380012208000020240000200542005411160021109101016000010001004131118271111614200512201160000102005320053200532005520053
16002420054150120462780012128000012800006264000011200330200542005432380012208000020240000200522005411160021109101016000010001003931118272111417200512201160000102005320055200532005520053
16002420054150220732780012128000012800006264000011200330200542005432380012208000020240000200542005411160021109101016000010001006831113343221915200492401160000102006420062200532006220064
160024200631502101212780012128000012800006264000001200330200542006332380012208000020240000200522005411160021109101016000010001004031117272111117200582402160000102005320053200532005320062
16002420052150110522780012128000012800006264000001200420200542005232380012208000020240000200632006111160021109101016000010001004462211343221915200582401160000102006420055200552005320062

Test 6: throughput

Count: 12

Code:

  fmlsl v0.2s, v12.2h, v13.2h
  fmlsl v1.2s, v12.2h, v13.2h
  fmlsl v2.2s, v12.2h, v13.2h
  fmlsl v3.2s, v12.2h, v13.2h
  fmlsl v4.2s, v12.2h, v13.2h
  fmlsl v5.2s, v12.2h, v13.2h
  fmlsl v6.2s, v12.2h, v13.2h
  fmlsl v7.2s, v12.2h, v13.2h
  fmlsl v8.2s, v12.2h, v13.2h
  fmlsl v9.2s, v12.2h, v13.2h
  fmlsl v10.2s, v12.2h, v13.2h
  fmlsl v11.2s, v12.2h, v13.2h
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 19 | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1202044003929900006135689251201011001200011001200005005630640400204003940039249323249971201002001200002003600004169140039111202011009910010012000010000761021611400301200001004004040040400404004040040
1202044003931300036199392512010010012000110012000050056306404002040039416872657727249971203102001200002003600004003940039111202011009910010012000010000761011611400301200001004169240040400404004041692
1202044169130000006137966251201011001200001001200005005630640400204003941691265773266491201002001200002003600004168640039111202011009910010012000010000761011611400301200001004004041687400404168740040
120204400392990002619961251201001001200001001200005005851993416674169140039249323249971201002001200002003600004003941686111202011009910010012000010000761011611400301200001004169240040416924004041687
1202044169129900006135689251201031001200031001200005005630640400204003940039249323249971201002001200002003600004003940039111202011009910010012000010000761011611416831200001004004041692400404168740040
120204400393130000619961251201001001200001001200005005851869416724169140039249323249971201002001200002003600004003941691111202011009910010012000010000761011611416831200001004004041692400404168740040
1202044003931200006135689251201031001200011001200005005630640400204003941691265823266491201002001200002003600004169140039111202011009910010012000010000761011611416771200001004004041687400404169240040
1202044003931200006137966251201011001200011001200005005630640400204003941691265823266441201002001200002003600004003941691111202011009910010012000010000761011611408391200001004004041687400404169240040
120204400393120001619961251201001001200001001200005005851869416724169140039249323249971201002001200002003600004003941686111202011009910010012000010000761011611400301200001004004040040400404004040040
1202044003930000005189961251201001001200001001200005005851869416724169140039249323249971201002001200002003600004003941691111202011009910010012000010000761011611400301200001004004041692400404169240040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | 18 | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1200244003929900000679961251200101012000010120000505851993114167204169140039249553250191200102012000020360000416884003911120021109101012000010007523311130162111824416781695120000104168741687416874168740040
12002440039312100036737966251200111012000110120000505630640104166704003941691266003250191200102012000020360000416914003911120021109101012000010007523322022162112325400301690120000104004040040416874168741687
12002441686300100006737966251200111012000110120000505851869104167204003940039266003266661200102012000020360000416914003911120021109101012000010007523311124162112227400301690120000104169240040416924004041692
12002441687300100006735689251200131012000010120000505851869104002004003940039249553250191200102012000020360000400394167511120021109101012000010007523311125162112826400301695120000104168740040400404004041687
1200244168630010000679961251200101012000010120000505630640114002004169140039249553250191200102012000020360000400394086811120021109101012000010007523311127162112522416781690120000104004041692400404169240040
1200244168631210001679961251200101012000110120000505851993104166704003941686266003266661200102012000020360000416914003911120021109101012000010007523311128162112828400301690120000104168741687416924004040040
12002440039300100003529961251200101012000310120000505851869104166704168641686249553250191200102012000020360000400394169111120021109101012000010007523311118162412827400301695120000104168741687416874168740040
1200244003929910003679961251200101012000310120000505851993104166704168641686266003266711200102012000020360000400394170111120021109101012000010007523311118162111726400301695120000104004040040400404004040040
1200244003930010001679961251200101012000010120000505630640104002004003940039249553250191200102012000020360000416864167511120021109101012000010907523311125162112716400301690120000104169240040416924004041692
1200244003930010003637379662512001010120000101200005058518691141672041691400392495527266711200102012000020360000400394009011120021109101012000010007523311127162112617416781690120000104004040040416874168741687