Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

FMLA (vector, 2S)

Test 1: uops

Code:

  fmla v0.2s, v1.2s, v2.2s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
1004403730061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038408541114038
1004403730061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730961340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403731061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730061340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730061340725100410001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038
1004403730361340725100010001000531908140184037403732583389510001000300040374037111001100073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmla v0.2s, v1.2s, v2.2s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000009203940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010002700710121622394790100001004003840038400384003840038
1020440037300000147394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300001304394072510100100100001001000050057069080400184003740037381083387451025120010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300000214394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300000170394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300000804394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
1020440037300000317394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622394790100001004003840038400384003840038
10204400373000120208394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000320712121622394790100001004003840038400384003840133
1020440037300000145394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001001000710121622394790100001004003840038400384003840038
10204400373000121726394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710121622400340100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037299006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100030640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400844003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505708307040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400372110021109101010000100000640216223947310000104003840038400384003840079
1002440037299006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038
10024400373000083239407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100001640216233947310000104003840038400384003840038
1002440037299006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000025139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmla v0.2s, v0.2s, v1.2s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000061394072510100100100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071011611394790100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071021611394790100001004003840038400384003840083
102044003730087061394072510100100100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071021611394790100001004003840038400384003840038
10204400373000061394072510100100100001251000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100371021611394790100001004003840038400384003840038
10204400373000061394072510100125100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071021611394790100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381080338745102512001000020030000400374003711102011009910010010000100071021611394790100001004003840038400384003840038
10204400373000061394072510100100100001001000062657069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071021611394790100001004003840038400384003840038
102044003729900103394072510100100100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071011611394790100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100071021611394790100001004003840038400384003840038
102044003729900613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000710216113947925100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000613940725100101010000101000050570690814001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400843000613940725100101010000101000050570690814001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400372990613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400373000823940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
1002440037299157263940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400372990613940725100101210000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640316333947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000640349333947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmla v0.2s, v1.2s, v0.2s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840083
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003729900613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003729900613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040084400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730001613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
1020440037299003463940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003730000103394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000082394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000661216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100221091010100001000640216223947310000104003840038400384003840038
10024400372990061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100221091010100001000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038

Test 5: throughput (8 destination registers, each zeroed by movi before its fmla)

Count: 8

Code:

  movi v0.16b, 0
  fmla v0.2s, v8.2s, v9.2s
  movi v1.16b, 0
  fmla v1.2s, v8.2s, v9.2s
  movi v2.16b, 0
  fmla v2.2s, v8.2s, v9.2s
  movi v3.16b, 0
  fmla v3.2s, v8.2s, v9.2s
  movi v4.16b, 0
  fmla v4.2s, v8.2s, v9.2s
  movi v5.16b, 0
  fmla v5.2s, v8.2s, v9.2s
  movi v6.16b, 0
  fmla v6.2s, v8.2s, v9.2s
  movi v7.16b, 0
  fmla v7.2s, v8.2s, v9.2s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204200651500402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
16020420065150348402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
16020420065150336402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116113307201600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004602006520065323801002008010920024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
160204200651506402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
16020420065150222612580100100800001008000050064000002004602006520065323801342008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066
16020420065150381402580100100800001008000050064000002004632006520065323801002008000020024000020065200651116020110099100100160000100910111116112006291600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004602006520065323801002008000020024000020065200651116020110099100100160000100010111116112006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600242008615000023627800121280000128000062640000015200332005220052323800122080000202400002006120061111600211091010160000100100318222525211149200492201160000102005320053200532005320053
1600242005215000372582780012128000012800006264000001520033200522005232380012208000020240000200522005211160021109101016000010010029841825211149200492201160000102005320053200532005320053
160024200521500004627800121280000128000062640000115200332005220052323800122080000202400002005220052111600211091010160000100100288411234211714200582201160000102005320053200532006220062
16002420052150000462780012128000012800006264000011520042200522005232380012208000020240000200612005211160021109101016000010010028841142521176200492201160000102005320053200532005320053
16002420052150003514627800121280000128000062640000115200332005220052323800122080000202400002005220052111600211091010160000100101141142625221126200492202160000102005320053200532005320053
1600242005215000046278001212800001280000686400001152003320052200523238001220800002024000020061200521116002110910101600001001003584162521156200492201160000102005320053200532005320053
1600242005215000046278001212800001280000626400001152003320052200523238001220800002024000020052200521116002110910101600001001003384172541176200492202160000102005320053200622005320053
16002420052150003724627800121280000128000062640000115200332005220052323800122080000202400002005220052111600211091010160000100100298411325211713200492201160000102005320053200532005320053
16002420052150001546278001212800001280000626400001152004220061200523238001220800002024000020052200521116002110910101600001001002984142521166200492201160000102005320053200532005320053
1600242005215000046278001212800001280000626400001152003320052200523518001220800002024000020052200521116002110910101600001001003284262521166200492402160000102005320053200532005320053

Test 6: throughput (16 distinct destination registers, no movi zeroing)

Count: 16

Code:

  fmla v0.2s, v16.2s, v17.2s
  fmla v1.2s, v16.2s, v17.2s
  fmla v2.2s, v16.2s, v17.2s
  fmla v3.2s, v16.2s, v17.2s
  fmla v4.2s, v16.2s, v17.2s
  fmla v5.2s, v16.2s, v17.2s
  fmla v6.2s, v16.2s, v17.2s
  fmla v7.2s, v16.2s, v17.2s
  fmla v8.2s, v16.2s, v17.2s
  fmla v9.2s, v16.2s, v17.2s
  fmla v10.2s, v16.2s, v17.2s
  fmla v11.2s, v16.2s, v17.2s
  fmla v12.2s, v16.2s, v17.2s
  fmla v13.2s, v16.2s, v17.2s
  fmla v14.2s, v16.2s, v17.2s
  fmla v15.2s, v16.2s, v17.2s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602044013030000000420251601171001600011001600005001280000140021411904120519973321148160100200160000200480000400404004011160201100991001001600001000001011011611411871600001004119141206400414004140041
16020440040300000022450251601001001600001001600005001280000140021400404004019973319998160100200160000200480000400404004011160201100991001001600001000001011011602412021600001004004140041400414004140041
1602044119030800000420251601001001600001001600005001280000040021400414004019973319998160100200160000200480000400404004011160201100991001001600001000001011011611400371600001004119140044400414004140041
1602044004030000000424485251601001001600001001600005001280000140021422804004019973321148160100200160000200480000400404004011160201100991001001600001000001011011611411871600001004004141191422864004142286
1602044004030000000420251601001001600001001600005001280000140021400404004019973319998160100200160000200480000400404004011160201100991001001600001000001011011611400371600001004004140041400414004140041
1602044004030000000610251601171001600001001600005001280000040021400404004019973321148160100200160000200480000400404004011160201100991001001600001000001011011611400371600001004119141206400414004140041
1602044004030000000420251601001001600001001600005001280000040021400404119019973321148160100200160000200480000412914120511160201100991001001600001000001011011611400371600001004004140041400414004140041
1602044004029900002242448525160122100160022100160000500128000004226640040411902109531999816010020016000020048000041190412051116020110099100100160000100001651011011611411871600001004004140041400414004140041
160204400402990000227070251601001001600001001600005001280000141171400404004019973319998160100200160000200480000400404004311160201100991001001600001000001011011611412021600001004004140041400414004141191
16020440040300000043614485251601221001600001001600005001280000140021400404004021095319998160100200160000200480000400434004011160201100991001001600001000001011011611400371600001004004441191412064004140041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024400493060005302516001010160000101600005012800001104002140040400402117303200201600102016000020480000400404121911160021109101016000010001000100223113162114440037156160000104004140041401404004142319
1600244004030000047025160011101600001016000050132000011040021412454004019996032002116001020160000204800004232740040111600211091010160000100030780100228213162113240037156160000104099142377417434004140042
16002440040300000478863251600101016000010160000501280000015400214004040040211730320020160010201600002048000042285400401116002110910101600001000000010024113231642234400373010160000104004140041400624004140041
160024400403000004888632516004510160000101600005012800001154002140040400401999603200201600102016000020480000423184004011160021109101016000010000000100228215162114340037156160000104004140041400664004140041
1600244004031700023702516001010160053101600005058683330154002140040400401999603200201600102016000020480000400404004011160021109101016000010000000100248223174222440037156160000104004140041401344229740041
1600244122930000354789132516001010160000101600005057210731154226640040400401999603222741600102016000020480000400404004011160021109101016000010004000100228213164113240037155160000104231940041401804004240041
160024400403000004702516001010160000101600005012800001154002140040400411999603200201600102016000020480000400404124511160021109101016000010030060100228313162114540037155160000104010640041421114228840041
160024400403000004702516001010160000101600005012800000154002140040400401999603200201600102016000020480000400404004011160021109101016000010002000100228322162123340037155160000104004142286400664004140041
160024400403000004702516001010160000101600005012800001154002140040400401999603200201600102016000020480000400404004011160021109101016000010001000100248214162113340037155160000104229642286401484004140042
160024400403000006789702516001010160000101600005012800001154002140040400401999603200201600102016000020480000400404004011160021109101016000010006060100248213162114540038155160000104004140041402404004140041