Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCMLA (vector, by element, 4S)

Test 1: uops

Code:

  fcmla v0.4s, v1.4s, v2.s[1], #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)03181e3a3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a0a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
100440373000061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373100061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373000061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373000061340725100010001000531908140184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373010061340725100010001148531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403731000149340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373000061340725100010001000531908140184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403731018061340725100010001000531908140184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373000061340725100010001000531908040184037403732583389510001000300040374037111001100002673116113473100040384038403840384038
100440373000061340725100010001000531908040184037403732583389510001000300040374037111001100020073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fcmla v0.4s, v1.4s, v2.s[1], #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0308181e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5branch mispredict (cb)cfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
102044003730000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
1020440037300000010339407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
102044003729900006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
1020440037299000032139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
102044003730000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
102044003730000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100001071012162239479100001004003840038400384003840038
102044003729900006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
102044003729900006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038
102044003730000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000071012162239479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400373000000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640416223947310000104003840038400384003840038
10024400373000000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
1002440037300000180726394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003729900084061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000000061394072510010101000610100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400373000000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640316223947310000104003840038400384003840038
10024400373000000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400372990000061394072510010101000010100005057069081400184003740037381303387671001022103262030000400374003711100211091010100001000640216223947310000104003840038400384003840038
10024400372990009061394072510010101000010100005057083051400184003740037381303387671001020100002030000400374003711100211091010100001000640316433947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fcmla v0.4s, v0.4s, v1.s[1], #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03070b1e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400372990006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400183400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000104100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007421161139479100001004003840038400384003840038
10204400373000045102739407251010010010000100100005005706908400180400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acbranch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003730000000613940725100101010000101000050570690804001804003740037381303387671001020100002030516400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640223223947310000104003840038400384003840038
1002440037300000002513940725100101010000101000050570690804001804003740037381303387671060520100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003729900000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003729900000613940725100101010000101000050570690804001804008540084381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400372990001201033940725100101010000101000050570690804005304003740037381303387671001020100002030000400374003711100211091010100001012000660224323947310000104003840038400384003840038
1002440037300000120613940725100101010000101014850570830404001804003740037381383387871016020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
1002440037300000120613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001010300640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fcmla v0.4s, v1.4s, v0.s[1], #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0318191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000006139407251010010010000100100005005706908140018400374003738108338745101002001000020430000400374003711102011009910010010000100106071011611394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001004400073911611394790100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100806071011611394790100001004003840038400384003840038
102044003729900061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001001400071011611394790100001004003840038400384003840038
1020440037299000613940710710119100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001001103071011611394790100001004003840038400384003840038
1020440037299000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010015018071011611394790100001004003840088400384003840038
102044003729900061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001001309071011611394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001001006071011611394790100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100809071011611394790100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100403071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080b191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003730000006139407251001010100001010000505706908400184003740037381303387671001020100002030000400374003711100211091010100001032000640216223947310000104003840038400384003840038
100244003729900006139407251001010100001010000505706908400184003740037381303387671001020100002030000400374003711100211091010100001058000640216223947310000104003840038400384003840038
10024400373000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100600640216223947310000104003840038400384003840038
10024400373000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100600640216223947310000104003840038400384003840038
100244003730000001893940725100101010000101000055570690840018400374003738130338767100102010000203000040037400371110021109101010000100300640216223947310000104003840038400384003840038
1002440037300100126139407251001010100241010148505706908400184003740037381303387671001020100002030000400374003711100211091010100001001500640216223947310000104003840038400384003840038
10024400373000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000101300640216223947310000104003840038400384003840076
10024400373000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100600640216223947310000104003840038400384003840038
100244003730000007263940725100101010000101000050570690840018400374003738135338767100102010000203000040037400371110021109101010000104300640216223947310000104003840038400384003840038
100244003730000006139407251001010100001010000505706908400184003740037381303387671001020100002030000400374003711100211091010100001011500640216223947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fcmla v0.4s, v8.4s, v9.s[1], #90
  movi v1.16b, 0
  fcmla v1.4s, v8.4s, v9.s[1], #90
  movi v2.16b, 0
  fcmla v2.4s, v8.4s, v9.s[1], #90
  movi v3.16b, 0
  fcmla v3.4s, v8.4s, v9.s[1], #90
  movi v4.16b, 0
  fcmla v4.4s, v8.4s, v9.s[1], #90
  movi v5.16b, 0
  fcmla v5.4s, v8.4s, v9.s[1], #90
  movi v6.16b, 0
  fcmla v6.4s, v8.4s, v9.s[1], #90
  movi v7.16b, 0
  fcmla v7.4s, v8.4s, v9.s[1], #90
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)03090b1e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8a9acc2branch mispredict (cb)cfd0d2d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204200881510004025801001008000010080000500640000002004620065200653238010020280000200240000200652006511160201100991001001600001000000001011100216112006201600001002006620066200712006620066
1602042006515200051525801001008000010080000500640000002004620065200653238010020080000200240000200652006511160201100991001001600001000006001011150116112006201600001002006620066200662006620066
160204200651500004025801001008000010080000500640000002004620065200653238010020080000200240000200652006511160201100991001001600001000000001011100116112006201600001002006620066200662006620066
160204200651501004025801001008000010080000500640000002004620065200653238021020080000200240000200652006511160201100991001001600001000003001011100116112006201600001002006620066200662006620066
1602042006515100040258010010080000100800005006400000020046200652006532380100200800002002400002006520065111602011009910010016000010000015001011100116112006201600001002006620066200662006620066
160204200651500004025801001008000010080000500640000002004620065200653238010020080000200240000200652006511160201100991001001600001000003001011151116112006201600001002006620066200662006620066
160204200651500004025801001008000010080000500640000052011420065200653238010020080000200240000200652006511160201100991001001600001000003001011150116112006201600001002006620066200662006620066
160204200651500004025801001008000010080000500640000102004620065200653238010020080000200240000200652006511160201100991001001600001000000001013500116112006201600001002006620066200662006620066
1602042006515000040258010010080000100800005006400000520046200652006532380100200800002002400002006520065111602011009910010016000010001012001011101116112006201600001002006620066200662006620066
160204200651510004025801001008000010080000500640000052004620065200653238010020080000200240000200652006511160201100991001001600001000000001011101116112006201600001002006620148200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)030718191e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accdcfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024200871501113109278001212800001280000626400001152009920052200521223800122080109202400002005220052111600211091010160000102060010042132143342111919200492401160000102005320053202472006620053
160024200611500000882780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001003010042137125254112020200492201160000102005320053202482007520053
1600242005215000012882780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001019010044137124252111725200492201160000102005320053202302006620053
160024200521500000882780012128000012800006264000001102003320052200523238001220800002024000020052200521116002110910101600001003010041137115252121919200492201160000102005320053202582006620053
16002420061150000046278001212800001280000626400001110200422005220052323800122080000202400002005220052111600211091010160000101111010042138219254111725200492201160000102005320053202282007520053
160024200521500000462780012128000012800006264000011102003320052200523238001220800002024000020061200521116002110910101600001010010046138120252111520200492201160000102005320053202432006620053
160024200521500000462780012128000012800006264000011102004220052200523238001220800002024000020052200521116002110910101600001000010042138119252112115200492201160000102005320053202202006620384
1600242005215000004627800121280000128000062640000111020033200522005232380012208000020240000200522005211160021109101016000010443010038138115252111915200492201160000102005320053202292006620053
160024200521500000462780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001016010047138119252112625200492201160000102005320053202322006620053
1600242005215000004627800121280000128000062640000111020033200522005232380012208000020240000200522005211160021109101016000010373010038138115252111915200492201160000102005320053202282006620053

Test 6: throughput

Count: 16

Code:

  fcmla v0.4s, v16.4s, v17.s[1], #90
  fcmla v1.4s, v16.4s, v17.s[1], #90
  fcmla v2.4s, v16.4s, v17.s[1], #90
  fcmla v3.4s, v16.4s, v17.s[1], #90
  fcmla v4.4s, v16.4s, v17.s[1], #90
  fcmla v5.4s, v16.4s, v17.s[1], #90
  fcmla v6.4s, v16.4s, v17.s[1], #90
  fcmla v7.4s, v16.4s, v17.s[1], #90
  fcmla v8.4s, v16.4s, v17.s[1], #90
  fcmla v9.4s, v16.4s, v17.s[1], #90
  fcmla v10.4s, v16.4s, v17.s[1], #90
  fcmla v11.4s, v16.4s, v17.s[1], #90
  fcmla v12.4s, v16.4s, v17.s[1], #90
  fcmla v13.4s, v16.4s, v17.s[1], #90
  fcmla v14.4s, v16.4s, v17.s[1], #90
  fcmla v15.4s, v16.4s, v17.s[1], #90
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01)cycle (02)0318191e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020440060317000017420251601001001600351001600005001280000040021400404004019973032223916010020016000020048000041245400401116020110099100100160000100004001011011611412421600001004004140041400414004140041
1602044124530900000420251601001001600351001600005005721073040021412454004019973031999816010020016000020048000042294412451116020110099100100160000100001001011011611400371600001004004142295400414004141246
16020441245317000054890251601351001600001001600005001280000140021412454124519973032225216010020016000020048000042294412451116020110099100100160000100000301011011611400371600001004232842282400434004140041
16020441245300000002320581601001001600001001600005001280000140021400404004019973032225216010020016000020048000040040400401116020110099100100160000100001001011011611400371600001004004142295400414004140041
1602044004029900000110389372516010010016003510016000050012800001400214124541245221983232120316010020016000020048000040040422941116020110099100100160000100001001011011611400371600001004004140041423104004140041
1602044004030000000420251601001001600001001600005001280000040021400404004019973032120316010020016000020048000041245400401116020110099100100160000100000001011011611400371600001004004140041400414121542286
16020440040300000007070251601531001600361001600005001280000140021400404004019973031999816010020016000020048000041190412191116020110099100100160000100004001011011611422911600001004004140041400414004140041
16020440040309000035424575251601001001600351001600005005721073141226412454004021151032120316010020016000020048000040040400401116020110099100100160000100004001011011611400371600001004004140041400414004140041
1602044229430000000420251601001001600001001600005001280000142262422814004019973032225216010020016000020048000040040400401116020110099100100160000100002001011011611400371600001004124641246400444232840041
16020440040317000017070251601001001600351001600005001280000141226400404004021151031999816010020016000020048000040040400401116020110099100100160000100004001011011611422821600001004228642286400434004140041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire (01)cycle (02)03181e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acbranch mispredict (cb)cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002442281299005347457625160045101600001016000050586833311104002140040400401999603200201600102016000020480000400404004011160021109101016000010000010022133141621176400372011160000104004140043400414228240041
1600244004030000073245762516007010160000101600005058683331110400224004040040213860320021160010201600002048000040040400421116002110910101600001000001002213417162114342292208160000104231940041400434004142319
160024400402990004988632516001110160000101600005012800001110400214231840040199960321206160010201600002048000042302412261116002110910101600001000001002213513162114740038208160000104004140041400414004140041
1600244122630000152345762516001010160060101600005013200001110400214122641226211460321199160010201600002048000041226412261116002110910101600001000001002213614162114740037209160000104004240041400414004142028
1600244004030000536706316020410160001101600005012800001110400214004042281222120321206160010201600002048000041226400401116002110910101600001000001002213514162114341223408160000104231940041423194004140041
160024400403000004702516004510160053101600005012800001110400214121940040199960320021160010201600002048000040041412191116002110910101600001000001002213614162114440039408160000104122741227400414004140041
160024412193160016745762516001010160060101600005012800001110400214004042318199960320020160010201600002048000040040423181116002110910101600001000001002213614162114340037208160000104122740041400414122742282
1600244122629900147025160010101600001016000050131999911104229940040412261999603200201600102016000020480000400414231811160021109101016000010000010022138241621134412232016160000104004141220400414231940041
16002440042300006047025160070101600001016000050572804711104120740040400401999635320021160010201600002048000042318400401116002110910101600001000011002213716162114440037208160000104004142319408444004141230
160024412263000007345762516007010160000101600005012800001110422994004042318222440320020160010201600002048000040040400431116002110910101600001000001002216714162116640037208160000104004140041423194004140041