Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCMLA (vector, by element, 8H)

Test 1: uops

Code:

  fcmla v0.8h, v1.8h, v2.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)03181e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373001289734072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
10044037300186134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730106134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730008234072510001000100053190804018403740373258338951000100030004037403711100110001073116113473100040384038403840384038
1004403730008234072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000079116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110001373116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fcmla v0.8h, v1.8h, v2.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003729900613940725101001001000010010000500570690814001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171711601394900100001004003840038400384003840038
102044003730001843940725101001001000010010000500570690814001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171801600394890100001004003840038400384003840038
102044003730001613940725101001001000010010000500570690804001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171801600394890100001004003840038400384003840038
102044003730000823940725101001001000010010000500570690804001840037400373811507387401010020010008200300244003740037111020110099100100100001000011171701600394890100001004003840038400384003840038
102044003729900613940725101001001000010010000500570690804001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171801600394900100001004003840038400384003840038
102044003729900613940725101001001000010010000500570690814001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171801600394900100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373811507387401010020010008200300244003740037111020110099100100100001000011171701600394900100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690814001840037400373811506387411010020010008200300244003740037111020110099100100100001000311171801600394890100001004003840038400384003840038
1020440037300151823940725101001001000010010000500570690804001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171701600394890100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373811507387401010020010008200300244003740037111020110099100100100001000011171701600394890100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640316223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100100640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037299006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000061393892510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001001000640216223947310000104003840038400384003840038
10024400373000072639407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fcmla v0.8h, v0.8h, v1.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730036139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021623394790100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038
1020440037300426139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038
102044003730026161394072510100100100001001000050057069084001840037400373810820387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038
1020440037300276139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000203071021622394790100001004003840038400384003840038
10204400373003944139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038
10204400373003516139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038
1020440037300336139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000003071021622394790100001004003840038400384003840038
10204400373004206139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038
102044003730066139407251010010010000100100005005706908400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071021622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acc2cdcfd0d2d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400373000001509433940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
1002440037300000225067783940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
1002440037300000300613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640003162239473010000104003840038400384003840038
1002440037299000300613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
100244003730000020106139407251001010100001010000505706908104001840037400373813017387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
10024400373000001140613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
10024400373000004170613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
10024400373000003690613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
10024400373000002400613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038
10024400372990004950613940725100101010000101000050570690810400184003740037381303387671001020100002030000400374003711100211091010100001000000640002162239473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fcmla v0.8h, v1.8h, v0.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730039613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
102044003730007473940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
102044003730002053940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001840037400373812233874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
1020440037300411613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071021622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037299006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006405162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040158400854003738130338767100102010000203000040037400371110021109101010000100016402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040022400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037299006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fcmla v0.8h, v8.8h, v9.h[1], #90
  movi v1.16b, 0
  fcmla v1.8h, v8.8h, v9.h[1], #90
  movi v2.16b, 0
  fcmla v2.8h, v8.8h, v9.h[1], #90
  movi v3.16b, 0
  fcmla v3.8h, v8.8h, v9.h[1], #90
  movi v4.16b, 0
  fcmla v4.8h, v8.8h, v9.h[1], #90
  movi v5.16b, 0
  fcmla v5.8h, v8.8h, v9.h[1], #90
  movi v6.16b, 0
  fcmla v6.8h, v8.8h, v9.h[1], #90
  movi v7.16b, 0
  fcmla v7.8h, v8.8h, v9.h[1], #90
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)031e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042006515104025801001008000010080000500640000120046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
1602042006515004025801001008000010080000500640000120046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
1602042006515004025801001008000010080000500640000120046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
1602042006515104025801001008000010080000500640000020046020065200653238010020080000200240000200652006511160201100991001001600001000000031011111611200621600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
16020420065150070525801001008000010080000500640000020046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111611200621600001002006620066200662006620066
1602042006515104025801001008000010080000500640000020046020065200653238010020080000200240000200652006511160201100991001001600001000000001011111612200621600001002006620066200662006620066
1602042006515104025801001008000010080000500640000120046020065200653238010020080000200240000200652006511160201100991001001600001000001001011111611200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)030b1e1f3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acc5cdcfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002420075151090052278001212800001280000626400001120033200522005232380012208010820240000200522005211160021109101016000010000001002531152541133200492202160000102005320053200532005320053
160024200521500480046278001212800001280000626400001120033200522005232380012208000020240000200522005211160021109101016000010000001002731133421133200492202160000102006220053200532005320053
16002420052150000046278001212800001280000626400001120033200612005232380012208000020240000200522006111160021109101016000010000001002631133421232200492201160000102005320053200532005320053
16002420052150000052278001212800001280000626400001120033200522005232380012208000020240000200522005211160021109101016000010009001002631132521142200492201160000102005320053200532005320053
160024200521500510046278001212800001280000626400001120033200522005232380012208000020240000200522005211160021109101016000010000001002631132521133200492201160000102005320137201362005320136
160024200521500150046278001212800001280000626400001120033200522005232380012208000020240000200522005211160021109101016000010000001002731132521123200492201160000102005320053200532005320153
16002420052150000046278001212800001280218626400001120033200522005232380012208000020240000200522005211160021109101016000010000001002731142521132200492201160000102005320053200532005320053
16002420052150000073278001212800001280000626400001120033200522005232380012208000020240000200522005211160021109101016000010000001002531122521124200492201160000102005320053200532005320053
160024200521500330046298001212800001280000626400001120033200522005232380012208000020240000201342014411160021109101016000010000001002531232522133200492202160000102005320053200532005320053
1600242005215004320046278001212800001280000626400001120033200522006132380012208000020240000200612005211160021109101016000010900001002531132522134200492201160000102005320053200532005320053

Test 6: throughput

Count: 16

Code:

  fcmla v0.8h, v16.8h, v17.h[1], #90
  fcmla v1.8h, v16.8h, v17.h[1], #90
  fcmla v2.8h, v16.8h, v17.h[1], #90
  fcmla v3.8h, v16.8h, v17.h[1], #90
  fcmla v4.8h, v16.8h, v17.h[1], #90
  fcmla v5.8h, v16.8h, v17.h[1], #90
  fcmla v6.8h, v16.8h, v17.h[1], #90
  fcmla v7.8h, v16.8h, v17.h[1], #90
  fcmla v8.8h, v16.8h, v17.h[1], #90
  fcmla v9.8h, v16.8h, v17.h[1], #90
  fcmla v10.8h, v16.8h, v17.h[1], #90
  fcmla v11.8h, v16.8h, v17.h[1], #90
  fcmla v12.8h, v16.8h, v17.h[1], #90
  fcmla v13.8h, v16.8h, v17.h[1], #90
  fcmla v14.8h, v16.8h, v17.h[1], #90
  fcmla v15.8h, v16.8h, v17.h[1], #90
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03191e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602044042730003900420251601171001600221001600005005715261411864119042285199730319998160100200160000200480000411904004311160201100991001001600001000151011031644400371600001004119142286400414004140041
160204412093000210082454125160100100160001100160000500128000040021422974122719973032114816010020016000020048000040040400401116020110099100100160000100001011041643400371600001004122041205400414004140041
160204400403000150061448525160100100160000100160000500128000040021411904120522222031999816010020016000020048000040040412041116020110099100100160000100001011041644400371600001004004140044407264004142286
160204400403090001271448525160100100160000100160000500128000040021423184004021095032114816010020016000020048000041205411901116020110099100100160000100001011031644412021600001004004140041400414004141191
160204400403000002242448525160117100160000100160000500128000040021411904120519973031999816010020016000020048000041190400401116020110099100100160000100001011041644400401600001004004140041400414119140041
1602044119030004200518375343160279100160102114160107500128000040021400404119019973032114816010020016000020048000040043400401116020110099100100160000100001011041634400371600001004119141206411914120641191
160204411903000902261025160100100160022100160000500571526140021400404004021095031999816010020016000020048000040040411901116020110099100100160000100001011031644422821600001004004440041400414119140041
1602044004030007502261025160100100160000100160000500131999940021411904004019973031999816010020016000020048000041190412091116020110099100100160000100001011041634400371600001004004141206411914004240041
1602044120430003300726025160100100160000100160000500131999940021400404004019973032114816010020016000020048000040040400401116020110099100100160000100001011041644412021600001004004141191412204120540041
160204411903170120042448525160100100160000100160000500128000041171400404004021095031999816010020016000020048000040040412611116020110099100100160000100001011041644400371600001004004241191422864004141191

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03070b181e1f373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696b6d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a7a8acc5cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024400973061000035047025160045101600531016000050128000011541200041226400401999603200231601192016021020480630400404004211160021109101016000010000141010022811916211117412260208160000104122740042400414004140041
1600244005130800000530670251600101016003510160000501280000015400210400404122619996032229816001020160000204800004004042318111600211091010160000100000010022841716211715400370209160000104004142319400414231940041
1600244004229900000350470251600101016005310160000501280000115400210400404231822244032002016001020160000204800004004040043111600211091010160000100001620100228411516211157423150208160000104228240041400424231940041
16002440055317000000047029316050112160172111600006114869341154120704004040814211410102089116001020160169204806244062240149211600211091010160000104000010022841716211117422821209160000104080741094400414122741313
1600244004030301110415981047460725160010101600001016000050572804711540021042281412191999603200201600102016000020480000400404009521160021109101016000010011001002284111162111111412230208160000104122040041400434230342282
1600244009230000000530490251600451016000110160000501280000115400210423184004019996032229816001020160000204800004122640040111600211091010160000100002370100228411116411715400370208160000104004140043412274004140041
160024422852990000000890251600101016003510160000501280000115400210400404122921146253212061600102016000020480000400404231811160021109101016000010000001002484111162111111423150208160000104004140041412274004140041
16002442281299000006005302516001010160000101600005012800001154120004121940040199960320020160010201600002048000040040400401116002110910101600001000000100228411116411117400390409160000104004142282412204122740041
16002440101300000000053025160010101600351016000050586833311540021041226412261999625320020160010201600002048000040041412191116002110910101600001000000100228411116211715400370208160000104004142319400414228641227
1600244010230900000104702516004510160001101600005057280471154002104004042281199960320020160010201600002048000041226400401116002110910101600001000033010022841716211711400370208160000104004140041400414004142319