Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLSL2 (by element, 2S)

Test 1: uops

Code:

  fmlsl2 v0.2s, v1.2h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)0318191e1f3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
10044037310000613407251000100010005319081401840374037325833895100010003000403740371110011000073216113811100040384038403840384038
10044037301000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
100440373001120613407251000100010005319080401840374037325833895100011633480403740371110011000073116113473100040384038403840384038
10044037300000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
100440373000210613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300090613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmlsl2 v0.2s, v1.2h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020440037299007263940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710031622394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010003710121622394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121622394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804006140037400373810833874510100200100002003000040037400371110201100991001001000010000710121622394790100001004003840038400384003840038
102044003730000763940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121622394790100001004003840038400384003840038
10204400373000011313940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121622394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121622394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121632394790100001004003840038400384003840038
102044003729900613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121622394790100001004003840038400384003840038
102044003730000613940725101001001000012710000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000710121623394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030818191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a7a8a9accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003729910027011139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400375110021109101010000100000006402164339473010000104003840038400384003840038
1002440037300000002582394072510010101000010105925057069081400184003740037381301838767100102010000203000040037400371110021109101010000100000036402162239473010000104003840038400384018140038
1002440037300000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000006402163239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000036402162239473010000104003840038400384003840038
10024400373020000012439407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100200006402162239473010000104003840038400384003840038
10024400373000003906139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100061010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037299000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000007252163239473010000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmlsl2 v0.2s, v0.2h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8accdcfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
102044003729906139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
1020440037300096639407251010010010000100100005005706908140158400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
1020440037300025139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840085400384003840038
102044003729906139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000073901161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
102044003729906139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071001161139479100001004003840038400384003840038
102044003729906139407251010010010000100100005005706908140018400374003738146338745101002001000020030000400374003711102011009910010010000100000071001171139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003729906139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003729906139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730066139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300072639407251001010100001010000505706908140018400374003738130338767100102010000203048640085400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003729906139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000104006402162239473010000104003840038400384003840038
1002440037299013939407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037299053639407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmlsl2 v0.2s, v1.2h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000993940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071003162239479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
10204400372990613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
10204400373009613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
102044003730007263940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
1020440037299540613940725101001001000010010000500570690810400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03073f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400373000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416443947310000104003840038400384003840038
10024400373001613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
10024400373001613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010010640416343947310000104003840038400384003840038
10024400372990613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416443947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416443947310000104003840038400384003840038
10024400373001613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416443947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
10024400373001613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010003640316443947310000104003840038400384003840038
10024400372990613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416433947310000104003840038400384003840038
100244003729903693940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416343947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmlsl2 v0.2s, v8.2h, v9.h[1]
  movi v1.16b, 0
  fmlsl2 v1.2s, v8.2h, v9.h[1]
  movi v2.16b, 0
  fmlsl2 v2.2s, v8.2h, v9.h[1]
  movi v3.16b, 0
  fmlsl2 v3.2s, v8.2h, v9.h[1]
  movi v4.16b, 0
  fmlsl2 v4.2s, v8.2h, v9.h[1]
  movi v5.16b, 0
  fmlsl2 v5.2s, v8.2h, v9.h[1]
  movi v6.16b, 0
  fmlsl2 v6.2s, v8.2h, v9.h[1]
  movi v7.16b, 0
  fmlsl2 v7.2s, v8.2h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)03080a0b18191e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a7a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042006915000000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000012010111116112006201600001002006620066200662006620066
160204200651500000000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000000000010111116112006201600001002006620066200662006620066
16020420065150000000040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100000000120010111116112006201600001002006620066200662006620066
160204200651500010000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000000000010111116112006201600001002006620066200662006620066
160204200651500000000402580100100800001008000050064000002004620065200653238010020080000200240000200652006511160201100991001001600001000000000010111116112006201600001002006620066200662006620066
16020420065150000000040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000000114010111116112006201600001002006620066200662006620066
160204200651510000000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000000103010111116112006201600001002006620066200662006620066
16020420065150000000040258010010180000123800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100000000147010111116112006201600001002006620066200662006620066
16020420065150000000061025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000099010111116112006201600001002006620066200662006620066
160204200651500000000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000000103010111116112006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)03181e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024200801500004629800121280000128000062640000115200332005220052323800122080000202400002005220052111600211091010160000100131002782142521144200492201160000102005320053200532005320053
1600242005215000046278001212800001280000626400001152003320052200523238001220800002024000020052200521116002110910101600001001310029113252521154200492201160000102005320053200532005320053
160024200521510006729800121280000128000062640000115200332005220052323800122080000202400002005220052111600211091010160000100001002782142521144200492201160000102005320053200532005320053
16002420052150000462780012128000012800006264000011520033200522747632380012208000020240000200522005211160021109101016000010033691002782142541243200492201160000102005320053200532005320062
160024200521500004627800121280000128000062640000115200332006120052323800122080000202400002006120052111600211091010160000100701003082142521144200492201160000102005320053200622005320062
1600242005215000071127800121280000128000062640000115200332006120052323800122080000202400002005220052111600211091010160000100801002782242521142200492201160000102005320053200532005320053
160024200611500004627800121280000128000062640000115200332006120052323800122080000202400002005220052111600211091010160000100001003082132522144200492202160000102006220062200622005320053
160024200521500004627800121280000128000062640000015200332005220061323800122080000202400002005220052111600211091010160000100101002883232541134200582402160000102005320053200532005320062
160024200521500210462780012128000012800006264000011520033200522005232380012208000020240000200522006111160021109101016000010001201002783132521144200492201160000102006220053200532005320053
160024200521500004629800121280000128000062640000115200422005220052323800122080000202400002005220052111600211091010160000100101002783142521134200492201160000102005320053200532005320062

Test 6: throughput

Count: 12

Code:

  fmlsl2 v0.2s, v12.2h, v13.h[1]
  fmlsl2 v1.2s, v12.2h, v13.h[1]
  fmlsl2 v2.2s, v12.2h, v13.h[1]
  fmlsl2 v3.2s, v12.2h, v13.h[1]
  fmlsl2 v4.2s, v12.2h, v13.h[1]
  fmlsl2 v5.2s, v12.2h, v13.h[1]
  fmlsl2 v6.2s, v12.2h, v13.h[1]
  fmlsl2 v7.2s, v12.2h, v13.h[1]
  fmlsl2 v8.2s, v12.2h, v13.h[1]
  fmlsl2 v9.2s, v12.2h, v13.h[1]
  fmlsl2 v10.2s, v12.2h, v13.h[1]
  fmlsl2 v11.2s, v12.2h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)03071e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8acc5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1202044003930000061356892512012810012000110012000050058519930416674169140039249323249971201002001200002003600004003941686111202011009910010012000010000007610116114003001200001004004040040416924169240040
1202044003930000361996125120100100120003100120000500563064004002040039416862657732664412010020012000020036000041691400391112020110099100100120000100000076101161141682251200001004004040040400404169240040
1202044003931200361356892512010312512000110012000050056306401400204003941686249323249971201002001200002003600004003941691111202011009910010012000010002307610116114167701200001004004042471400404004040040
120204400393120006199612512010010012000010012000050058519930400204168640039249323249961201002001200002003600004168640039111202011009910010012000010000007610116114168301200001004004040040400404169240040
120204400393000016199612512012512512000110012000050058519930416674168640039249323249971201002001200002003600004003941691111202011009910010012000010060007610116424167701200001004004040040416924168740040
120204400393120036199612512010010012000010012000050058569420416674169140039249323249971201002001200002003600004003941691111202011009910010012000010000007610116114167701200001004004040040424714004040040
120204400393120006199612512010010012000010012000050058519930416674169140039249323266491201002001200002003600004168640039111202011009910010012000010000007610116114003001200001004168941687416924004040040
120204400393280036199612512010010012000010012000050058519930416674003941691265773266441201002001200002003600004169140039111202011009910010012000010000007610116114003001200001004247140040400404004040040
120204400393120006199612512010010012000010012000050058518690416674168640039249323249971201002001200002003600004168640039111202011009910010012000010000007610116114167701200001004169241692416924004040040
120204400393120006199612512010310012000110012000050058518690416674168640039249323249971201002001200002003600004003941691111202011009910010012000010000007610116114168301200001004168940040400404004040040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)0307080b18191e1f373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8a9acc2c5cdcfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaebec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
12002440039300000000000679961251202511012000110120000505630640110400204179641719249550325019120010201200002036000040039400391112002110910101200001000000007524611616211464003001594120000104168840040400404004040040
12002440039300000000000679961251200101012000010120000505630640110400204003940039249550325019120010201200002036000040039400391112002110910101200001000000007522311616211644003001590120000104004040040400404004040040
120024400393000000000006379961251200101012000010120000505851869110400204003940039249550325019120010201200002036000040039400391112002110910101200001000000007522311916211984003001590120000104170240040400404004040040
12002440039300000000000679961251200101012000010120000505630640100400204003940039249550325019120010201200002036000040039400391112002110910101200001000000007522311916211894003001594120000104170240040400404004040040
120024400393000000000006799612512001010120000101200005056306400104002040039400392495503250191200102012000020360000400394003911120021109101012000010000000075243126162119840030015177120000104004040040400404004040040
12002440039300000000000679961671200101012000010120000505630640100400204003940039249550325019120010201200002036000040039400391112002110910101200001000000007522811616211564003001597120000104004040040400404004040040
120024400393000000000006799612512001010120003101200005058518691054002041686400392495503250191200102012000020360000416914003911120021109101012000010000001075223114162114104003001594120000104004040040400404004040040
12002440039300000000000679961251200101012000010120000505630640100400204003941691249550326666120010201200002036000040039416861112002110910101200001001000017522311716211644003001594120000104170240040400404004040040
12002440039300000000000679961251200101012000010120000505630640105400204168740039249550325019120010201200002036000040039400391112002110910101200001000000007522321616211694003001594120000104170241692400404004040040
12002440039300000000000679961251200101012000010120000505630640110400204003940039249550325019120010201200002036000040039400391112002110910101200001000000007522311616211464003001594120000104168740040400404004040040