Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLA (by element, 4S)

Test 1: uops

Code:

  fmla v0.4s, v1.4s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110002073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730036134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730106134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730008434072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730009034072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmla v0.4s, v1.4s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | c9 | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10205400373000000000613940725101001001000010410444500570690840123040037400373810803387451010020010000200300004003740037411020110099100100100001000000030000710024123394790100001004003840084401464003840038
102044003730901000006139407251010010010000110100005165708304400180400374003738108025387451010020010164212300004003740084111020110099100100100001000000000000741021622394790100001004003840038400384003840038
1020440037312010000010373940725101001001000010010000500570690840088340037400373811503387451010020010000200304984018340084111020110099100100100001000000000000734021622394797100001004003840038400384003840075
10204401803120100000613940725101001031001810010000500570690840018040037400373810803387451010020210174200300004003740037111020110099100100100001004000000000740021622394790100001004003840038400864003840038
10204400373000001000613940725101001041001810010000546570690840018040037400373810807388001010020010000204300004003740037211020110099100100100001000000000000710121622394790100001004003840038400384003840038
1020440037300010000010653940786101001001000010410296500570690840018040037400373810803387631010020010000200300004003740037211020110099100100100001000021000001710124022394790100001004003840038400384003840038
1020440084304010400029163940783101001001000010910000500571109640018040037400373810803387451010020010000200300004003740037111020110099100100100001000000060000740131722394790100001004003840038400384003840038
10204402283110000000613940725101001001000010010000522570690840018040178400843810803387451010020010160200300004003740037111020110099100100100001000400000000710121622396210100001004003840038400384008640038
10204400373001000156008143940725101001131000010010000500570830440018040084400373810803387451025020010000200300004003740037111020210099100100100001000007004000710121623394790100001004003840038400384003840086
10204400373000000396016139407251010011010000100100005005711096400180400374003738118016387451010020010499200300004008440179111020110099100100100001000022032000710131623394793100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440084300000113007263940725100101010000101000050570690814001804003740037381300838767100102010000203000040037400371110021109101010000100000000006403163239473010000104003840038400384003840038
10024401773170000005370613938925100101010000101000050570690814001834003740037381300738767100102210000203000040037400371110021109101010000100001507203006402163339473010000104003840085400384003840229
1002440037300100000210613940725100101010018101014861570690814001834003740037381300338767100102010000223000040037400371110021109101010000100000000006403163639473010000104003840038400384003840038
100244018031300003040502963940725100101010018101000050570690814001804003740037381300338767104542010000203000040037400854110021109101010000100000000006645163439473010000104003840038400384003840180
10024400373120000005790613940725100101010000121000050570690804001804003740037381300338767100102010000203000040037400374110021109101010000100000200006404163339473010000104003840038400384003840038
10024400373141000000014053940769100101210000101000050570830404001804003740037381300338767100102010000203000040037400854110021109101010000100000007468006403163439473010000104008540181400854003840038
10024400372991000005400613940725100101010018101000050570690814001804018040037381300338841100102010000203000040037400371110021109101010000100000103006633165239473010000104008640133400854003840038
100244003730010000090613940725100161110000101000050570830404001804003740037381300338767100102010000203145240084400371110022109101010000100000000006403163439473010000104003840038400384003840038
100254003730000000000613940725100101010000101000050570830404001804003740037381300338767100102010000203000040037401321110021109101010000100000103540006692163339473010000104003840038400384003840038
10024401803171001010094394072510010101000010101475057069080400180400374003738130033876710010201000020300004003740179111002110910101000010000000126006403163239473010000104003840038400384003840083

Test 3: Latency 1->2

Code:

  fmla v0.4s, v0.4s, v1.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a7 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102044003729900000072639407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000000006139407451010010010000100100005615706908040018400374003738108338745101002061000020030498400374003711102011009910010010000100200047101161139479100001004003840038400384013340038
1020440037315100027606139396251010011410000100102965005706908040053400374003738108338745101002001000020030984400374003711102011009910010010000100004363537101162139479100001004003840133400384003840038
1020440132311000021906139407251010010010000100100005005708304040018400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
102044003731200000020139407251010010010000100100005005706908040018400374003738108338745120162041000020030999400374003711102011009910010010000100001007101161139479100001004003840038400384003840038
1020440037310003000201039407251010010010000121101485005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100001307101161139479100001004013340134400384003840038
1020440037311000013206139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100001007101161139479100001004003840038400384003840038
1020440037305000212017639407251010010510000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000007101161139479100001004003840038400384003840038
10204400373000000016139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400844003711102011009910010010000100000027101161139479100001004003840038400384003840134
1020440037300000000103394072510100100100001001000050057069080400184003740037381083387451010020510000200300004003740037111020110099100100100001002101082007101161139479100001004003840038400384003840085

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640416433947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
100244003729900000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416433947310000104003840038400384003840038
100244003729900000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640416443947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640416343947510000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
1002440037300000009823940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640416343947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640416343947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmla v0.4s, v1.4s, v0.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000006139389251010010010000100101475005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038
1020440037300000132539407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374008411102011009910010010000100000071011611394790100001004003840038400384003840038
10204400373000066139407251010010010000100100005005706908140018400374003738108338745101002001017920030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038
102044003729900061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710116113947911100001004003840038400384003840038
102044003730000074739407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038
102044003729900017239407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038
10204400372990006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394799100001004003840038400384003840038
10204400373000006139407251010010410000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038
102044003729900126139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100244003729906139407251001010100001010000505706908140018400374003738130033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018400374003738130033876710010201000020300004003740037111002110910101000010000668316343947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
1002440037300072639407251001010100001010000505706908040018400374003738130033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130033876710010201000020300004003740037111002110910101000010000640316343951110000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130033876710010201017220300004003740037111002110910101000010000640316343947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
100244003730008239407251001010100001010000505706908140018400374003738130033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
1002440037300072639407251001010100001010000505706908040018400374003738130033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmla v0.4s, v8.4s, v9.s[1]
  movi v1.16b, 0
  fmla v1.4s, v8.4s, v9.s[1]
  movi v2.16b, 0
  fmla v2.4s, v8.4s, v9.s[1]
  movi v3.16b, 0
  fmla v3.4s, v8.4s, v9.s[1]
  movi v4.16b, 0
  fmla v4.4s, v8.4s, v9.s[1]
  movi v5.16b, 0
  fmla v5.4s, v8.4s, v9.s[1]
  movi v6.16b, 0
  fmla v6.4s, v8.4s, v9.s[1]
  movi v7.16b, 0
  fmla v7.4s, v8.4s, v9.s[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 04 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | cf | d2 | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020420091150000000005152580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000000000001019641614905620062211600001002006620405204322056620160
160204204841510116089752804025801001008000010080000500640000120046200652006534680100200800002002400002006520065111602011009910010016000010000202434900010115010160562006201600001002006620066200662006620066
160204200651500000000040258010012180000118800005006408960200462006520065204580100200800002002400002006520065111602011009910010016000010000000000010120091601052006201600001002006620066200662006620066
16020420065150000000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000070300101150101601042006201600001002006620066200662006620066
160204200651500000000040258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100000000000101200101601042006201600001002006620066200662006620066
16020420065150000000006325801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000000010120010160942006201600001002006620066200662006620066
16020420065150000000008425801001008000010080000500640000120046200652006532380100200800002002400002006520065311602011009910010016000010000000000010120051601142006201600001002006620066200662006620066
1602042006515000000000822580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000000000001011901016010102006201600001002006620066200662006620066
1602042006515000000000444258010010080000100800005006400001200462006520065323801002008000020024000020065200651116020110099100100160000100000000000101190101601042006201600001002006620066200662006620066
16020420065151000000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000000010120051601042006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 04 | 08 | 1e | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | df | e0 | ? int output thing (e9) | ea | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242007515020012202580012128000012800006264000011520030200492004932380012208000020240000200492004911160021109101016000010010068851241221113737420046216160000102005020050200502005020050
1600242004915000017325800121280000128000062640000115200302004920049323800122080000202400002004920049111600211091010160000100100581451243221113735420046216160000102005020050200502005020050
160024200491501001462580012128000012800006264000011520030200492004932380012208000020240000200492004911160021109101016000010010070851243221113935520046216160000102005020050200502005020050
1600242004915010017772580012128000012800006264000011520030200492004932380012208000020240000200492004911160021109101016000010010070862242221213735520050216160000102005020054200502005020050
1600242004915010004802580012128000012800006264000001520030200492004932380012208000020240000200492004911160021109101016000010010073851237221114041420046216160000102005020050200502005020050
1600242004915020018192580012128000012800006264000001520034200532004932380012208000020240000200492004911160021109101016000010010074851251223224024420050216160000102005020050200502005020050
1600242004915010018152580012128000012800006264000011520030200492004932380012208000020240000200492004911160021109101016000010010070851240221112939420046216160000102005020050200502005020050
1600242004915010014342580012128000012800006264000011520034200492004932380012208000020240000200492004911160021109101016000010010072861253221214225420050231160000102005020050200502005020050
160024200491501013508052580012128000012800006264000011520030200492004932380012208000020240000200492004911160021109101016000010010072861236231112738420046216160000102005020050200502005020050
160024200491501001522580012128000012800006264000011520030200492004932380012208000020240000200492004911160021109101016000010010074851223221113943420046216160000102005020050200502005020050

Test 6: throughput

Count: 12

Code:

  fmla v0.4s, v12.4s, v13.s[1]
  fmla v1.4s, v12.4s, v13.s[1]
  fmla v2.4s, v12.4s, v13.s[1]
  fmla v3.4s, v12.4s, v13.s[1]
  fmla v4.4s, v12.4s, v13.s[1]
  fmla v5.4s, v12.4s, v13.s[1]
  fmla v6.4s, v12.4s, v13.s[1]
  fmla v7.4s, v12.4s, v13.s[1]
  fmla v8.4s, v12.4s, v13.s[1]
  fmla v9.4s, v12.4s, v13.s[1]
  fmla v10.4s, v12.4s, v13.s[1]
  fmla v11.4s, v12.4s, v13.s[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 19 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12020440039300000619961251201001001200001001200005005630640040020400394003924932032499712010020012000020036000041687400391112020110099100100120000100000761051611400301200001004004040040400404004040040
12020440039300000619961251201001001200001001200005005966386040020400394003924932032499712010020012000020036000041688400391112020110099100100120000100000761011611400301200001004004040040400404004040040
120205416913000120619961251201001001200001001200005005630640040020400394003924932032499712010020012020220036000040039400391112020110099100100120000100000761011611400301200001004159840040400404004040040
12020440039299000619961251201001001200001001200005005630640040020400394003924932032499712010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040
12020440039300000619961251201001001200001001200005005630640040020400394003924932032499712010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040
12020440039300000619961251201011001200001001200005005630640040020400394003924932032499712010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040
12020440039300000619961251201001001200001001200005005630640040020400394169124932032664512010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040
12020440039300000619961251201001001200001001200005005630640040020400394003924932032499712010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040
12020440039299000619961251201001001200001001200005005630640040020416884003924932032499712010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040
12020440039299000619961251201001001200001001200005005630640040020400394003924932032499712010020012000020036000040039400391112020110099100100120000100000761011611400301200001004004040040400404004040040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12002440039300011000619961251200101012000010120000505630640000400714003940039249550325019120010201200002036000041835416861112002110910101200001000607520500141600061040030000120000104004040040400404004040040
1200244003930000000061996125120010101200001012000050563064000040020400394003924955032501912001020120000203600004003940039111200211091010120000100000752053110160009540030005120000104004040040400404004040040
12002440039300000000536402744612001010120000101200005056306400004002041686400392495503250191200102012000020360000400394003911120021109101012000010000075200006160008940030005120000104004040040400404004040040
1200244003929900000061996125120010101200001012000050563064000040020400394003924955032502712020620120000203600004003940039111200211091010120000100000752002071600061040030000120000104004040040400404004040040
120024400393000000006199612512001010120000101200005056306400004002040682400392495503250191200102012000020360000400394003911120021109101012000010000075200006160009540030000120000104007541692416874004040040
1200244170130000000082996125120010101200001012000050563064001540881400394003924955032501912001020120000203600004003940039111200211091010120000100000752000061600091040030000120000104004040040400404168740040
12002442470300000000619961251200101012000010120000505630640000400204003940039249550325019120208201200002036000040039400391112002110910101200001000007520000916000106400300200120000104004040040400404004040040
120024400393000000006199612512001010120000101200005056306400154002040039400392495503250191200102012000020360000400394003911120021109101012000010000075200006160005940030000120000104004040040400404004040040
120024400393180000006199615112001010120000101200005056306400054002040039400392495503250191200102012000020360000400394003911120021109101012000010000075205008160009640030000120000104004040040400404004040040
120024400393000000006199612512001010120000101200005056306400154002040039400392495503250191200102012000020360000400394003911120021109101012000010000075200007160009940030000120000104004040040400404004040040