Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLS (by element, 2D)

Test 1: uops

Code:

  fmls v0.2d, v1.2d, v2.d[1]

Register initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)03080b18191e1f3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a0a7a8a9acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073216113473100040384038403840384038
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038
1004403730000000613407251000100010005319081401840374037325833895100010003000403740371110011000000069073116113473100040384038403840384038
1004403730000000613407251000100010005319081401840374037325833895100010003000403740371110011000000012073116113473100040384038403840384038
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038
100440373100000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038
100440373000000061340725100010001000531908140184037403732583389510001000300040374037111001100000000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmls v0.2d, v1.2d, v2.d[1]

Register initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03181e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8accfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730000025139407251010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010001071003163339479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010001071013163439479100001004003840038400384003840038
10204400372990006139407251010010010000100100005005706908040018040037400373810833874510100200100002003000040037400371110201100991001001000010003071013163339479100001004003840038400384003840038
10204400373000008939407251010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010001071013164339479100001004003840038400384003840038
10204400372990006139407251013610010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
10204400372990006139407251010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010000071213163439479100001004003840038400384003840038
102044003729900010339407251010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
10204400373000916139407251010010010000100100005005706908040018340037400373810833874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
10204400372990016139407251010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010009071013163339479100001004003840038400384003840038
102044003730000072639407871010010010000100100005005706908140018040037400373810833874510100200100002003000040037400371110201100991001001000010020071014163339479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)033f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8a9acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037300613940725100101110000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010022200640216333947310000104003840038400384003840038
10024400373007263940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640316333947310000104003840038400384003840038
1002440037300613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640316333947310000104003840038400384003840038
1002440037300613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640316333954510000104003840038400384003840038
1002440037300893940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640316433947310000104003840038400384003840038
1002440037300613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640416333947310000104003840038400384003840038
1002440037300613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000030640316333947310000104003840038400384003840038
1002440037300613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640316333947310000104003840038400384003840038
1002440037299613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640316333947310000104003840038400384003840038
1002440037300613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640316333947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmls v0.2d, v0.2d, v1.d[1]

Register initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0308090b18191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a6a7a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000100000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000100071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
102044003729900000000162394072510100100100001001000050057069081400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000060061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400372990000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
102044003729900000000943394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0318191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8a9acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037300000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300009613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010020000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
10024400373000005363940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037299000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037299000613940725100101010000101000050570690814001840037400373813063878610308201016320304864003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmls v0.2d, v1.2d, v0.d[1]

Register initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730008239407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100037101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730096139407441010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730008239389251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338764101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
1020440037300072639407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
102044003730006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840084

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0318191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003731100150014443940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640416333947310000104003840085400384022440038
100244003732101162132115639389251001010100001010000505706908040018040037400373813011388051045620100002030000400374003711100211091010100001042010640316333947310000104003840038400384003840038
10024400373110000011513940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640316333947310000104003840038400384003840038
1002440037306000009083940725100181010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640316333947310000104003840038400384003840038
100244003730600000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640316333947310000104003840038400384003840038
1002440037305000001033940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640316333947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690814001804003740037381303387671001020100002030000400374003711100211091010100001000000640316353947310000104003840038400384003840038
10024400373002000016033940725100101010006101000050570690814001804003740037381303387671001020100002030000400374003711100211091010100001000003640316333947310000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000000640316333947310000104003840038400384003840038
100244003730000000823940725100101010000101000050570690804001804003740037381303387671001020104862030000400854008511100211091010100001000239640316333947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmls v0.2d, v8.2d, v9.d[1]
  movi v1.16b, 0
  fmls v1.2d, v8.2d, v9.d[1]
  movi v2.16b, 0
  fmls v2.2d, v8.2d, v9.d[1]
  movi v3.16b, 0
  fmls v3.2d, v8.2d, v9.d[1]
  movi v4.16b, 0
  fmls v4.2d, v8.2d, v9.d[1]
  movi v5.16b, 0
  fmls v5.2d, v8.2d, v9.d[1]
  movi v6.16b, 0
  fmls v6.2d, v8.2d, v9.d[1]
  movi v7.16b, 0
  fmls v7.2d, v8.2d, v9.d[1]

Register initialization (not part of the measured code):

  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)031e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204200771500127425801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010020301011111621200621600001002006620066200662006620066
1602042006515007052580100100800001178000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011221621200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011121622200621600001002006620066200662006620066
160204200651500103425801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010020001011221622200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111612200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011211622200621600001002006620066200662006620066
1602042006515001032580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011221621200621600001002006620066200662006620066
1602042006515104732580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620135200653238010020080000200240000200652006511160201100991001001600001000001011211621200621600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011221621200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)03070a3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002420097150002882780012128000012800006264000011020033200522005232380012208000020240000200522005211160021109101016000010231004281120252111918200492402160000102006220062200622006220062
160024200611502118529800121280000128000062640000015200422006120061323800122080000202400002006120061111600211091010160000100010038115219344222020200582402160000102006220062200622006220062
160024200611504216429800121280109128000062640000015200422006120061323800122080000202400002006120061111600211091010160000100010045115219344221918200582402160000102006220062200622006220062
1600242006115111211729800121280000128000062640000015200422006120061323800122080000202400002006120130111600211091010160000100010043115213344221917200582201160000102005320053200532005320053
16002420052150102582780012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010001004184119252111918200492201160000102005320053200532005320053
160024200521501111232780012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010001004384114252112020200492201160000102005320053200532005320053
160024200521501111822780012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010031004285117252111819200492201160000102005320053200532005320053
160024200521501118862780012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010001004285118252112020200492201160000102005320053200532005320053
160024200521500029532780012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010001004085117252111917200492201160000102005320053200532005320053
160024200521502121382780012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010001004385120252112021200492201160000102005320053200532005320053

Test 6: throughput

Count: 12

Code:

  fmls v0.2d, v12.2d, v13.d[1]
  fmls v1.2d, v12.2d, v13.d[1]
  fmls v2.2d, v12.2d, v13.d[1]
  fmls v3.2d, v12.2d, v13.d[1]
  fmls v4.2d, v12.2d, v13.d[1]
  fmls v5.2d, v12.2d, v13.d[1]
  fmls v6.2d, v12.2d, v13.d[1]
  fmls v7.2d, v12.2d, v13.d[1]
  fmls v8.2d, v12.2d, v13.d[1]
  fmls v9.2d, v12.2d, v13.d[1]
  fmls v10.2d, v12.2d, v13.d[1]
  fmls v11.2d, v12.2d, v13.d[1]

Register initialization (not part of the measured code):

  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)030b1e373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8a9accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
120204400393000000105379662512010110012000110012000050056306400400200400394169126577324997120100200120000200360000400394168611120201100991001001200001000000761031611400301200001004004041692400404004040040
12020440039312000033999612512010010012000310012000050056306400416670416864003924932326649120100200120000200360000416864003911120201100991001001200001000000761011611400301200001004168740040416924004041692
12020441688312000089899612512010010012000010012000050058519930416720416864003924932324997120100200120000200360000416914003911120201100991001001200001000000761011611416771200001004168740040416874004041687
12020441686299000014599612512010010012000010012000050056306400400200416864003924932324997120100200120000200360000416864003911120201100991001001200001000000761011611416771200001004004041687400404168740040
1202044003931200306199612512010010012000010012000050058518690416670416914003924932324997120100200120000200360000400394168811120201100991001001200001000003761011611400301200001004168740040416874004041692
1202044169130000006199612512010010012000310012000050056306401400200400394169124932324997120100200120000200360000400394168611120201100991001001200001000000761011611400301200001004168740040416924004041687
1202044169129900006199612512010310012000010012000050056306400400200400394169126577326644120100200120000200360000416864003911120201100991001001200001000000761011611416771200001004004041687400404169240040
12020440039312003035699612512010010012000010012000050058519930416670416864003924932326644120100200120000200360000400394003911120201100991001001200001000000761011611400301200001004004041687400404168740040
120204400393120000489356892512010310012000110012000050056306400400200400394168626582326644120100200120000200360000416864003911120201100991001001200001000000761011611400301200001004168740040416874004041687
12020441686299003012499612512010010012000010012000050058519931416720416864003924932324997120100200120000200360000416914003911120201100991001001200001000000761011611400301200001004004041692400404168940040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)030809181e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9accdcfd0d5d6dbddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
12002440039300000000619961251200101012000010120000505630640040020400394003924955027250191200102012000020360000400394003911120021109101012000010000075206925160101340030120000104004040040400404004040040
12002440039300000000943996125120010101200001012000050563064004002040039400392495503250191200102012000020360000400394003911120021109101012000010000175206911160131140030120000104004040040400404004040040
1200244003930000000061996125120010101200001012000050563064004002040039400392495573250191200102012000020360000400394003911120021109101012000010000075206611160121240030120000104004040040400404004040040
1200244003930000000061996125120010101200001012000050563064004002040039400392495503250191200102012000020360000400394003911120021109101012000010000075206312160131040030120000104004040040400404004040040
1200244003929900000061996125120010101200001012000050563064004002040039400392495503250191200102012000020360000400394003911120021109101012000010000075206012160122441679120000104004040040400404169240040
1200244003930000000061996125120010101200001012000050563064004002040039416912495503250191200102012000020360000400394003911120021109101012000010000075205710160131340030120000104004040040400404004040040
1200244003930000000061996125120010101200001012000050563064004002040039400392495503250191200102012000020360000400394003911120021109101012000010000075204813160141040030120000104004040040400404004040040
12002441691300000000726996125120010101200001012000050563064004002040039400392495503250191200102012000020360000400394003911120021109101012000010000075205412160151140030120000104004040040400404004040040
1200244003929900000061996125120010101200001012000050563064004002040039400392495503266711200102012000020360000400394003911120021109101012000010003075204810160101240030120000104004040040400404004040040
1200244003930000000061996125120010101200001012000050563064004002040039400392495503250191200102012000020360000400394003911120021109101012000010100075205114160121240030120000104004040040416924004040040