Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLSL (vector, 4S)

Test 1: uops

Code:

  fmlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)03081e1f3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731000613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731000613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
1004403731090613407251000100010005319081401840374037326033895100010003000403740371110011000073116113473100040384038403840384038
1004403730000613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300088613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400372990061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071003162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071002162239479100001004003840038400384003840038
102044003729900726394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239551100001004003840038400384003840038
10204400372999061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
102044003729900726394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000071013162239479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000071012162239479100001004003840038400384003840038
10204400372990066394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000071013162239553100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010030640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003729906139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003729906139407251001010100001010000505706908140065040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400372990131039407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216323947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010001640216223947310000104003840038400384003840038
1002440037299216139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018040037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmlsl v0.4s, v0.4h, v1.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
102044003730000103394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03090b1e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000101006402162239473010000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000010339407251001010100001010000505708304140053400374013238135338767101602010000203000040037401312110021109101010000100006402162239473010000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000101006402162239473010000104003840038400384003840038
100244003730010010339407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037299000726394072510010101000010100005057069081400184003740037381302638767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006462162239473010000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmlsl v0.4s, v1.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080b18191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8a9acc2branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400372990000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
1020440037300000041461394072510100120100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840086
102044003730000001261394072510100100100001001000066957069080400180400374003738108338819101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384022840038
10204400373000000061394072510100100100001301000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000002071011611394790100001004003840038400384003840038
10204400373000000061394072510100100100001001000050057069080400180400374003738122338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000061394072510100100100001001000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000030071011611394790100001004003840038400384003840038
102044003729900100145394072510100100100001001000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000001061394072510100100100001001000050057069081400180400374003738108338745101002081000020030000400374003711102011009910010010000100001030071011611394790100001004003840038400384003840038
10204400372990000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071014811394790100001004003840038400384003840038
10204400373000000061393982510100100100001001041450057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0307080b1e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037299000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010110634064021621039473010000104003840038400384003840038
1002440037300101006139407251001212100001210000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730000047706139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006404162239473010000104003840038400384003840038
10024400373000000018939389251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400372990001206139407251001010100001210000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402161239473010000104003840038400384003840038
1002440037300100006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
10024400372990006010339407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040078400371110021109101010000100006402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmlsl v0.4s, v8.4h, v9.4h
  movi v1.16b, 0
  fmlsl v1.4s, v8.4h, v9.4h
  movi v2.16b, 0
  fmlsl v2.4s, v8.4h, v9.4h
  movi v3.16b, 0
  fmlsl v3.4s, v8.4h, v9.4h
  movi v4.16b, 0
  fmlsl v4.4s, v8.4h, v9.4h
  movi v5.16b, 0
  fmlsl v5.4s, v8.4h, v9.4h
  movi v6.16b, 0
  fmlsl v6.4s, v8.4h, v9.4h
  movi v7.16b, 0
  fmlsl v7.4s, v8.4h, v9.4h
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)031e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a7a8a9accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042009115004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000020010113316332006201600001002006620066200662006620066
1602042006515154940258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001510113316232006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000010113316332006201600001002006620066200662006620066
160204200651500108625801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000610114316342006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000010010113316332006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010113316332006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000010010113316332006201600001002006620066200662006620066
1602042006515004025801001008000010080000530640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000010113316332006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000002004620065200653238010020080000200240000200652006511160201100991001001600001000002015610113216442006201600001002006620066200662006620066
1602042006515104025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000910113316332006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)033a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696b6d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600242006715004625800121280000128000062640000112002802005120047323800122080000202400002004720047111600211091010160000100010032311202421181720044215160000102005220048202832005020048
1600242004715004625800121280000128000062640000112002802004720047323800122080000202400002004720047111600211091010160000101010033311142021114820044215160000102004820048202422005020048
160024200511500462580012128000012800006264000011200280200472004732380012208000020240000200472004711160021109101016000010101003131182021191520044215160000102004820048202472004820048
16002420047150188258001212800001280330626400001120028020047200473238001220800002024000020047200471116002110910101600001000100393111520211141420044215160000102004820048202602004820048
16002420047151046258001212800001280000626400001120028020047200473238001220800002024000020047200471116002110910101600001000100373111420211141420044215160000102004820048202442005220052
1600242004715004625800121280000128000062640000112002802004720047323800122080000202400002004720047111600211091010160000101310038311152021115920044215160000102004820048202392004820048
160024200471500462580012128000012800006264000011200280200472004732380012208000020240000200472004711160021109101016000010403100373111420211161420044215160000102004820048202362004820048
1600242004715004625800121280000128000062640000112002802004720047323800122080000202400002004720047111600211091010160000101010036311122021191520044215160000102004820048202382004820048
160024200471500462580012128000012800006264000011200280200472004732380012208000020240000200472004711160021109101016000010283100393111620211151420044215160000102004820048202452004820052
1600242004715005225800121280000128000062640000012002802004720051323800122080000202400002005120047111600211091010160000101010040312142041215920044230160000102005220048202422004820048

Test 6: throughput

Count: 12

Code:

  fmlsl v0.4s, v12.4h, v13.4h
  fmlsl v1.4s, v12.4h, v13.4h
  fmlsl v2.4s, v12.4h, v13.4h
  fmlsl v3.4s, v12.4h, v13.4h
  fmlsl v4.4s, v12.4h, v13.4h
  fmlsl v5.4s, v12.4h, v13.4h
  fmlsl v6.4s, v12.4h, v13.4h
  fmlsl v7.4s, v12.4h, v13.4h
  fmlsl v8.4s, v12.4h, v13.4h
  fmlsl v9.4s, v12.4h, v13.4h
  fmlsl v10.4s, v12.4h, v13.4h
  fmlsl v11.4s, v12.4h, v13.4h
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)03081e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
120204408863100006137199251201001001200001001200005005630640040020040039400392493232664512010020012000020036000040039400391112020110099100100120000100100761041654400301200001004004040040416924004041689
120204400393000007269961251201011001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100100761041645400301200001004004040040400404004040040
12020440039300000619961251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100100761041645400301200001004004040040416924170240040
12020440039300000619961251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100200761041645416771200001004004040040400404004040040
12020440039312000619961251204261001200001001200005005630640040060040039400392493232499712010020012000020036000040039400391112020110099100100120000100160761051634400301200001004004040040416924004040040
120204400393000006135689251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100200761041644416831200001004004040040416924168740040
120204400393130006136348251201001001200001001200005005630640040020341691400392493232499712010020012000020036000040039400391112020110099100100120000100200761041644400301200001004004040040400404004040040
120204400393000006135689251201001001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100500761041644400301200001004004040040416764004040040
120204400392990006137966251201001001200001001200005005630640040020040039400392493232499712010020012000020036000041691416911112020110099100100120000100200761051654400301200001004004040040416924004040040
12020440039300000619961471202031001200001001200005005630640040020040039400392493232499712010020012000020036000040039400391112020110099100100120000100500761041655400301200001004004040040416894004041688

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)030918191e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8a9accdcfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0eaebec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1200244003930000000673554446120010101200001012000050563064011400204003940039249553250191200102012000020360000400394003911120021109101012000010000007522311221621187400301594120000104169240040400404004040040
1200244003929900000673796625120010101200001012000050585199311400204003940039249553250191200102012000020360000400394003911120021109101012000010000027524622916422654003030177120000104004040040400404004040040
120024400393000001206799612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000752231161621198400301594120000104004040040400404004040040
12002440039299000006799612512001010120000101200005056306400140020424704003924955325019120010201200002036000040039400391112002110910101200001000000752231171621177400301594120000104004040040400404004040040
12002440039300000006799612512001010120000101200005056306401140020416864003924955325019120010201200002036000040039400391112002110910101200001000000752231171621177400301594120000104004040040400404004040040
1200244003930000000653299612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000752231171621196424601590120000104169240040400404004040040
12002440039300000006799612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000752231171621185400301594120000104004040040400404004040040
12002440039300000603299961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000100000075246228164221164003630177120000104004040040400404004040040
1200244003930000000734027425120010101200001012000050563064001400204003940039256333250191200102012000020360000400394003911120021109101012000010000007524622716422984003030177120000104168740040400404168740040
1200244003930000000739961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000100000075246226164226144003030177120000104169240040400404004040040