Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLAL2 (by element, 4S)

Test 1: uops

Code:

  fmlal2 v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004403731016134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100006073116113473100040384038403840384038
1004403730025534072510001000100053190840184037403732583389510001000300040374037111001100010073116113473100040384038403840384038
1004403730043134072510001000100053190840184037403732583389510001000300040374037111001100000073116113547100040384038403840384038
1004403730019834074410001000100053190840184037408432583389510001000300040374037111001100000073116113473100040384038403840384085
1004403731027134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373106134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403730054634072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmlal2 v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000710021722394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000712161642394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000710121622394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040065400374003738108338745101002001000020030000400374003711102011009910010010000100000000000710131622394790100001004003840038400384003840038
10204400372990000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000712121622394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003721102011009910010010000100000000000710121622394790100001004003840038400384003840038
10204400372990000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000710131622394790100001004003840038400384003840038
10204400372990000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000712121622394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000712121622394790100001004003840038400384003840038
10204400372990000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000020000712121622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 09 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316343947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
10024400372990006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
10024400373000006139398251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640416433947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316333947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640316433947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640416333947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018040037400373813033876710010201000020300004003740037111002110910101000010000640416433947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmlal2 v0.4s, v0.4h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 09 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a7 | a8 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400811110201100991001001000010000000071011611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
1020440037299000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
10204400373000004413940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000030071011611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038
10204400373000007263940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300066139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000661216223947310000104003840038400384003840038
100244003730004026139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767101572010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840059
10024400372990053639407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300006139407461001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmlal2 v0.4s, v1.4h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037300019200103394072510100100100001001000050057069081400184003740037381083387451010021610000200300004003740037111020110099100100100001000006710011613394790100001004003840038400384003840080
1020440037299000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710011611394790100001004003840038400384003840038
10204400372990000103394072510100100100001001000050057069081400184003740227381083387451010020010000200300004003740037111020110099100100100001000000710011611394790100001004003840038400384003840038
102044003730000352061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000003710011611395550100001004003840038400384003840085
1020440037300000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001002000710011611394790100001004003840038400384003840038
10204400372990120061394072510100100100001001000066557069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000230710011611394790100001004003840038400384003840229
1020440037300000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037411020110099100100100001000000710011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710111611394797100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037300000613940725100101210000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000006403162239473010000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001804003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
1002440037300000613940725100101010000111000060570690814001804003740037381303387671001220100002030000400374003711100211091010100001000006402162239473010000104008540038400384003840038
10024400373000007263940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001003006402165239473010000104003840038400384003840038
100244003730000153613940745100101010000101000055570690804001804003740037381303387671001020100002030000400374003711100211091010100001000007072162239512210000104003840038400384003840038
1002440037300000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001003006402162239473010000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001804003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
10024400373000012613940725100101010000101000050570690814001804003740037381303387671001020101602030000401314013021100211091010100001007278206402242239473010000104003840038400384003840038
10024400373000001033940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001003006402162239473010000104003840038400384003840038
1002440037299000613940725100101010000101000050570690804001804003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmlal2 v0.4s, v8.4h, v9.h[1]
  movi v1.16b, 0
  fmlal2 v1.4s, v8.4h, v9.h[1]
  movi v2.16b, 0
  fmlal2 v2.4s, v8.4h, v9.h[1]
  movi v3.16b, 0
  fmlal2 v3.4s, v8.4h, v9.h[1]
  movi v4.16b, 0
  fmlal2 v4.4s, v8.4h, v9.h[1]
  movi v5.16b, 0
  fmlal2 v5.4s, v8.4h, v9.h[1]
  movi v6.16b, 0
  fmlal2 v6.4s, v8.4h, v9.h[1]
  movi v7.16b, 0
  fmlal2 v7.4s, v8.4h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200911500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662015120066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651510402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 07 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200691510046258001212800001280000626400001120028200472004732380012208000020240000200472004711160021109101016000010100293117202116820044215160000102004820048200482004820048
160024200471521046258001212800001280000626400001120028200472004732380012208000020240000200472004711160021109101016000010100313116202116520044215160000102004820048200482004820048
160024200471510046258001212800001280000626400001120028200472005132380012208000020240000200472004711160021109101016000010100323125202125820050215160000102004820048200482005220052
160024200471510052258001212800001280000626400001120028200472004732380012208000020240000200472004711160021109101016000010100283114202114620044215160000102004820048200482004820048
160024200471510046258001212800001280000626400001120028200472004732380012208000020240000200472004711160021109101016000010100303116202117720044215160000102004820048200482004820048
160024200471510046258001212800001280000626400001120034200472004732380012208000020240000200472004711160021109101016000010100283115202115620044215160000102004820048200482004820048
160024200511520046258001212800001280000626400001120028200472004732380012208000020240000200472004711160021109101016000010100293115202117720044215160000102004820048200482004820048
160024200471510046258001212800001280000626400000120028200472004732380012208000020240000200472004711160021109101016000010100293117204117520044215160000102004820048200482004820048
160024200511510046258001212800001280000626400001120028200472004732380012208000020240000200512004711160021109101016000010100313117202115620044215160000102004820048200482004820048
16002420047152121346258001212800001280000626400001120028200512004732380012208000020240000200472004711160021109101016000010100303117202116520044215160000102004820048200482004820048

Test 6: throughput

Count: 12

Code:

  fmlal2 v0.4s, v12.4h, v13.h[1]
  fmlal2 v1.4s, v12.4h, v13.h[1]
  fmlal2 v2.4s, v12.4h, v13.h[1]
  fmlal2 v3.4s, v12.4h, v13.h[1]
  fmlal2 v4.4s, v12.4h, v13.h[1]
  fmlal2 v5.4s, v12.4h, v13.h[1]
  fmlal2 v6.4s, v12.4h, v13.h[1]
  fmlal2 v7.4s, v12.4h, v13.h[1]
  fmlal2 v8.4s, v12.4h, v13.h[1]
  fmlal2 v9.4s, v12.4h, v13.h[1]
  fmlal2 v10.4s, v12.4h, v13.h[1]
  fmlal2 v11.4s, v12.4h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1202044088630000006135689251201031001200001001200005005851993416670416914003924932324997120100200120000200360000416914003911120201100991001001200001000976101161141677191200001004004040040416894004040040
120204416863000000619961251201001001200011001200005005630640400200400394169126582326644120100200120000200360000416864003911120201100991001001200001000997610116114003001200001004168740040416874004041687
120204400393120000619961251201031001200001001200005005630640400200416864003924932324997120100200120000200360000400394168611120201100991001001200001000877610116114168301200001004168740040416874004040040
1202044168630002403105356892512010310012000010012000050058569424167204168640039249323266491201002001200002003600004003941691111202011009910010012000010001267610116114003001200001004004041687400404168740040
12020441686299000061356892512010310012000010012000050056306404002004169140039249323249971201002001200002003600004003941686111202011009910010012000010001057610116114003001200001004168740040416874004041687
12020441691300000361996125120100100120000100120000500563064041672041686400392493232499712010020012000020036000040039416861112020110099100100120000100097610116114168301200001004169240040416924004041687
1202044169129900006199612512010010012000010012000050056306404166704168640039249323249971201002001200002003600004168640039111202011009910010012000010001717610116114003001200001004168740040416924004041687
12020441686300000061996125120103100120001100120000500585199341667041686400392493232499712010020012000020036000040039416861112020110099100100120000100007610116114167701200001004004041687400404169240040
1202044168630000006199612512010010012000110012000050058519934167204168640039249323249971201002001200002003600004003941686111202011009910010012000010001687610116114003001200001004004041687400404168740040
1202044003931200006135689251201031001200001001200005005851993400200416864003924932326644120100200120000200360000416864003911120201100991001001200001000997610116114168301200001004004041687400404169241687

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1200244110630000000619961251200101012000310120000505630640040020040039400392495532501912001020120000203600004003940039111200211091010120000100060752031633400300120000104004040040400404004040040
120024400393000000061996125120010101200031012000050563064014002004003940039249553250191200102012000020360000400394003911120021109101012000010059150752021634400300120000104004040040400404004040040
12002440039300000006199612512001010120003101200005056306401416720416864003924955325019120010201200002036000040039400391112002110910101200001000630752031632400300120000104004040040400404004040040
120024400392990000161996125120010101200011012000050563064004002004003940039249553250191200102012000020360000400394003911120021109101012000010003076163162340030111120000104004040040400404004040040
1200244003930000000346996125120010101200521012000050563064004002034003940039249553250191200102012000020360000400394003911120021109101012000010001380754432433400300120000104165040040408874151640040
1200244003930001000619961251200101012000010120000505630640140020040039400392495532501912001020120000203600004003940039111200211091010120000100090752031633400300120000104004040040400404004040040
12002440039300000006199612512001010120003101200005056306401400200400394003924955325019120010201200002036000040039400391112002110910101200001000720752031633400300120000104004040040400404004040040
120024400393000000161996125120010101200001012000050563064004002004003940039249553250191200102012000020360000400394003911120021109101012000010001890752031633400300120000104009140040400404004040040
120024400393000000061996125120010101200011012000050563064004002004003940039249553250191200102012000020360000400394003911120021109101012000010001141752031634400300120000104004040040400404004040040
12002440039300000006199612512001010120002101200005056306401400200400394003924955326671120010201200002036000040039400391112002110910101200001000270752041743400300120000104004040040400404004040040