Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLA (vector, 4H)

Test 1: uops

Code:

  fmla v0.4h, v1.4h, v2.4h

Register initialization (not part of the unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
10044037310061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
10044037300061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
10044037310061340725100010001000531908040184037403732583389510001000300040374037111001100000073224113473100040384038403840384038
10044037300061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
10044037300082340725100010001000531908040184037403732583389510001000300040374037111001100000073116113533100040384038403840384038
10044037310061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
10044037300061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
10044037300061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
10044037310061340725100010001000531908040184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403730118661340725100010001000531908040184037403732583389510001000300040374037111001100040073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmla v0.4h, v1.4h, v2.4h

Register initialization (not part of the unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037300000000061394072510100100100001001000050057083040400184003740037381083387451010020010000200300004008340037111020110099100100100001000000003710021632394790100001004003840038400384003840038
1020440037299000000061394074510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000000000710021623394790100001004003840086400384003840038
10204400373000100000441394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000003710021622394790100001004003840038400384003840038
1020440037300000000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000000710021622394790100001004003840038400384003840038
102044003730000001410061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037211020110099100100100001000000000710021622394790100001004003840038400384003840038
1020440037300000000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037211020110099100100100001000000000712021623394790100001004003840038400384003840038
1020440037300000000061394072510100100100001001000050057069080400184003740037381083387571010020010000200300004003740037111020110099100100100001000000300710021622394790100001004008740038400864003840038
1020440037300010000061394072510100100100001001000050057069081400184003740037381083387451010020010000204300004003740037111020110099100100100001000000120710021622394790100001004003840086400384003840038
102044003730000000001061394072510100100100001001000052257069080400184003740037381083387451025220010000200300004003740037111020110099100100100001000000000710021622394790100001004003840038400384003840038
10204400373000000000557394072510100100100001001000050057069080400184003740037381123387451010020010000200300004003740037111020110099100100100001000000003710022622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c3 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000010261394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216233947310000104003840038400384003840038
1002440037299004261394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
1002440037300006613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010011432900640216323947310000104003840038400384003840038
1002440037300113961394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000015103394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000271023394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640316323947310000104003840038400384003840038
100244003730000294613940725100101010000101000050570830404001840037400373813012387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
1002440037299002761394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000300640224223947310000104003840038400864003840038
1002440037299101861394072510010101000010100005057083040400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000023161394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmla v0.4h, v0.4h, v1.4h

Register initialization (not part of the unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | ac | c2 | cf | d5 | d6 | d8 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400372990000013206139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000071021702239479100001004003840038400384003840038
10204400372990000011406139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000071021602239479100001004003840038400384003840038
10204400373000000010508239398251010010010000100100005005706908040018400844003738108338745101002001000020030000400374008511102011009910010010000100020050071011602239479100001004003840038400384003840038
1020440037299000002706139407251010010010000100100005005706908040018400374003738108338745101002001000020430000400374003711102011009910010010000100000010071021602239479100001004003840038400384003840038
102044003730000011210122739389251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000071021602239479100001004003840038400384003840038
1020440037300000002790385139398251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000013071021602239479100001004003840038400384003840038
10204400372990000018308239407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000071021602239479100001004003840038400384003840038
10204400373000000029106139398251010010010000100101485005706908040018400864003738108338745101002001000020030000400374003711102011009910010010000100002013368471021602239479100001004008640038400384003840038
10204400373000000025806139407441010011410000100101485005706908040053400854003738108338764101002001000020030000400374003711102011009910010010000100400100273221602239479100001004003840038400854003840038
1020440084300001102706139407451010011910000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000010071021602239479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000000030012639407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
1002440037300000000006139407251001010100001010000505709700140053400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
1002440037300000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
1002440037299000000006139407251001010100001010000555706908140018400374003738130338767100102010000203000040037400371110021109101010000100000300006403163339473010000104003840038400384003840038
1002440037300000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
1002440037300000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
1002440037299000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
10024400373000000000072639407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000006403163339473010000104003840038400384003840038
1002440037300000000006139407251001010100001010000505706908140018400374003738135338767100102010000203000040037400371110021109101010000100000003006403163339473010000104003840038400384003840038
1002440037300000000006139407251001010100001010000505706908140018400374017738130338767100102010000203000040037400761110021109101010000100000000006403163339473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmla v0.4h, v1.4h, v0.4h

Register initialization (not part of the unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 09 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c5 | cf | d0 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037299200061394072510100100100001001000050057069081040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
10204400373000000916394072510100100100001001000050057069081040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007104011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081440018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081040018400844003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069081040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
1020540037300000061394072510100100100001001000050057069080040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
10204400372990000127394072510100100100001001000050057069080440018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069081040018400374003738108033874510100200100002003000040037400371110201100991001001000010000000007100011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373000000000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000606402162239473010000104003840038400384003840038
10024400373000000000061394072510010101000010100005057069080401584008540037381303387671001020100002030000400374003711100211091010100001000000006402162239473010000104003840038400384003840038
100244003730000000000251394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000006402162239473010000104003840038400384003840038
10024400373000000000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000006402162239473010000104003840038400384003840038
10024400372990000000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000006402162239473010000104003840038400384003840038
10024400373000000000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000006402162239473310000104032140417404654041740420
1002440467303011981200792073883933518010067201004812113325557189180402984050040416381684238915111992411451243372340462403687110021109101010000102202428758080931044339795510000104022940416404634046440462
10024404633031011081068792141573932610110062161005416111847057194720403334046740461381662338938113452210808263437140453404151111002110910101000010202142078806402162239473010000104003840038400384003840038
10024400373000000000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000006402162239473010000104003840038400384003840038
1002440037300000004110061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000006402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmla v0.4h, v8.4h, v9.4h
  movi v1.16b, 0
  fmla v1.4h, v8.4h, v9.4h
  movi v2.16b, 0
  fmla v2.4h, v8.4h, v9.4h
  movi v3.16b, 0
  fmla v3.4h, v8.4h, v9.4h
  movi v4.16b, 0
  fmla v4.4h, v8.4h, v9.4h
  movi v5.16b, 0
  fmla v5.4h, v8.4h, v9.4h
  movi v6.16b, 0
  fmla v6.4h, v8.4h, v9.4h
  movi v7.16b, 0
  fmla v7.4h, v8.4h, v9.4h

Register initialization (not part of the unrolled code):

  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200901510402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000031011111612200621600001002006620066200662006620066
160204200651506402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
1602042006515002302580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
1602042006515024402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111631200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000031011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160202100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111613200621600001002006620066200662006620066
160204200651510402580100100800001008000050064000020046200652006503238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2526

retire (01) | cycle (02) | 03 | 07 | 1e | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a7 | a8 | a9 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242008515100046278001212800001280000626400001102003320052200523238001220800002024000020052200521116002110910101600001000000001003381125252111615200492201160000102006220053200622005320053
160024200611500303046278001212800001280000626400002152003320052200523238001220800002024000020052200611116002110910101600001000000001003185120252121911200492201160000102005320053200532005320053
160024200521500396046278001212800001280000626400002152003320061200613238001220800002024000020052200521116002110910101600001000000001003485112252111711200492401160000102005320053200532005320053
1600242006115006046278001212800001280000626400002152003320052200523238001220800002024000020052200521116002110910101600001000000001003885120252111015200492201160000102005320053200532005320053
1600242005215000046278001212800001280000626400001152004220052200523238001220800002024000020052200521116002110910101600001000000001003885122252112019200492201160000102005320053200532005320053
160024200611500004627800121280000128000062640000215200332005220052323800122080000202400002005220052111600211091010160000100000000100388519252111510200492201160000102005320053200532005320053
160024200521500294046278001212800001280000626400002152003320052200523238001220800002024000020052200521116002110910101600001000000001003885115252111515200492201160000102005320053200532005320053
1600242005215000046278001212800001280000626400002152004220061200523238001220800002024000020052200521116002110910101600001000000001003385110252111411200492201160000102005320053200532005320053
1600242005215000052298001212800001280000626400002152003320052200523238001220800002024000020052200521116002110910101600001000000001003385120252111315200492201160000102005320053200532005320053
1600242005215000046278001212800001280000626400001152003320052200523908001220800002024000020052200521116002110910101600001000000001003885114252111515200492201160000102005320053200532014620053

Test 6: throughput

Count: 16

Code:

  fmla v0.4h, v16.4h, v17.4h
  fmla v1.4h, v16.4h, v17.4h
  fmla v2.4h, v16.4h, v17.4h
  fmla v3.4h, v16.4h, v17.4h
  fmla v4.4h, v16.4h, v17.4h
  fmla v5.4h, v16.4h, v17.4h
  fmla v6.4h, v16.4h, v17.4h
  fmla v7.4h, v16.4h, v17.4h
  fmla v8.4h, v16.4h, v17.4h
  fmla v9.4h, v16.4h, v17.4h
  fmla v10.4h, v16.4h, v17.4h
  fmla v11.4h, v16.4h, v17.4h
  fmla v12.4h, v16.4h, v17.4h
  fmla v13.4h, v16.4h, v17.4h
  fmla v14.4h, v16.4h, v17.4h
  fmla v15.4h, v16.4h, v17.4h
Register initialization (not part of the unrolled code):

  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020440145307000006102516012210016000010016000050012800004002140040400431997303211851601002001600002004800004004040040111602011009910010016000010000000010110316114003701600001004004140041400414004140041
160204412053080084006102516010010016002210016000050012800004231640040400402109503199981601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004004141191412064004140041
160204400403000000224245412516010010016000010016000050012800004002140040400401997303199991601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004119140044400414004140041
16020440040300000004245412516012210016000010016000050012800004117140040400402111103211481601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004119140044400414004140041
160204400402990000224244852516010010016000110016000050057152614002140040411902109503199981601002001600002004800004119041205111602011009910010016000010000000010110116114003701600001004119140044400414004140041
16020440040300000004202516010010016000010016000050012800004002140040400401997303211631601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004004141191412064004141206
16020440040299000004202516010010016000010016000050012800004002140040411901997303199981601002001600002004800004004040040111602011009910010016000010000000010110116114120201600001004004140041411914120640041
16020440040300000004244852516010010016002210016000050012800004002140043411902109503211481601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004004141191400444004140041
160204400402990000070702516012210016000010016000050057057744002140040411901997303199981601002001600002004800004004040040111602011009910010016000010000100010110116114120201600001004004140041400414004140041
16020440040300000004202516010010016002210016000050013199994002140040400402109503199981601002001600002004800004120540040111602011009910010016000010000000010110116114003701600001004004140041411914228640041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2547

retire (01) | cycle (02) | 03 | 0b | 18 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002440758300000080478955251600101016000010160000505868895114230842327400401999603223071600102016000020480000400404232711160021109101016000010000000010022311816211102040037157160000104231940041423194232842319
16002440040317000007302516009010160000101600005012800000142308400404232722237032002016001020160000204800004232740040111600211091010160000100000000100246221916222202042324307160000104004142328400414232840041
16002440040317000080678955251600901016008010160000505868895114230842296423272223703223071600102016000020480000423274004011160021109101016000010000020010022311191621191942324157160000104004142286400414232842328
16002442327300000080110895525160010101600001016000050586889511400214232742327199963332002016001020160000204800004004042327111600211091010160000100000000100223111916211202040037157160000104004142328400414232842328
1600244232729900000712025160090101600801016000050128000011423084004042327222373332230716001020160000204800004232740040111600221091010160000100000000100223121916211202042324155160000104232840041423284004140041
16002440040317000006789552516009010160043101600005058688951140021423274232722237333200201600102016000020480000400404232711160021109101016000010000000010022611191622171940037307160000104004142328423284232842328
16002440040317000006789552516001010160080101600005012800001140021400404232719996032230716001020160000204800004232742327111600211091010160000100000000100243111916211161940037155160000104004140041422864004142319
160024400403170000806789552516009010160080101600005012800000140021423274232719996032230716001020160000204800004232742327111600211091010160000100000000100226121916211202042324157160000104123042328423284232842315
160024423273170000807302516001010160000101600005058688951140021423274232722237333223071600102016000020480000400404232711160021109101016000010000000010024611191621119740037155160000104004142328400414232840041
16002440040317000004702516009010160080101600005012800000142308423274004022237032002016001020160000204800004232740040111600211091010160000100000000100223111916219120740037307160000104004142328400414232842328