Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLS (by element, D)

Test 1: uops

Code:

  fmls d0, d1, v2.d[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110000373116113473100040384038403840384038
1004403730008434072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403731006134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmls d0, d1, v2.d[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037300000726394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100010710121632394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000712121623394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108938800101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038
102044003729990061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000712121622394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069081400180400374008338108338745101002001000020030000400374003711102011009910010010000100000710121622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0a | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037299000000092939407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000000640316323947310000104003840038400384003840038
100244003730000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
100244003730000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
100244003730000000906139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
100244003729900000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
1002440037300000000012639407251001010100001010000505706908140018400374003738158338767100102010000203000040084400371110021109101010000100000000640316223947310000104003840038400384003840038
100244003730000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
100244003730000000606139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
100244003730000000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000000640216223947310000104003840038400384003840038
100244003730000000906139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100202361520640216213954510000104013240133401314008540038

Test 3: Latency 1->2

Code:

  fmls d0, d0, v1.d[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000024061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037300003061394072510100100100001001014750057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037300002426461394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
102044003730000231061394072510100100100001001000050057069081400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
10204400373000145082394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000007102162239479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037299069061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
1002440037300027061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003730009061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003729900061394072510010101000010100005057069081400184008540085381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
1002440037300015061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100221091010100001000640216223947310000104003840038400384003840038
100244003729900061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003729900061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400534003740037381303387671001020100002030000400374003711100211091010100001030640216223954710000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmls d0, d1, v0.d[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000000726394072510100100100001001000050057069080400184003740037381156387401010020010008200300244003740037111020110099100100100001000011171701600394890100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069081400184003740037381156387411010020010008200300244003740037111020110099100100100001000011171801600394890100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400184003740037381156387411010020010008200300244003740037111020110099100100100001000011171701600394890100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400184003740037381156387411010020010008200300244003740037111020110099100100100001000011171701600394890100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069080400184003740037381156387401010020010008200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440653311000274394072510010101000010100005057069081400184003740037381303387891001020100002030000400374003711100211091010100001000000640116223947310000104003840038400384003840038
100244003730001061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000661394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003729900061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
1002440037300000631394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216333947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmls d0, d8, v9.d[1]
  movi v1.16b, 0
  fmls d1, d8, v9.d[1]
  movi v2.16b, 0
  fmls d2, d8, v9.d[1]
  movi v3.16b, 0
  fmls d3, d8, v9.d[1]
  movi v4.16b, 0
  fmls d4, d8, v9.d[1]
  movi v5.16b, 0
  fmls d5, d8, v9.d[1]
  movi v6.16b, 0
  fmls d6, d8, v9.d[1]
  movi v7.16b, 0
  fmls d7, d8, v9.d[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 0b | 18 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020420089151000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011121611200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000012004620065200653238010020080000200240000200652013511160201100991001001600001000001011111601200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000002004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
160204200651500024402580100100800001008000050064000002004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
16020420065151100402580100100800001008000050064000012012220065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
16020420065150009402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001011111611200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 07 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200641510046258001212800001280000626400001152002820047200473238001220800002024000020047200471116002110910101600001000100308314202116620044215160000102004820048200482004820048
160024200471500046258001212800001280000626400001152002820047200473238001220800002024000020047200471116002110910101600001000100278314202114620044215160000102004820048200482004820048
160024200471500946258001212800001280000626400001152002820047200473238001220800002024000020047200471116002110910101600001000100298416202114420044215160000102004820048200482004820048
160024200471511046258001212800001280000626400001152002820047200513238001220800002024000020047200471116002110910101600001000100268316202114320044215160000102004820048200482004820048
160024200471500046258001212800001280000626400001152002820047200473238001220800002024000020047200471116002110910101600001000100278316202113420044215160000102004820048200482004820048
16002420047150094625800121280000128000062640000111020028200512004732380012208000020240000200472004711160021109101016000010001003016526244226620048230160000102005220052200522005220052
16002420051150004625800121280000128000062640000111020028200472004732380012208000020240000200472004711160021109101016000010001002913414202114320044215160000102004820048200482004820048
16002420047151104625800121280000128000062640000111020028200472005132380012208000020240000200472004711160021109101016000010001002913414204117620044215160000102004820048200482004820048
16002420047150004625800121280000128000062640000111020028200472004732380012208000020240000200472004711160021109101016000010001002713424204113420044215160000102004820048200482004820048
16002420047150004625800121280000128000062640000111020028200472004732380012208000020240000200472004711160021109101016000010001003013419202127720044215160000102004820048200482004820048

Test 6: throughput

Count: 12

Code:

  fmls d0, d12, v13.d[1]
  fmls d1, d12, v13.d[1]
  fmls d2, d12, v13.d[1]
  fmls d3, d12, v13.d[1]
  fmls d4, d12, v13.d[1]
  fmls d5, d12, v13.d[1]
  fmls d6, d12, v13.d[1]
  fmls d7, d12, v13.d[1]
  fmls d8, d12, v13.d[1]
  fmls d9, d12, v13.d[1]
  fmls d10, d12, v13.d[1]
  fmls d11, d12, v13.d[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1202044003929900001224996125120100100120000100120000500563064014002041691400392493232499712010020012000020036000040039416871112020110099100100120000100010076103253240030151200001004074540040400404004040040
12020440902300014488028999612512010010012000010012000050058604880416674169140039249323249971201002001200002003600004168640039111202011009910010012000010000007610216324003001200001004004041692400404169240040
12020440039312012056199612512010010012000010012000050058463791400204003941691249323249971201002001200002003600004003941686111202011009910010012000010000307610316334003001200001004169240040416884004041692
1202044003931200006199612512010010012000010012000050058518691416724169140039249323266491201002021200002003600004169140039111202011009910010012000010000007610216234003001200001004025240040416924004040040
12020440039313000061379662512010110012000110012000050056306400400204003940039249323249971201002001200002003600004003941686111202011009910010012000010000007610316334003001200001004004040040416874004041688
12020441691300000161356892512010110012000310012000050056306400400204003941686249323266491201002001200002003600004003940039111202011009910010012000010000007610316334003001200001004004041687400404169240040
12020440039313021016199612512010010012000010012000050058519930416724003940039249323249971201002001200002003600004003941686111202011009910010012000010000007610216234003001200001004168740040400404004040040
12020440039312014488261356892512010110012015710012000050056306400416724003942470265823266491201002001200002003600004169140248211202011009910010012000010001279107610316424003001200001004004040040400404004040040
120204400393000000726379662512010110012000210012000050056306400400204003941691265823266491209032001200002003600004003941691111202011009910010012000010000007610316234003001200001004169240040407914004041692
12020440039312039006199612512010110012000110012000050056306400400204003941691265823266491201002001203962003600004003941691111202011009910010012000010000007610316334003001200001004184440040400404004040040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12002440039300000036799612512001010120000101200005056306400154002004169140039249550325019120010201200002036000041686400391112002110910101200001000007522831716422754167830167120000104168741687400404168740040
12002440039299000006799612512001010120000101200005056306400154002004003940039249550325019120010201200002036000040039400391112002110910101200001000007545831516212644003030177120000104004040040400404004040040
1200244003930000000673568925120010101200001012000050563064011540020041691400392660103250191200102012000020360000416914003911120021109101012000010000075241142316422354003030177120000104004040040400404004040040
1200244003930000000739961251200101012000010120000505630640115400200400394003924955032501912001020120000203600004003940039111200211091010120000100000752283131621135400301597120000104004040040400404004040040
12002440039300000036799612512001010120000101200005056306400154002004003940039249550325019120010201200002036000040039400391112002110910101200001000007522831516422454003030174120000104004041689400404004040040
120024400393000000073996125120010101200001012000050563064011540020040039400392495503250191200102012000020360000400394003911120021109101012000010000075221142516422534003015177120000104004040040400404004040040
12002440039300000007399612512001310120000101200005056306401154002004003940039249550325019120010201200002036000041686416861112002110910101200001000007524842516412534003030174120000104004040040400404004040040
1200244003929900000679961251200101012000010120000505630640015400200400394003924955032501912001020120000203600004003940039111200211091010120000100000752284131641135400303097120000104004040040400404004040040
120024400393000000073996125120010101200001012000050563064011540020040039400392660003250191200102012000020360000400394003911120021109101012000010000075241132516422534003030174120000104004040040400404004040040
1200244003929900000739961251200101012000010120000505630640015400200400394003924955032501912001020120000203600004003940039111200211091010120000100000752284251642245400301594120000104004040040400404168741687