Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLA (vector, 4S)

Test 1: uops

Code:

  fmla v0.4s, v1.4s, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
1004403730020934072510001000100053190814018403740833258338951000100030004037403711100110000073216223473100040384038403840384038
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
1004403730010834072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
100440373106134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038
100440373006134072510001000100053190814018403740373258338951000100030004037403711100110000073216223473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmla v0.4s, v1.4s, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000000710031622394790100001004003840038400384003840038
102044003729900000613940725101001001000010010000500570690840018400374003738108338745101002001000020030000400374003711102011009910010010000100000007101216223947919100001004003840038400384003840038
10204401813000000061394074410100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000003710121622394790100001004003840038400384003840038
102044003730000000103394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000000710121632394790100001004003840038400384003840038
102044003730000018161394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110202100991001001000010000000710121622394790100001004003840038400384003840038
10204400373000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000000710121622394790100001004003840038400384003840038
10204400373000000161394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000103710121622394790100001004003840038400384003840038
10204400372990000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000000710121622394790100001004003840038400384003840038
10204400373000000061394072510100100100001001014750057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000000710131622394800100001004003840038400384003840038
10204400373000000061394072510100100100001001000053657073154001840037400373810833874510100200100002003000040037400371110201100991001001000010000100739121622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400372990613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640416223947310000104003840038400384003840038
100244003730001243940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010030640216223947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640216233947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037299013983940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
100244003730007263940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400372990613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400373000613940725100101010000101000050570690804001840037400373813033876710157201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmla v0.4s, v0.4s, v1.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037300000002603940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003729900000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139513100001004003840038400384003840038
102044003729900000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840086400854003840038
1020440037299011002083940725101001001000010010000500570868904001840084401323810833874510100200103272063048340134401323110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000000613940725101001001000010010000522570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003729900000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000017101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100244003729900028517612439407251001010100001010000505706908040053400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908040018402264003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
10024400373001002706139407251001010100001010000505712492040018400374003738130338767100102010000203000040037400371110021109101010000100016006402162239473010000104003840038400384003840038
1002440037300000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100003006402162239473010000104003840038400384013340038
1002440037300000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100013006402163339473010000104003840038400384003840038
1002440037299000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100020206402162239473010000104003840038400384003840038
1002440037299000006139407251001010100001010000505708304040018400374003738130338767100102010000203000040037400371110021109101010000100000006402242239473010000104003840038400384003840038
10024400373000000012439407251001012100001010148505706908040018400844003738135338786100102010000203048340037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037299000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037299000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840227

Test 4: Latency 1->3

Code:

  fmla v0.4s, v1.4s, v0.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000613940725101001001000010010000500570690804001804003740037381157387411010020410008200300244003740037111020110099100100100001000071001161139479100001004003840038400384003840038
102044003730007103940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840081400384003840038
102044003730006139407251010010010000100100005775706908040018040037400373810833874510100200100002003000040037400371110201100991001001000010018371001161139479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038
102044003729901053940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038
102044003730007263940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038
1020440037300028663940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038
10204400373000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001000071001161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037300061394072510010101000010100005057069084001840037400373813033878910010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400372992761394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440113300061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400373000145394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400373000631394074410010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmla v0.4s, v8.4s, v9.4s
  movi v1.16b, 0
  fmla v1.4s, v8.4s, v9.4s
  movi v2.16b, 0
  fmla v2.4s, v8.4s, v9.4s
  movi v3.16b, 0
  fmla v3.4s, v8.4s, v9.4s
  movi v4.16b, 0
  fmla v4.4s, v8.4s, v9.4s
  movi v5.16b, 0
  fmla v5.4s, v8.4s, v9.4s
  movi v6.16b, 0
  fmla v6.4s, v8.4s, v9.4s
  movi v7.16b, 0
  fmla v7.4s, v8.4s, v9.4s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020420091150040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011641643200621600001002006620066200662006620066
16020420065151040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011531633200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011631643200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011531633200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011531623200621600001002006620066200662006620066
16020420065151340258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000021011631633200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011631633200621600001002006620066200662006620066
16020420065150061258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011231633200621600001002006620066200662006620066
16020420065150040258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000001011531633200621600001002006620066200662006620066
16020420065150063258010010080000100800005006400000200462006520065323801002008000020024000020065200651116020110099100100160000100000201011631633200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242007615000000240462780012128000012800006264000011020033200522005232380012208000020240000200522005211160021109101016000010001003632232344221220200582402160000102006220062200622006220062
160024200611510000000522980012128000012800006264000001520033200612005232380012208000020240000200612006111160021109101016000010001004384113252111719200492201160000102005320053200532005320053
160024200521500000000522980012128000012800006264000001520033201352005232380012208000020240000200522005211160021109101016000010001003484111252111311201162201160000102005320053200532005320053
160024200521500100000522980012128000012800006264000011520033200522006132380012208000020240000200612006111160021109101016000010131004085211344221016201372402160000102006220062200622006220062
16002420086151011113688522980012128000012800006264000001520042200612006132380012208000020240000200612006111160021109101016000010001004185111344221114200492401160000102006220062200532005320062
1600242006115000000004627800121280000128000062640000115200332006120052323800122080000202400002006120061111600211091010160000100010039115213344221219200582402160000102006220062200622006220062
1600242006115000000006162780012128000012800006264000001520033200612006132380012208000020240000200612006111160021109101016000010001003685213344221313200492401160000102006220053200532005320053
1600242005215000000180462780012128000012800006264000011520033200612005232380012208000020240000200612006111160021109101016000010001004086217344211311200582402160000102006220062200622006220062
160024200611500000000462980012128000012800006264000011520033200522005232380012208000020240000200522005211160021109101016000010001003685114252111411200492201160000102005320053200532005320053
16002420052150000002704627800121280000128000062640000115200332005220052323800122080000202400002005220061111600211091010160000100010039116214254221411200582402160000102006220062200622006220062

Test 6: throughput

Count: 16

Code:

  fmla v0.4s, v16.4s, v17.4s
  fmla v1.4s, v16.4s, v17.4s
  fmla v2.4s, v16.4s, v17.4s
  fmla v3.4s, v16.4s, v17.4s
  fmla v4.4s, v16.4s, v17.4s
  fmla v5.4s, v16.4s, v17.4s
  fmla v6.4s, v16.4s, v17.4s
  fmla v7.4s, v16.4s, v17.4s
  fmla v8.4s, v16.4s, v17.4s
  fmla v9.4s, v16.4s, v17.4s
  fmla v10.4s, v16.4s, v17.4s
  fmla v11.4s, v16.4s, v17.4s
  fmla v12.4s, v16.4s, v17.4s
  fmla v13.4s, v16.4s, v17.4s
  fmla v14.4s, v16.4s, v17.4s
  fmla v15.4s, v16.4s, v17.4s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 09 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204410763002017310251601431001600641001600195005705933040021411904228521169619991160119200160024200480072422854004011160201100991001001600001000001111011811600400371600001004228640041411914120641191
160204400403090043314486251601081001600801001600195005715386040021400404004021115619991160123200160024200480072412054004011160201100991001001600001006001111011801600412061600001004120640041411914120642286
1602044004030802422614541251601001001600011001600005001280000040021400404004021111321163160100200160000200480000400434004011160201100991001001600001000000001011011611411871600001004004141191412064119141206
16020441205300000614485251601221001600221001600005001319999042266400404004019973321163160100200160000200480000400404004011160201100991001001600001000000001011011611411871600001004004140041400414004141206
160204400403000043420251601001001600001001600005005715261041185411904120519973321148160100200160000200480000400404004011160201100991001001600001000000001011011611400371600001004004140041400414119141206
160204412053090043614485251601221001600221001600005005705774040021400404004021095319998160100200160000200480000411904120511160201100991001001600001000000001011011611412021600001004004140041411914228640041
160204411903170017610251601431001600001001600005001280000041171400404004019973321148160100200160000200480000400404004011160201100991001001600001000300001011011621400371600001004228640041400414004140041
16020440040300000420251601001001600171001600005005705774041171400404004019973319998160100200160000200480000400404004011160201100991001001600001000000001011011611400371600001004004140041411914120640041
16020440040300000420521601001001600171001600005005715261040021400434004019973321148160100200160000200480000411904120911160201100991001001600001000000001011011611400371600001004120641191400424004140041
160204400403000270610251601001001600001001600005001280000041186412094004021111319998160100200160000200480000400404004011160201100991001001600001000000001011011611400371600001004004140041411914120640041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024400623000000000000674589251600101016005110160000505728047110400214004040040199960321206160010201600002048000042318400401116002110910101600001000008110026112107164113434003704027160000104231940041400414004140041
16002441226308000000000175402516001110160000101600005057280471154120741226412262114103212061600102016000020480000423184004011160021109101016000010000001002511310416211333423150208160000104004142319400414004340041
1600244004030000000000016702516007010160002101600005012800001154226240040423182224403200201600102016000020480000400404122611160021109101016000010000031002611410316211433412020207160000104122741227412274122741227
1600244228530000000000060470251600701016000110160000505868333115412004231840040199960322261160010201600002048000040041412191116002110910101600001000001021002611410316211683400400207160000104004140041400434004140041
1600244004031700000000060680251600101016000110160000501280000115412074004041226199960321199160010201600002048000041205412191116002110910101600001000001201002611510416211443412020208160000104004142319400414004441220
160024400403160100000000732457644160070101600171016000050572804711541200400404122619996032002016001020160000204800004004240040111600211091010160000100000181002611510316211443423150207160000104122741227412274122741206
160024400403170000000000670251600281016000010160000505868333115422994004042318222440320020160010201600002048000041226400401116002110910101600001000001261002511510316211443400370208160000104004142319400414231940041
16002440040300000000000048025160028101600001016000050128000011542299400404004019996032002016001020160000204800004004042318111600211091010160000100001871002511510516241553412160208160000104004140043423024004140041
160024400402990001000000498970251600631016000110160000505868333115400234004042318199960320020160010201600002048000042318400401116002110910101600001000001831002611510316211423423150208160000104004142319400414004340041
16002440040317000000000067884425160028101600001016000050571065411540021400404122619996032120616001020160000204800004230240043111600211091010160000100000871002611510516211553423150208160000104004340042400414004140041