Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLA (by element, 2D)

Test 1: uops

Code:

  fmla v0.2d, v1.2d, v2.d[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 09 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100440373000823407251000100010005319084018403740373258338951000100030004037403711100110000073216123473100040384038403840384038
100440373000613407251000100010005319084018403740373258338951000100030004037403711100110000073116223473100040384038403840384038
1004403730006134072510001000100053190840184037403732583389510001000300040374037111001100001873216213473100040384038403840384038
1004403730003093407251000100010005319084018403740373258338951000100030004037403711100110000073116223473100040384038403840384038
100440373000843407251000100010005319084018403740373258338951000100030004037403711100110000073116213473100040384038403840384038
100440373000613407251000100010005319084018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730006134072510001000100053190840184037403732583389510001000300040374037111001100001573216113473100040384038403840384038
100440373100823407251000100010005319084018403740373258338951000100030004037403711100110000073216113473100040384038403840384038
100440373000823407251000100010005319084018403740373258338951000100030004037403711100110000073216213473100040384038403840384038
1004403730001523407251000100010005319084018403740373258338951000100030004037403711100110000073216113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmla v0.2d, v1.2d, v2.d[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020440037300000061394072510100100100001001000050057069080400530400374003738108033874510100200100002003000040037400371110201100991001001000010000071004163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071003163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069081400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071014164339549100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400180400374003738108033874510100200100002003000040037400851110201100991001001000010000071013163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400180400374003738108033874510100200100002003000040037400372110201100991001001000010000071014163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069081400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400180400374003738108033874510100200100002003000040037400371110201100991001001000010000071013163339479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940743100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010001000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037299000613940725100101010000101000050570830414001840037400373813033876710010201000020300004003740037111002110910101000010000000640216233947310000104003840038400384003840038
1002440037299000613940725100101010000101000050570690814001840037400373813033876710157201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000640216223947310000104003840038400384003840038
1002440037300000613940725100101010000101000050570690814001840037400373813033876710010201017220300004003740037111002110910101000010000000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmla v0.2d, v0.2d, v1.d[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400373000000823940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161140034100001004003840038400384003840038
1020440037300000029973940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000017101161139479100001004003840038400384003840038
10204400373000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400372990000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
102044003730000007263940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000000613940744101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038
10204400373000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101171139479100001004003840038400384003840038
10204400373000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a7 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037299061394072510010101000010100005057069084001840037400373813003387671001020100002030000400374003711100211091010100001000000726402162239473010000104003840038400384003840038
100244003729906139407251001010100001010000505706908400184003740037381300338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
10024400373000613940725100181010000101000050570690840018400374003738130033876710010201000020300004003740037111002110910101000010000001416402162239473010000104003840038400384003840038
10024400373000613940725100101010006111000050570690840018400374003738130033876710010201000020300004003740037111002110910101000010000001416402162239473010000104003840038400384003840038
10024400372990613940725100101010000101000050570690840018400374003738130033876710010201000020300004003740037111002110910101000010000001356402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908400184003740037381300338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813003387671001020100002030000400374003711100211091010100001000079036402162239473010000104003840038400384003840038
1002440037300061394072510010101000010100005057069084001840037400373813003387671001020100002030000400374003711100211091010100001000018096402162239473010000104003840038400384003840038
100244008530006139398251001010100001010000505706908400184003740037381300338767100102010000203000040037400371110021109101010000100000036402163339473010000104003840038400384003840038
100244003729906139407251001010100001010000505706908400184003740037381300338767100102010000203000040037400371110021109101010000100000006402162239473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmla v0.2d, v1.2d, v0.d[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102044003730000061394072510100100100001001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100937101161139479100001004003840038400384003840038
1020440037299000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001004737101160139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001005007101161139479100001004003840038400384003840038
1020440037299000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001005067101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690804001804003740037381083387451010020010000200300004003740037111020110099100100100001004597101161139479100001004003840038400384003840038
102044022730000061394072510100100100061001000050057069081400180400374003738108338745101002001000020030000400374003711102011009910010010000100837101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001804003740037381083387451010020010000200300004003740037111020110099100100100001000667101161139479100001004003840038400384003840038
102044003729900061394072510100100100001001000050057069080400180400374003738108338745101002001000020030000400374003711102011009910010010000100007101161139479100001004003840038400384003840038
10204400373000006139407251010010010000100100005005706908140018040037400373810826387451010020210000200300004008440037111020110099100100100001003937101161139479100001004008540038400384003840038
102044003730000061394072510100100100001001000050057069080400183400374003738108338745101002001000020030000400374003711102011009910010010000100107101161139479100001004003840038400384003840185

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037299010339407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100026402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003729906139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003729906139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100006402162239473010000104003840038400384003840038
100244003730006139407251001010100001010000505706908040018400374003738130338767100102010000203000040083400371110021109101010000100006402162239473010000104003840038400384003840038
1002440037300061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001062306402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmla v0.2d, v8.2d, v9.d[1]
  movi v1.16b, 0
  fmla v1.2d, v8.2d, v9.d[1]
  movi v2.16b, 0
  fmla v2.2d, v8.2d, v9.d[1]
  movi v3.16b, 0
  fmla v3.2d, v8.2d, v9.d[1]
  movi v4.16b, 0
  fmla v4.2d, v8.2d, v9.d[1]
  movi v5.16b, 0
  fmla v5.2d, v8.2d, v9.d[1]
  movi v6.16b, 0
  fmla v6.2d, v8.2d, v9.d[1]
  movi v7.16b, 0
  fmla v7.2d, v8.2d, v9.d[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042008815000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066
1602042006515000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620171201712015920066
1602042006515000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000011011111611200621600001002006620066200662006620066
16020420065151000040258010010080000100800005006400000200462006520065402380100200800002002400002006520065111602011009910010016000010000020001011111611200621600001002006620066200662006620066
1602042006515000004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066
1602042006515000004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066
1602042006515100004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066
1602042006515000004025801001008000011480000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066
1602042006515000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066
1602042006515000004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000001011111611200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 1e | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | c2 | branch mispredict (cb) | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200861510029227800121280000128000062640000100200342005220052032380012208000020240000200522005211160021109101016000010000000100348111425211118200492601160000102005320053200532005320053
16002420052151002112780012128000012800006264000010520033200522005203238001220800002024000020052200521116002110910101600001000202010034331725211711200492401160000102005320053200532005320053
16002420053150120123727800121280000128000062640000115201102005220052032380012208000020240000200522005211160021109101016000010021000100348311125211813200832201160000102005320053200532005420054
160024200521500027227800121280000128000062640000100200332005220052032380012208000020240000200522005211160021109101016000010000600100348111125211117200492201160000102005320053200542005320053
160024200521500027427800121280000128000062640000115200332005220052032380012208000020240000200522005211160021109101016000010000000100308311125211117200492201160000102005320053200532005320053
16002420052150002352780012128000012800006264000011520033200522005203238001220800002024000020052200521116002110910101600001000000010030831725211711200492401160000102005320053200532005320053
1600242005215000263278001212800001280000626400001152003320052200525323800122080000202400002005220052111600211091010160000100000001003483113252111111200492201160000102005320053200532005320053
16002420052150004627800121280000128000062640000115200332005220052032380012208000020240000200522005211160021109101016000010000000100308311125211117200492201160000102005320053200532005320053
160024200521500021627800121280000128000062640000115200332005220052032380012208000020240000200522005211160021109101016000010000300100308311125211119200492201160000102005320053200532005320053
1600242005215000130278001212800001280000626400001152003320052200610323800122080000202400002005220052111600211091010160000100000001003083111252111111200492201160000102005320053200532005320053

Test 6: throughput

Count: 12

Code:

  fmla v0.2d, v12.2d, v13.d[1]
  fmla v1.2d, v12.2d, v13.d[1]
  fmla v2.2d, v12.2d, v13.d[1]
  fmla v3.2d, v12.2d, v13.d[1]
  fmla v4.2d, v12.2d, v13.d[1]
  fmla v5.2d, v12.2d, v13.d[1]
  fmla v6.2d, v12.2d, v13.d[1]
  fmla v7.2d, v12.2d, v13.d[1]
  fmla v8.2d, v12.2d, v13.d[1]
  fmla v9.2d, v12.2d, v13.d[1]
  fmla v10.2d, v12.2d, v13.d[1]
  fmla v11.2d, v12.2d, v13.d[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12020440039308002339961251201011001200011001200005005630640424514003940039249323249971201002001200002003600004169140039111202011009910010012000010000007610116114003001200001004004040040400404004040040
1202044003930000619961251201001001200001001200005005630640400204247040039273593274281201002001200002003600004169140039111202011009910010012000010000007610116114003001200001004004042471400404169240040
12020440039300001249961251201001001200001001200005005630640400204003940039249323249971201002001200002003600004003940090111202011009910010012000010000007610116114003001200001004004040040424714004040040
1202044168630001849961251201001001200001001200005005630640400204003940039249323249971202962001200002003600004247040039111202011009910010012000010000007610116114003001200001004004040040400404004040040
1202044003931200619961251201001001200001001200005005630640400204003942470273593274281201002001200002003600004003942470111202011009910010012000010000007610116114003001200001004004040040400404004040040
12020440039318002089961251201001001200001001200005005966386424514247040039249323249971201002001200002003600004003940039111202011009910010012000010000007610116114246101200001004004040040400404004040040
12020440039300002379961251201011001200001001200005005630640400204003941675249323266491201002001200002003600004247040039111202011009910010012000010000007610116114003001200001004247140040424714004040040
1202044003930000619961251201001001200011001200005005630640400204247040039249323274281201002001200002003600004247040039111202011009910010012000010000007610116114003001200001004004040040400404004042471
120204424703000010337966251201021001200001001200005005630640400204003940039249323249971201002001200002003600004003940039111202011009910010012000010000007610116114003001200001004004040040400404004040040
12020440039300007069961251201001001200011001200005005630640400204247040039249323249971201002001200002003600004003940039111202011009910010012000010000007610116114003001200001004247141702424714004040040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01) | cycle (02) | 03 | 07 | 0b | 18 | 1e | 1f | 37 | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1200244110430000000006199612512001010120000101200005056306400140020400394003924955032501912001020120000203600004003941691111200211091010120000100000000761800181600024400300000120000104004040040400404004041692
1200244168629900000006199612512001010120000121200006056306400141672400394003924955032501912001020120000203600004003940039111200211091010120000100000000752000051600042400300000120000104004040040400404004040040
1200244003930000000006199612512001010120000101200005056306400140020400394003924955032501912001020120000203600004003941701111200211091010120000100000000752000031600034400300000120000104004040040400404004040040
12002440039300000000083799612512001010120000101200005056306400140020400394003924955032501912001020120000203600004003940039111200211091010120000100001020840752000041600034400300000120000104169240040400404004040040
12002440039300100120006199612512001010120000101200005056306400140020400394003924955032501912001020120000203600004003940039111200211091010120000100000000752000021600026400300000120000104004040040400404004040040
1200244003930000000006199612512001312120000121200006056306400140020400394003924955032501912001020120000203600004003940039111200211091010120000100002000752000151620062400300000120000104004040040400404004040040
120024400393130001201061356892512001210120000101200005056306400140020400394003924955032666812001020120000203600004003940039111200211091010120000100000030752000041600042400300000120000104004040040400404004040040
1200244003930000000006199612512001010120000101200005056306400142451400394003924955032501912001020120000203600004003940039111200211091010120000100000000752000021600057400300000120000104004040040400404004040040
1200244003930000000006199612512001010120000101200005056306400140020400394003924955032667112001020120000203600004003940039111200211091010120000100000000752000041600024400300000120000104004040040401094177240040
1200244003930000000006199612512001010120000101200005056306400140020400394003924955032666812001020120000203600004003940039111200211091010120000100000000752000021600042403280000120000104004040040400404004040040