Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCMLA (vector, 8H)

Test 1: uops

Code:

  fcmla v0.8h, v1.8h, v2.8h, #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
100440373006634072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373107934072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730037334072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110001073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373006134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fcmla v0.8h, v1.8h, v2.8h, #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03070a1e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8c5branch mispredict (cb)cdcfd0d2d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020440037300110061394072510100100100001001000050057069080400184003740037381156387411010020010008200300244003740037111020110099100100100001000111718011161139494100001004003840038400384003840038
1020440037300110061394072510100100100001001000050057069080400184003740037381157387401010020010008200300244003740037211020110099100100100001000111718001161239493100001004003840038400384003840038
10204400373001100726394072510100100100001001000053357083040400184018140084381083387451010020010000200300004003740037111020110099100100100001001100710103163439479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710103163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710103163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710103163539479100001004003840038400384003840038
1020440037299000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710104163339479100001004003840038400384003840038
10204400373000000726394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710103163339479100001004003840038400384003840038
1020440037300000161394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710103163339479100001004003840038400384003840038
1020440037300000061394072510100100100001001000050057069080400184003740037381083387451010020010000200300004003740037111020110099100100100001000000710103163339479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030b1e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640316233947310000104003840038400384003840038
100244003729900613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
1002440037299001563940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038
100244003730000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003729900613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fcmla v0.8h, v0.8h, v1.8h, #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030818191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6acc5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003729900000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200106632003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0318191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003730042924613937114310039151003613101485057069081400534008540037381402538767103072010161203048040085400371110021109101010000100000640316223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400372990006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040085400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300008436139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767103092010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400372990006639407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fcmla v0.8h, v1.8h, v0.8h, #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030818191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300000585613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300000738823940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300000675613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037299000501613940725101001001000010410000500570690804001840037400373810833882110100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300000471613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300000498613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400854003840038
1020440037300000612613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037299000534613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003729900078613940725101001001000010010000500570690804001840037400373810833878110100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03181e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa7a8a9accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037300006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640316223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000033640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000027640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000027640216223947310000104003840038400384003840038
10024400372990061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000045640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001010024640216223947310000104003840038400384003840038
10024400372990061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000027640216223947310000104003840038400384003840038
100244003729900346394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000024640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000021640216223947310000104003840038400384003840038
10024400373000061394072510010101000610100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000027640216223947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fcmla v0.8h, v8.8h, v9.8h, #90
  movi v1.16b, 0
  fcmla v1.8h, v8.8h, v9.8h, #90
  movi v2.16b, 0
  fcmla v2.8h, v8.8h, v9.8h, #90
  movi v3.16b, 0
  fcmla v3.8h, v8.8h, v9.8h, #90
  movi v4.16b, 0
  fcmla v4.8h, v8.8h, v9.8h, #90
  movi v5.16b, 0
  fcmla v5.8h, v8.8h, v9.8h, #90
  movi v6.16b, 0
  fcmla v6.8h, v8.8h, v9.8h, #90
  movi v7.16b, 0
  fcmla v7.8h, v8.8h, v9.8h, #90
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)03041e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a7a8a9accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020420078150104025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000100540101111161120137231600001002006620066200662006620066
16020420172152004065802211068010911380219578640864020046200652006532380100200800002002400002006520065111602011009910010016000010024000010111116112006201600001002006620066200662006620151
16020420065150004010780229100800001008000050064000002011320065200653238010020080000200240000200652006511160201100991001001600001000000254010136116112006201600001002006620066200662006620066
16020420065151009125801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001068011210080000500644384020446202342006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500643480020046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
160204200651500334025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000000610111116112006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)03081e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002420087150006931800121280000128000062640000015200422006120052323800122080000202400002006120061111600211091010160000100031004416621534422159200582402160000102006220062200622006220062
160024200521500033731800121280000128000062640000011020033200612005232380012208000020240000200612006111160021109101016000010490111100321351625212715200492201160000102005320053200532005320053
160024200521500046298001212800001280000626400001110200332005220052323800122080000202400002005220052111600211091010160000104001711003813511025212105200492201160000102005320053200532005320053
16002420052150004629800121280000128000062640000111020033200522005212238001220800002024000020052200521116002110910101600001000310030135162521159200492201160000102005320053200532005320053
1600242005215100462980012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001020310031135182521195200492201160000102005320053200532005320053
1600242005215000462980012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001010010029135152521158200492201160000102005320053200532005320053
16002420052150004629800121280000128000062640000111020033200522005232380012208000020240000200522005211160021109101016000010103100371351142521148200492201160000102005320053200532005320053
1600242005215000463180012128000012800006264000001102004220061200613238001220800002024000020061200611116002110910101600001070010037136283422198200492201160000102006220053200532005320053
16002420052150004629800121280000128000062640000111020042200522005232380012208000020240000200522005211160021109101016000010000100291351825211842004924035160000102006220062200622006220053
1600242005215000272129800121280000128000062640000111020042200522006132380012208000020240000200522005211160021109101016000010000100321351534421146200492201160000102005320053200532005320053

Test 6: throughput

Count: 16

Code:

  fcmla v0.8h, v16.8h, v17.8h, #90
  fcmla v1.8h, v16.8h, v17.8h, #90
  fcmla v2.8h, v16.8h, v17.8h, #90
  fcmla v3.8h, v16.8h, v17.8h, #90
  fcmla v4.8h, v16.8h, v17.8h, #90
  fcmla v5.8h, v16.8h, v17.8h, #90
  fcmla v6.8h, v16.8h, v17.8h, #90
  fcmla v7.8h, v16.8h, v17.8h, #90
  fcmla v8.8h, v16.8h, v17.8h, #90
  fcmla v9.8h, v16.8h, v17.8h, #90
  fcmla v10.8h, v16.8h, v17.8h, #90
  fcmla v11.8h, v16.8h, v17.8h, #90
  fcmla v12.8h, v16.8h, v17.8h, #90
  fcmla v13.8h, v16.8h, v17.8h, #90
  fcmla v14.8h, v16.8h, v17.8h, #90
  fcmla v15.8h, v16.8h, v17.8h, #90
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)0318191e373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8acc2c5cdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204403393160001706190212516010010016000010016000050012800001400214004040040199730319998160100200160000200480000400404123511160201100991001001600001000000001011011611400371600001004004142319400414004140041
160204400403000000014702516010110016000010016000050058659721400214121941245199730319998160100200160000200480000400404231811160201100991001001600001000000011011011611400391600001004228240041400414231940041
1602044004030000210023302516010010016000010016000050058717751400214004040040199730319998160100200160000200480000400404004211160201100991001001600001000000001011011611400371600001004004140043423154231940041
160204400403090000017302516018710016000010016000050012800001400214004042318199730319998160100200160000200480000400404121911160201100991001001600001000000001011011613400381600001004097541236423424004140042
16020440357317001236010902516031910016005311916000059012800001400244004042318199730322300160100200160000200480000423184004011160201100991001001600001000000001011011611400371600001004228240041400414231941236
16020442281300000104202516010010016000010016000050058659721400214004042318199730322276160100200160000200480000400404004111160201100991001001600001000000001011011611400391600001004231040041412204004140041
160204400403000012360350025160100100160000100160000500128000014002140041423421997317321177160100200160000200482172412354231411160201100991001001600001000000001011011611400391600001004004142319400414231942286
160204400403170001804402516010110016000010016000050058659721400214004042318222220322276160100200160000200480000400404228111160201100991001001600001000000001011011611400371600001004004142282400414231940041
160204400403170001061894525160145100160053100160000500586833314232340040423421997317321177160100200160000200480000400404004011160201100991001001600001000000001011011611400371600001004004142319400414004140042
16020440041308000004402516010010016000010016000050058683331400214004042318222220319998160100200160000200480000400404234211160201100991001001600001000000001011011601400371600001004004142343400414004142343

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire (01)cycle (02)030407080a18191e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8a9acc2cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024400403000000000001770251600101016000010160000501280000110400214004040040199960320022160010201600002048000040040400401116002110910101600001000060100228111316211108400370208160000104232840041423284004142328
160024400403000000000011770251600101016008010160000501280000115400214231840040199960322307160010201600002048000042327400401116002110910101600001000000100228211116211119400650208160000104004140041400414004140041
160024400403170000002400890251600101016008010160000501280000115400214004042327199960320020160010201600002048000042327400401116002110910101600001000000100248211116211119400370208160000104004140041400414004140041
160024400402990000000004702516009010160000101600005012800001154230840040423272113041322307160010201600002048000040040400401116002110910101600001000000100228211116211911400370208160000104004140041400414004140041
16002440040300000000000478955251600101016008010160000501280000115400214004040040199960322261160010201600002048000042327400401116002110910101600001000000100228211216211117423240208160000104004142328423284004140041
16002440040300000000003547895525160010101600001016000050128000011540021400404232719996333223071600102016000020480000400404232711160021109101016000010000001002282113162111313400370209160000104004140041400414004140041
1600244004030000000000071202516001010160000101600006058688951154002140040423271999633322307160010201600002048000040040400401116002110910101600001000000102328217116211118410194208160000104122241203411364142740468
160024410163091211710119788001198381316316127612161005131613688243133931110408174107540775206470492149416150820161159204838314088641332811600211091010160000100125063010022131113162111312412260208160000104004140041423284004140041
160024400403170000000060363502516001010160000101600005012800001110400214004040040199960320020160010201600002048000040040423271116002110910101600001000000100221331916211129400370208160000104004140041400414004140041
1600244004029900000000047025160010101600801016000050128000011104002140040400401999603223071600102016000020480000400404004011160021109101016000010000001004013311149211139406730408160000104135840450407824044842328