Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCMLA (vector, 4S)

Test 1: uops

Code:

  fcmla v0.4s, v1.4s, v2.4s, #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)033f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
10044037306134072510001000100053190814018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
10044037306134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
10044037306134072510001000100053190814018403740373258338951000100030004037403711100110000973116113473100040384038403840384038
10044037306134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
10044037306134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
10044037306134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
1004403730189340725100010001000531908140184037403732583389510001000300040374037111001100003673116113473100040384038403840384038
10044037306134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
10044037306134072510001000100053190804018403740373258338951000100030004037403711100110000073116113473100040384038403840384038
100440373061340725100010001000531908140184037403732583389510001000300040374037111001100002773116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fcmla v0.4s, v1.4s, v2.4s, #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080918191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a6a7a8a9acc2cfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000710021622394790100001004003840038400384003840038
10204400373000000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000001030710021622394790100001004003840038400864003840038
102044003730000000006139407251010010010000100100005005706908040018400374003738108026387451010020010000200300004003740037111020110099100100100001000000000710121622394790100001004003840038400384003840038
10204400372990000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000740121622394790100001004003840038400384003840038
10204400373000000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000710121622394790100001004003840038400384003840038
10204400373000000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000710121623394790100001004003840038400384003840038
10204400373000000000613940725101001001000010010000500570690804001840037400373810873387451010020010000200300004003740037111020110099100100100001000000000710121622394790100001004003840038400384003840038
10204400373000000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000710121622394790100001004003840038400384003840038
10204400372990000000823940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000710121622394790100001004003840038400384003840038
10204400373000000000613940725101001001000010010000500570690804001840037400373810803387451010020010000200300004003740037111020110099100100100001000000000710121622394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030a0b191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002440037300000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300100061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216233947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640316223947310000104003840038400384003840038
1002440037299000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
10024400373000000251394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037299000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010100640216223947310000104003840038400384003840038
1002440037299000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037299000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038
1002440037300000061394072510010101000010100005057069084001840037400373813033876710010201000020300004003740037111002110910101000010000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fcmla v0.4s, v0.4s, v1.4s, #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030b181e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2cfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002023000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003729900061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038
102044003730000061394072510100100100001001000050057069084001840037400373810833874510100200100002003000040037400371110201100991001001000010000071001161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080918191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2c5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003730000220702613940725100101010000101000050570690840088400374003738130338767103072010322223000040037400371110021109101010000100373020640216223947310000104003840038400384003840179
10024400373000000008843940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000060613940725100101010006101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100600640216223947310000104003840038400384003840038
1002440037300000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640225223947310000104003840038400384003840038
1002440037300000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000000005363940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037299000000613940725100101010000101000050570690840018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fcmla v0.4s, v1.4s, v0.4s, #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020440037300048613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
1020440037300075613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
1020440037300057613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
10204400372990150613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
1020440037300039613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
10204400373000285613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
102044003730003483503940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
10204400373000120613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038
10204400372990465613940725101001001000010010000500570690814001840037400373810833874510100200100002003052240037400371110201100991001001000010000007101161139479100001004003840038400384003840038
10204400373000357613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010000007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0308181e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400372990018061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
100244003730000660103394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001042006402162239473010000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
10024400373000000726394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000016402162239473010000104003840038400384003840038
10024400372990012061394072510010101000010100005057069081400184003740037381303387671001020100002030000400844003711100211091010100001000006402162239473010000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
10024400373000060390394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001001006402162239473010000104003840038400384003840038
1002440037300000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000006402162239473010000104003840038400384003840038
1002440037318000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374008411100211091010100001000606402162239473010000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fcmla v0.4s, v8.4s, v9.4s, #90
  movi v1.16b, 0
  fcmla v1.4s, v8.4s, v9.4s, #90
  movi v2.16b, 0
  fcmla v2.4s, v8.4s, v9.4s, #90
  movi v3.16b, 0
  fcmla v3.4s, v8.4s, v9.4s, #90
  movi v4.16b, 0
  fcmla v4.4s, v8.4s, v9.4s, #90
  movi v5.16b, 0
  fcmla v5.4s, v8.4s, v9.4s, #90
  movi v6.16b, 0
  fcmla v6.4s, v8.4s, v9.4s, #90
  movi v7.16b, 0
  fcmla v7.4s, v8.4s, v9.4s, #90
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)031e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204200891500029258011610080016100800285006401961200462006520065612801282008002820024008420065200651116020110099100100160000100090011110119116002006201600001002006620066200662006620066
16020420065156002925801161008001610080028500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010211116112006201600001002006620149200662006620066
16020420065151004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111117112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065151004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066
16020420065150004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010142116112006201600001002006620066200662006620066
16020420065151004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000000010111116112006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)03071e3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024200651504005225800121280000128000062640000110200282005120047323800122080000202400002005120047111600211091010160000100001003083162042274200442150160000102005220048200522005220052
1600242005115050071725800121280000128000062640000110200322005120047323800122080000202400002005120047111600211091010160000100001003183142421147200482300160000102004820048200482004820052
160024200471501005225800121280000128000062640000115200282005120047323800122080000202400002005120047111600211091010160000100001003383172022274200482300160000102005220052200482004820052
160024200471503004625800121280000128000062640000005200322005120051323800122080000202400002005120047111600211091010160000100001003062272042174200482300160000102004820052200522004820052
1600242005115052104625800121280000128000062640000010200322004720051323800122080000202400002004720051111600211091010160000100041003383172022274200482300160000102005220052200482005220052
160024200471515005225800121280000128000062640000100200322004720047323800122080000202400002004720047111600211091010160000100301002782142021147200442150160000102004820048200482004820048
1600252004715050012325800121280000128000062640000105200282004720047323800122080000202400002004720047111600211091010160000100001003031142021147200442150160000102004820048200482004820048
160024200471502004625800121280000128000062640000105200282004720047323800122080000202400002004720047111600211091010160000100001003082162023674200442150160000102004820048200482004820048
1600242004715050046258001212800001280000626400001052002820047200474123800122080000202400002004720047111600211091010160000100001003083152041177200442150160000102004820048200482004820048
160024200471502004625800121280000128000062640000100200322004720047323800122080000202400002004720047111600211091010160000100001003081152021177200442150160000102004820048200482004820048

Test 6: throughput

Count: 16

Code:

  fcmla v0.4s, v16.4s, v17.4s, #90
  fcmla v1.4s, v16.4s, v17.4s, #90
  fcmla v2.4s, v16.4s, v17.4s, #90
  fcmla v3.4s, v16.4s, v17.4s, #90
  fcmla v4.4s, v16.4s, v17.4s, #90
  fcmla v5.4s, v16.4s, v17.4s, #90
  fcmla v6.4s, v16.4s, v17.4s, #90
  fcmla v7.4s, v16.4s, v17.4s, #90
  fcmla v8.4s, v16.4s, v17.4s, #90
  fcmla v9.4s, v16.4s, v17.4s, #90
  fcmla v10.4s, v16.4s, v17.4s, #90
  fcmla v11.4s, v16.4s, v17.4s, #90
  fcmla v12.4s, v16.4s, v17.4s, #90
  fcmla v13.4s, v16.4s, v17.4s, #90
  fcmla v14.4s, v16.4s, v17.4s, #90
  fcmla v15.4s, v16.4s, v17.4s, #90
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03081e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020440061307005342025160160100160000100160000500128000004002140042400401997331999816010020016000020048000040040400401116020110099100100160000100001011081699411871600001004228240041400414231040041
1602044228130000142897025160100100160000100160000500131999904002142318400401997331999816010020016000020048000040040423181116020110099100100160000100401011091699400371600001004004240041400414004140042
16020440040317005161025160100100160000100160000500128000004002140040400402109531999816010020016000020048000040040400411116020110099100100160000100031011081698400371600001004004140041400414231940041
1602044231430000061025160100100160000100160000500586833314002142318400401997331999916010020016000020048000040040423181116020110099100100160000100001011091694400371600001004004140041400414004240042
1602044004030000061897025160153100160000100160000500128000004002140040400421997332000016010020016000020048000040040422811116020110099100100160000100001011091698400371600001004230640041400414004140041
1602044231829900061897025160100100160000100160000500586820604228640040423051997331999816010020016000020048000042281400401116020110099100100160000100001011041648400371600001004231940041400414004140041
16020440040302005361897025160100100160000100160000500586833304229940040400401997331999816010020016000020048000040040422811116020110099100100160000100101011091699400371600001004004340042400414004140043
16020440042317000105025160100100160000100160000500586833304059840041400402027332000016010020016000020048000042318423051116020110099100100160000100001011091699400371600001004004140042400414004140041
16020440041300005342025160100100160000100160000500132000004229041190400401997332227616010020016000020048000040040400401116020110099100100160000100001011091699400371600001004004142282400414231940041
1602044004030000061025160100100160000100160000500128000004002140040423181997331999816010020016000020048000042318400401116020110099100100160000100201011091699400371600001004151440041400424004142310

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire (01)cycle (02)03071e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600244229430015105167025160010101600001016000050128000011540021400404004019996032227616001020160000204807324004140040111600211091010160000100150010022831151622110740037157160000104004140041400414231940041
1600244231830000067469525160010101600001016000050128000011540021400404124519996032002016001020160000204800004004040040111600211091010160000101601002281191721181040037155160000104004140041400414124640041
160024423183000004746952516001010160001101600005012800001104002140040400402117303200201600102016000020480000400404004011160021109101016000010018601002281181621111740037155160000104123042328412464228640041
1600244231830000068025160010101600001016000050128000011540021400404004019996032002016001020160000204800004004041245111600211091010160000100301002281191621111842282157160000104004142286422864004142286
160024400403170004702516001010160000101600005012800001154002140040423181999673200201600102016000020480000423184004111160021109101016000010184010022331101621191240037155160000104004140041400414004140041
1600244231831600047025160010101600011016000050128000011540021400404004019996032002016001020160000204800004004040040111600211091010160000100108010022811101621181140037155160000104004140041400414124640041
16002440040317000470251600101016000010160000501320000115422664004040040199960320020160010201600002048000040040400401116002110910101600001003010022831101621112840037155160000104004140041400414004142297
1600244004030900047025160010101600001016000050128000010040021400404004019996032122516001020160000204800004231840040111600211091010160000100901002283181621191240037155160000104004140041400414004140041
160024422963000035670251600101016000010160000501319999110412004004041219222440320020160010201600002048000041219400401116002110910101600001003010022331916211111040037156160000104004140041400414004240041
16002442318300000522025160010101600001016000050128000010542266400404228522212025222651600102016000020480000400414004111160021109101016000010011401002233171621110740037155160000104042540041423284004140041