Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMLAL (by element, 4S)

Test 1: uops

Code:

  fmlal v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a6a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
1004403730010334072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403730061340725100010001000531908401840374037325833895100010003000403740371110011000005773116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403730126134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
1004403730061340725100010001000531908401840374037325833895100010003000403740371110011000048073116113473100040384038403840384038
100440373106134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100001073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100000073116113473100040384038403840384038
100440373006134072510001000100053190840184037403732583389510001000300040374037111001100002673116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fmlal v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8c5cdcfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400372990099439407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010000071006162239479100001004003840038400384003840038
102044003730000130939407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003730000129439407251010010010000100100005005706908140018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003730000137439407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003729900128839407251010010010000100100005005706908140018400374003738108033874510100200101802003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003730000127239407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003730000135039407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010001071012162239479100001004003840038400384003840038
102044003729900115539407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003729900127239407251010010010000100100005005706908040018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038
102044003730000127139407251010010010000100100005005706908140018400374003738108033874510100200100002003000040037400371110201100991001001000010000071012162239479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400373000061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640316323947310000104003840038400384003840038
100244003730024061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400372996061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373001770126394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373006061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223953610000104003840038400384003840038
1002440037299537061394072510010101000010100005057069081400180400374008438130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373006061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
1002440037300132061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730021061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730027061394072510010101000010100005057069081400180400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fmlal v0.4s, v0.4h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020440037300036613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300039613940725101001001000010010000500570690804001840085400843810833874510100200101612103000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730009613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690814001840037400373810833874510271200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
10204400372990291613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003730000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
1020440037300007263940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038
102044003729900613940725101001021000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)031e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a6a8accdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100244003729981061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069081400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400372990352726394072510010101000010100005057069080400184003740037381303387671001020101602030000400374003711100211091010100001000001640216223947310000104003840038400384003840038
10024400372990061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
1002440037300210251394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
10024400373000061394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038
100244003730000912394072510010101000010100005057069080400184003740037381303387671001020100002030000400374003711100211091010100001000000640216223947310000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fmlal v0.4s, v1.4h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a7a8a9acc2c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204400373000000006139407251010010010000100100005005706908040018400374003738115638741101002001000820030024400374003711102011009910010010000100000100011171801600394890100001004003840038400384003840083
10204400372990000006139407251010010010000100100005005706908040018400374003738115738740101002001000820030024400374003711102011009910010010000100000000011171801600394900100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
1020440037300000000665439407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
10204400372990000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
10204400372990000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908040018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
10204400373000000006139407251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000071011611394790100001004003840038400384003840038
10204400373000000006139389251010010010000100100005005706908140018400374003738108338745101002001000020030000400374003711102011009910010010000100000000000074011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03181e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730042706139407251001010100001010000505706908140018400374003738130338767100102010000203048040073400371110021109101010000100010640216223947310000104003840038400384003840038
100244003729900015639407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400373000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400372990006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fmlal v0.4s, v8.4h, v9.h[1]
  movi v1.16b, 0
  fmlal v1.4s, v8.4h, v9.h[1]
  movi v2.16b, 0
  fmlal v2.4s, v8.4h, v9.h[1]
  movi v3.16b, 0
  fmlal v3.4s, v8.4h, v9.h[1]
  movi v4.16b, 0
  fmlal v4.4s, v8.4h, v9.h[1]
  movi v5.16b, 0
  fmlal v5.4s, v8.4h, v9.h[1]
  movi v6.16b, 0
  fmlal v6.4s, v8.4h, v9.h[1]
  movi v7.16b, 0
  fmlal v7.4s, v8.4h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)0318191e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042009115000454025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011121611200621600001002006620066200662006620066
160204200651510094025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
160204200651500004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
160204200651500004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
160204200651500004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
16020420065150000402580100100800001008000050064000012004620065200653238010020080000200240000200652006511160201100991001001600001000001771011111611200621600001002006620066200662006620066
1602042006515000070525801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
160204200651500004025801001008000010080000500640980120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
160204200651500004025801001008000010080000500640000120046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066
160204200651500004025801001008000010080000500640000020046200652006532380100200800002002400002006520065111602011009910010016000010000001011111611200621600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)0318191e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9facc5cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024200721500004625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100333112520211141420044215160000102004820048200482004820048
16002420047150000462580012128000012800006264000011200282004720047323800122080000202400002004720047111600211091010160000100010032311122021191520044215160000102004820048200482004820048
1600242004715000046258001212800001280000626400001120028200472004732380012208000020240000200472004711160021109101016000010001003131182021181420044215160000102004820048200482004820048
160024200471500004625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100333111420211101520044215160000102004820048200482004820048
16002420047151000462580012128000012800006264000011200282004720047323800122080000202400002004720047111600211091010160000100010041311112021181320044215160000102004820048200482004820048
1600242004715000214625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100423111920211201520044215160000102004820048200482004820048
160024200471500004625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100383111020211151020044215160000102004820048200482004820048
160024200471500234625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100333111020211151020044215160000102004820048200482004820048
160024200471500094625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100323111020211112020044215160000102004820048200482004820048
160024200471500004625800121280000128000062640000112002820047200473238001220800002024000020047200471116002110910101600001000100373111520211151520044215160000102004820048200482004820048

Test 6: throughput

Count: 12

Code:

  fmlal v0.4s, v12.4h, v13.h[1]
  fmlal v1.4s, v12.4h, v13.h[1]
  fmlal v2.4s, v12.4h, v13.h[1]
  fmlal v3.4s, v12.4h, v13.h[1]
  fmlal v4.4s, v12.4h, v13.h[1]
  fmlal v5.4s, v12.4h, v13.h[1]
  fmlal v6.4s, v12.4h, v13.h[1]
  fmlal v7.4s, v12.4h, v13.h[1]
  fmlal v8.4s, v12.4h, v13.h[1]
  fmlal v9.4s, v12.4h, v13.h[1]
  fmlal v10.4s, v12.4h, v13.h[1]
  fmlal v11.4s, v12.4h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3474

retire (01)cycle (02)030b1e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1202044003931200082996125120100100120000100120000500563064014002040039400392493203249971201002001200002003600004003940039111202011009910010012000010000761021622424611200001004247140040400404247140040
1202044247030000061996125120100100120000100120000500563064014002040039400392493203249971201002001200002003600004003940039111202011009910010012000010000761021622424611200001004004040040400404004042471
1202044247030000361996125120100100120000100120000500585186914167241686400392493203249971201002001200002003600004003941686111202011009910010012000010000761021622416771200001004004041692400404168740040
1202044003931200361996125120100100120000100120000500585199314167241686416912657703266441201002001200002003600004168640039111202011009910010012000010000761021622416771200001004004041692400404168740040
12020440039312000613796625120101100120003100120000500563064014166741686400392493203249971201002001200002003600004003941691111202011009910010012000010000761021622400301200001004168740040416874004041687
120204416862990477361996125120100100120000100120000500585199314002040039416862657703266441201002001200002003600004168640039111202011009910010012000010000761021622400301200001004004041687400404168740040
1202044003931300361996125120100100120000100120000500585199314166741691400392493203249971201002001200002003600004003941686111202011009910010012000010000761021622400301200001004004041687400404168741692
12020441688300001613568925120103100120001100120000500563064014166741686400392493203249971201002001200002003600004003941691111202011009910010012000010000761021622416831200001004004041692400404169240040
12020440039312000613568925120103100120003100120000500563064014002040039416862657703266441201002001200002003600004169140039111202011009910010012000010000761021622400301200001004004041687400404168740040
1202044003931300061996125120100100120000100120000500585199314167240039416862493203266441201002001200002003600004003941686111202011009910010012000010000761021622400301200001004168740040400404169241687

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3337

retire (01)cycle (02)0307080a0b181e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc5cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0eaebec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
12002441920300110013006799612512001010120000101200005056306400140020400394003924955325019120010201200002036000040039400391112002110910101200001000000007524622416222334003030177120000104004040040400404004040040
120024400393000000000679961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000100000000752462241642234400301594120000104004040040400404004040040
120024400393120000000739961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000101000000752261141621225400301594120000104004040040400404004040040
1200244003929900000006799632512009011120085101200005056277901140060416274003925006325038120010201200002036000040039408271112002110910101200001020002907522311416212344003030177120000104004040040400404004040040
120024400393001000000679961251200101012000010120000505630640014002040039400392495532501912001020120000203600004003940039111200211091010120000100001000752431141641133400301594120000104004040040400404004040040
120024400393000000000679961251200101012000010120000505630640114002040039400392495532501912001020120000203600004003940039111200211091010120000100000000752232141641144400301594120000104004040040400404004040040
120024400393000000000679961251200101012000010120000505630640114002040039400392495532501912001020120000203600004003940039111200211091010120000100000000752231141621144400301594120000104004040040400404004040040
1200244003930030000006799612512001010120000101200005056306401140071400394003924955325019120010201200002036000040039400391112002110910101200001000100007522322416221444003030177120000104004040040400404004040040
120024400393000000000739961251200101012000010120000505630640114002040039400392495532501912001020120000203600004003940039111200211091010120000100000000752231151642234400301594120000104004040040400404004040040
1200244003930020010007399612512001010120000101200005056306401140020400394003924955325019120010201200002036000040039400391112002110910101200001000000007524311416422554003030177120000104004040040400404004040040