Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCMLA (vector, by element, 4H)

Test 1: uops

Code:

  fcmla v0.4h, v1.4h, v2.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
10044037300613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037303613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319081401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
100440373091033407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038
10044037300613407251000100010005319080401840374037325833895100010003000403740371110011000073116113473100040384038403840384038

Test 2: Latency 1->1

Code:

  fcmla v0.4h, v1.4h, v2.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102044003730011000613940725101001001000010010000500570690814001840037400373811506387411010020010008200300244003740037111020110099100100100001000011172001161139493100001004003840038400384003840038
102044003730011000613940725101001001000010010000500570690814001840037400373811506387411010020010008200300244003740037111020110099100100100001000011171801161139493100001004003840038400384003840038
102044003730011000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000000071013164339479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000000071013163339479100001004003840038400384003840038
1020440037300009006139407104101361021000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000000071013163339479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740084111020110099100100100001000000071013163339479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001006000071013163339479100001004003840038400384003840038
102044003729900600613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000000071013163339479100001004003840038400384003840038
102044003730000000613940725101001001000010010000500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000000071013163339479100001004003840038400384003840038
102044003730000000613940725101001001000010010148500570690814001840037400373810803387451010020010000200300004003740037111020110099100100100001000000071013163339479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002440037300001206639407251001010100001010148505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038
100244003730000006139407251001010100001010000505706908140018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003729900306139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
10024400372990000258239407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038
1002440037299002106139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003729900006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003729900006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216323947310000104003840038400384003840038
100244003730000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216223947310000104003840038400384003840038
100244003730000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000640216233947310000104003840038400384003840038

Test 3: Latency 1->2

Code:

  fcmla v0.4h, v0.4h, v1.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400372990001033940725101001001000010010000500570690804001840037400373810833874510100200101742003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
10204400373003573264613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
10204400373000007263940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010017101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
1020440037299060613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690814001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038
1020440037300000613940725101001001000010010000500570690804001840037400373810833874510100200100002003000040037400371110201100991001001000010007101161139479100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100244003730000000001613940725100161010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000006403164339473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000006403164439473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690804001840037400373813033876710010201000020304864003740037111002110910101000010000000006404164339473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000006404163439473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000006403163439473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000006404164439473010000104003840038400384003840038
100244003730000000007263940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000006403164339473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000006404163439473010000104003840038400384003840038
10024400373000000000613940725100101010000101000050570690814001840037400373813033876710010201000020300004003740037111002110910101000010000000006404164439473010000104003840038400854003840038
10024400373000000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000001006404164439473010000104003840038400384003840038

Test 4: Latency 1->3

Code:

  fcmla v0.4h, v1.4h, v0.h[1], #90
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400372990000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373100000000061394072510100100100061001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000100000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510116100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071012511394790100001004003840038400384003840038
10204400373000000000061394072510100100100001001000050057069080400184003740037381080338745101002001000020030000400374003711102011009910010010000100000000071011611394790100001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400373710000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000000006402162239473010000104003840038400384003840038
10024400373710000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000020006402162239473010000104003840038400384003840038
10024400373470000000613940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000010006402162239473010000104003840038400384003840038
100244003734700000007263940725100101010000141000050570690804001840037400373813033876710159201000020300004003740037111002110910101000010000010006402162239547010000104003840038400384003840038
100244003732200000007263940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000040306632162239473010000104003840038400384003840038
100244003732100000006139407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000590006402162239473010000104008540038400384003840038
1002440037322000000010339407251001010100001010000505706908040018400374003738130338767100102010000203000040037400371110021109101010000100000220306407734239473210000104088240895409784098940937
1002440037321100000025263940725100101010000101000050570690804001840037400373813033876710010201000020300004003740037111002110910101000010000070606403802239509010000104008640180401814018140178
1002440133311000000089639407251001010100001010000505706908040018401314045238192703904812534281259324377824078240833171100211091010100001022303004676007882323239581610000104088940885408784083540889
10024408403121017162127158401375039236375101251110108201266492573064004064840887409323821571391601283128130732838940409284088519110021109101010000100000626502712100331452239744210000104041740563404624050940794

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  fcmla v0.4h, v8.4h, v9.h[1], #90
  movi v1.16b, 0
  fcmla v1.4h, v8.4h, v9.h[1], #90
  movi v2.16b, 0
  fcmla v2.4h, v8.4h, v9.h[1], #90
  movi v3.16b, 0
  fcmla v3.4h, v8.4h, v9.h[1], #90
  movi v4.16b, 0
  fcmla v4.4h, v8.4h, v9.h[1], #90
  movi v5.16b, 0
  fcmla v5.4h, v8.4h, v9.h[1], #90
  movi v6.16b, 0
  fcmla v6.4h, v8.4h, v9.h[1], #90
  movi v7.16b, 0
  fcmla v7.4h, v8.4h, v9.h[1], #90
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200911500402580100100800001008000050064000000200462006520065323801002008000020024000020065200651116020110099100100160000100001011100116112006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000010200462006520065323801002008000020024000020065200651116020110099100100160000100001011150116112006201600001002006620066200662006620066
160204200651500402580100100801041008000050064000000200462006520065323801002008000020024000020065200651116020110099100100160000100001011100116112006201600001002006620066200662006620066
1602042006515004025801001008000010080000500640000002004620065200653238010020080000200240000200652006511160201100991001001600001004151011100116112006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000005200462006520065323801002008000020024000020065200651116020110099100100160000100001011100116112006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000005200462006520065323801002008000020024000020065200651116020110099100100160000100101011100116112006201600001002006620066200662006620066
16020420065151186402580100100800001008000050064000005200462006520065323801002008000020024000020065200651116020110099100100160000100001011100116112006201600001002006620066200662006620066
1602042006515007052580100100800001008000050064000010200462006520065323801002008000020024032720065200912116020110099100100160000100101011100116112006201600001002015020148200662006620066
160204200651500402580100100800001008000050064000010200462006520065323801002008000020024000020065200651116020110099100100160000100001011100116112006201600001002006620066200662006620066
160204200651500402580100100800001008000050064000000200462006520065323801002008000020024000020065200651116020110099100100160000100001011100116112006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a7 | a8 | a9 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200711500000462780012128000012800006264000001102004220061200613238001220800002024000020061200611116002110910101600001000300010027165262721142200492201160000102006220062200622006220062
16002420061150000052298001212800001280000626400000102003320061200613238001220800002024000020061200611116002110910101600001000043010030167143442124200492201160000102017520071200622015420062
16002420061150000052298001212800001280000626400000110200422006120061323800122080000202400002006120061111600211091010160000100020506010028167243422224200582402160000102006220062200622006220062
160024200611510000522980012128000012800006264000000102004220061200613238001220800002024000020061200611116002110910101600001000100010031167243422242200582402160000102006220062200622005320062
1600242006115300005227800121280000128000062640000011020033200612006138238001220800002024000020061200611116002110910101600001000100010030167253442277200582401160000102005320053200532005320053
160024200521500000462780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001000000010027136142521124200492201160000102005320053200532005320053
160024200521500000462780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001000300010027136122521142200492201160000102005320053200532005320053
160024200521500000462780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001000000010027136142521124200492201160000102005320053200532005320053
1600242005215000007112780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001000000010027136142521142200492201160000102005320053200532005320053
160024200521500000462780012128000012800006264000011102003320052200523238001220800002024000020052200521116002110910101600001000000010027136122522257200492201160000102005320053200532005320053

Test 6: throughput

Count: 16

Code:

  fcmla v0.4h, v16.4h, v17.h[1], #90
  fcmla v1.4h, v16.4h, v17.h[1], #90
  fcmla v2.4h, v16.4h, v17.h[1], #90
  fcmla v3.4h, v16.4h, v17.h[1], #90
  fcmla v4.4h, v16.4h, v17.h[1], #90
  fcmla v5.4h, v16.4h, v17.h[1], #90
  fcmla v6.4h, v16.4h, v17.h[1], #90
  fcmla v7.4h, v16.4h, v17.h[1], #90
  fcmla v8.4h, v16.4h, v17.h[1], #90
  fcmla v9.4h, v16.4h, v17.h[1], #90
  fcmla v10.4h, v16.4h, v17.h[1], #90
  fcmla v11.4h, v16.4h, v17.h[1], #90
  fcmla v12.4h, v16.4h, v17.h[1], #90
  fcmla v13.4h, v16.4h, v17.h[1], #90
  fcmla v14.4h, v16.4h, v17.h[1], #90
  fcmla v15.4h, v16.4h, v17.h[1], #90
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204400633000000080620251601001001600801001600006261280000140021400404004019973031999816010020016000020048000040040400401116020110099100100160000100002000101101161142311251600001004004140041400414004142328
1602044004030000000042025160100125160000100160000500128000004002140040400402221403222851601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004232840041400414004140041
160204400402990000004202516010010016000010016000050012800001423084004040040199730319998160100200160000200480000400404004011160201100991001001600001000016012010112116114003701600001004019241118400414232842297
1602044004029900000042025160100100160000100160000500128000014002140040400401997303199981601002001600002004800004004042327111602011009910010016000010000200010110116114003701600001004231540041400414004140041
1602044004031700000042025160100100160000100160000500128000014002142327423271997303199981601002001600002004800004004040040111602011009910010016000010000000010110116114003701600001004004140041423284004140041
16020440040300000000420251601001001600001001600005005868895142308400404004019973031999816010020016000020048000040040400401116020110099100100160000100000000101101172340037251600001004232840041400414004140041
1602044004030000000042025160100100160000100160000500586889504002140040423271997303199981601002001600002004800004004040040111602011009910010016000010000000010110116114232401600001004004140041400414232840041
160204400403000000080610251601801001600801001600005005868895042308400404004019973032228516010020016000020048000040040400401116020110099100100160000100004300010110116114232401600001004004140041400414004142328
160204400402990000007288955251601801001600801001600005001280000140021423274004019973032228516010020016000020048000040040400401116020110099100100160000100003000101101161140037251600001004004140041400414004140041
1602044004030000000041025160168125160000125160000626128000004002140040400402221403222851601002001600002004800004004042327111602011009910010016000010000000010110116114232401600001004004142295400414004140041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 04 | 08 | 09 | 1e | 37 | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | a9 | ac | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600244022630000005306745762516006310160000101600005058683331104229940040423182224403200201600102016000020480000400404121911160021109101016000010000001002583112816111373842315218160000104004142319400414004140043
1600244004030010000153457625160010101600001016000050128000011541207400404004019996253200201600102016000020480000400404004011160021109101016000010000001002586113616111353540037218160000104122740041400414122740041
160024400403001000117345892516001010160053101600005058683331154229942318400401999603200201600102016000020480000400404004011160021109101016000010000001002286103616111243542324218160000104004142319400414231940041
160024400763001000015302516001010160000101600005058683331154002140040400401999603200201600102016000020480000400404231811160021109101016000010060001002386113516111253540037218160000104004141227412204231940041
1600244004031710005314702516001010160000101600005012800001154002140040423182224403222981600102016000020480000423184004011160021109101016000010000001002586113616111363640037218160000104122741227412274122740041
160024400403001000014789702516001010160000101600005012800001154229940040423181999603222981600102016000020480000400404004011160021109101016000010000001002486102516111352540037218160000104004140041400414231942319
16002442318300100053159025160063101600531016000050128000011540021423184004022208253200201600102016000020480000423184004011160021109101016000010000001002586113616111373742315218160000104004140041423194004140041
160024412293091000351738970251600631016005310160000505868333015400214004042318199962532229816001020160000204800004231840040111600211091010160000100000010025116113716111383740037218160000104004340041400414231942319
160024400422991000015389702516001010160000101600005058683330154002140040400402224403200201600102016000020480000400404231811160021109101016000010000001002586113716111283742315418160000104122040041400434004140041
1600244004230010001179025160010101600001016000050128000011540021412264122621141253212061600102016000020480000400404231811160021109101016000010000001002586113716111373540037218160000104122741227412204231940041