Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

TBX (single register table, 8B)

Test 1: uops

Code:

  tbx v0.8b, { v1.16b }, v2.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
10042037150611687251000100010002646802022203720371572318951000100030002037203711100110000073116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110001373116111787100020382038203820382038
1004203715248216872510001000100026468020182037203715723189510001000300020372037111001100002473116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000973116111787100020382038203820382038
10042037150611687251000100010002646802018203720371572318951000100030002037203711100110000073116111787100020382038203820382038

Test 2: Latency 1->1

Code:

  tbx v0.8b, { v1.16b }, v2.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102042003714900000001241968725101001001000010010000500284768002001820037200371842231874510100200100002003000020037200371110201100991001001000010000000000710021622197910100001002003820038200382003820038
102042003715000000001391968725101001001000010010000500284768002001820037200371842231874510100200100002003000020037200371110201100991001001000010000000000710021622198620100001002003820038200382003820038
102042003715000000002511968725101001001000010010000500284768002001820037200841842431874510100200100002003000020037200371110202100991001001000010000000000710021632197910100001002003820038200382003820038
102042003715000000001071968725101001001000010010000500284768002001820037200371842231874510100200100002003000020037200371110201100991001001000010000000000710021623197910100001002003820038200382003820038
10204200371500000000611968725101001001000010010000500284768002001820037200371842231874510100200100002003000020037200371110201100991001001000010000201059800805034834198976100001002003820038200382003820038
1020420276150110267235202471196431021017211410060112106085932852812020198202772027618437241883710876218108282163248720276202755110201100991001001000010040201296034806025622199736100001002027620277202782028020273
1020420229152104579235202422196321221017511810060116107605982852698020162202752027518436261881910765214108312143248120274202765110201100991001001000010002211097830800035643199897100001002027720266202762028020276
102042027515110456604400395219610142102231241010812511064620285666102023420375204161844739188351118722411162224335012040420371811020110099100100100001000020021588008780273252004311100001002037120374203732041620421
1020420370153010021006119687251010010010000100100005002847680020018200372003718422818745107242261116323033990201812037181102011009910010010000100220000159850867039623200419100001002037420422203752037220374
10204200371500087924704034481961017910219123100841221115761528577650202702041920370184513918891113242321132922834008204172040581102011009910010010000100222000138852874029732200959100001002031720324203222032420326

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a7 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000000644816681978510000102003820038200382003820038
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671016220100002030000200372003711100211091010100001000006644916981978510000102003820038200382003820038
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000000642916681978510000102003820038200382003820038
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000100642916991978510000102003820038200382003820038
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000000642716991978510000102003820038200382003820038
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000103642816991978510000102003820038200382003820226
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000000642916991978510000102003820038200382003820038
100242003714910010002251122019687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000000642916991978510000102003820038200382003820038
10024200371501001000016419687251001010100001010000502847680200182003720037184443187671001020100002030000200372003711100211091010100001000000642916881978510000102003820038200382003820038
100242003715010010000264196872510010101000010100005028476802001820037200371844431876710010201000020300002003720037111002110910101000010000006429169101978510000102003820038200382003820038

Test 3: Latency 1->2

Code:

  tbx v0.8b, { v0.16b }, v1.8b
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a9 | ac | c2 | c3 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020420037150180611968725101001001000010010000500284768002001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011610197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768002001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011611197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768012001820037200371842203187451010020010000200300002003720037111020110099100100100001000032071011611197910100001002003820038200382003820038
102042008415000611968725101001001000010010000500284768002001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011611197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768002001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011611197910100001002003820038200382003820038
1020420037150006119687251010010010000100100005002847680020018200372003718422031874510100200100002003000020037200371110201100991001001000010000002001371011611197910100001002003820038200382003820038
102042003715090611968725101001001000010010000500284768012001820037200371842203187451058620010000200300002003720037111020110099100100100001000030071011611197918100001002003820038200382008620038
1020420037150270611968725101001001000010010000500284768012001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011611197910100001002003820038200382003820038
102042008415000611968725101001001000010010000500284768012001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011611197910100001002003820038200382003820038
1020420037150240611968725101001001000010010000500284768002001820037200371842203187451010020010000200300002003720037111020110099100100100001000000071011611197910100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100242003715011026719687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038
10024200371501102671968725100101010000101000050284768012001820037200371844431876710010201000020300002003720037111002110910101000010064451610101978510000102003820038200382003820038
100242003714911026719687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038
100242003715011026719687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038
1002420037150110273219687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038
100242003715011026719687251001010100001010000502847680120065200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038
100242003715011026719687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038
100242003715011026719687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100644101610111978510000102003820038200382003820038
10024200371501102671968725100101010000101000050284768002001820037200371844431876710010201000020300002003720037111002110910101000010064410161051978510000102003820038200382003820038
100242003715011026719687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100644101610101978510000102003820038200382003820038

Test 4: Latency 1->3

Code:

  tbx v0.8b, { v1.16b }, v0.8b
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204200371500000061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000000071011611197910100001002003820038200382003820038
10204200371500000061196744410129100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000000071011611197910100001002003820038200382003820038
102042003715000000611968725101001001000010010000500284768002001820037200371842231874510100200100002003000020037200371110201100991001001000010000003000071011611197910100001002003820038200382003820038
102042003715000012061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000030071011611197910100001002003820038200382003820038
10204200371500000061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000000071011611197910100001002003820038200382003820038
10204200371500000061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000000071013711197910100001002003820038200382003820038
1020420037150000420061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000000071011611197910100001002003820038200382003820038
10204200371490000061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000000000071011611197910100001002003820038200382003820038
102042003715000027061196872510100132100001001000050028476800200182013220179184293187451010020810000200300002003720037111020110099100100100001000000060071011611197910100001002003820038200382003820038
102042003715000000103196872510100100100121091000050028476800200182008420085184233187451010020010000200300002003720037111020110099100100100001000000002071011611197910100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100242003715000000006119687251001010100001010000502847680120018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820038
100242003715000000906119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820038
100242003715000000006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200841110021109101010000100000000006402162219785010000102003820038200382003820038
100242003715000000606119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820038
10024200371500000028206119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820038
1002420037150000006606119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820085
100242008315000000006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162319785010000102003820038200382003820038
1002420037150000000044119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006403162219785010000102003820038200382003820038
10024200371500000012006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820038
1002420037150000001506119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000000006402162219785010000102003820038200382003820038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  tbx v0.8b, { v8.16b }, v9.8b
  movi v1.16b, 0
  tbx v1.8b, { v8.16b }, v9.8b
  movi v2.16b, 0
  tbx v2.8b, { v8.16b }, v9.8b
  movi v3.16b, 0
  tbx v3.8b, { v8.16b }, v9.8b
  movi v4.16b, 0
  tbx v4.8b, { v8.16b }, v9.8b
  movi v5.16b, 0
  tbx v5.8b, { v8.16b }, v9.8b
  movi v6.16b, 0
  tbx v6.8b, { v8.16b }, v9.8b
  movi v7.16b, 0
  tbx v7.8b, { v8.16b }, v9.8b
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a7 | a8 | a9 | ac | c2 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200651500000012038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011150116112006001600001002006420064200642006420064
1602042006315000000330513258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011151116112006001600001002006420064200642006420064
160204200631500000012038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011151116112006001600001002006420064200642006420064
1602042006315000000300038258010010080000100800005006400001520044200632006332180100200800002002412032006320063111602011009910010016000010000000001011151171112006001600001002006420064200642006420064
16020420063150000000038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011151116112006001600001002006420064200642006420064
16020420063150000000038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011150116112006001600001002006420064200642006420064
160204200631500000000703258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011151116112006001600001002006420064200642006420064
1602042006315000000297038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011151116112006001600001002006420064200642006420064
16020420063150000000038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011151116112006001600001002006420064200642006420064
160204200631500000018038258010010080000100800005006400001520044200632006332180100200800002002400002006320063111602011009910010016000010000000001011150116112006001600001002006420064200642006420064

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002420078150010716164427800121280000128000062640000010200402005020050321800122080000202400002005920050111600211091010160000100000100303128252111910200472201160000102005120051200512005120051
160024200501610004427800121280000128000062640000110200312005020050321800122080000202400002005020050111600211091010160000100000100313519252111910200472201160000102005120051200512005120051
1600242005015000044278001212800001280000626400001102003120050200503218001220800002024000020050200501116002110910101600001020001004231119361111921200472201160000102005120051200512005120051
160024200501500151324427800121280000128000062640000115200312005020059321800122080000202400002005020050111600211091010160000100000100333117252111910200472201160000102005120051200512005120051
1600242005015000013927800121280000128000062640000110200312005020050321800122080000202400002005020050111600211091010160000100000100321361934422921200472201160000102005120051200512005120060
16002420050150024018427800121280000128000062640000111020031200502005032180012208000020240000200502005011160021109101016000010000010045161119342121921200472201160000102005120060200602005120051
16002420050150000145278001212800001280000626400000110200312005020059321800122080000202400002005020050111600211091010160000100013100301351925121721200472201160000102005120051200512005120051
1600242005015003604427800121280000128000062640000101020031200502005032180012208000020240000200502005011160021109101016000010000010042135119252111921200472201160000102005120051200512005120051
160024200501500390442780012128000012800006264000010102003120059200503218001220800002024000020059200501116002110910101600001000001004213521925212158200472201160000102005320053200532005320053
16002420052150000442780012128000012800006264000010102003120050200503218001220800002024000020050200501116002110910101600001000001004236119342211910200472201160000102005120051200512005120051

Test 6: throughput

Count: 16

Code:

  tbx v0.8b, { v16.16b }, v17.8b
  tbx v1.8b, { v16.16b }, v17.8b
  tbx v2.8b, { v16.16b }, v17.8b
  tbx v3.8b, { v16.16b }, v17.8b
  tbx v4.8b, { v16.16b }, v17.8b
  tbx v5.8b, { v16.16b }, v17.8b
  tbx v6.8b, { v16.16b }, v17.8b
  tbx v7.8b, { v16.16b }, v17.8b
  tbx v8.8b, { v16.16b }, v17.8b
  tbx v9.8b, { v16.16b }, v17.8b
  tbx v10.8b, { v16.16b }, v17.8b
  tbx v11.8b, { v16.16b }, v17.8b
  tbx v12.8b, { v16.16b }, v17.8b
  tbx v13.8b, { v16.16b }, v17.8b
  tbx v14.8b, { v16.16b }, v17.8b
  tbx v15.8b, { v16.16b }, v17.8b
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a7 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020440116299000300040010016010010016000010016000050054863900400194012140117200260320070160100200160000200480000400384003811160201100991001001600001000000010110216224010001600001004003940104402484012740127
160204401032990005700611372516019110016012510016048550054031251401074003840057200120319996160100200160000200480000400384012611160201100991001001600001000000310110216224003501600001004009840345401184011840118
1602044011830000030601254002516021110016011110016000050054038900400844006340038200120320075160100200160000200481443400384011711160201100991001001600001000000010110416224010001600001004003940127400394012740104
16020440038301003300010302516010010016009110016000050054863900400844005740066199730319996160100200160000200480000401034010311160201100991001001600001000000010110216224010001600001004010440104400394010440104
1602044010330000018008202516010010016000010016000050054031250400844003840106199730320084160100200160000200480000401264010311160201100991001001600001000000010110216224010001600001004010440039401274003940127
16020440038300000360070502516019110016008810016000050054038900400194003840057199730319996160100200160000200480000400384010311160201100991001001600001000000010110216224011401600001004011840118400394011840039
160204400383001003300611512516021110016011110016000050012800001400194006540112199760320061160100200160000200480000400384006511160201100991001001600001000000010110216224010001600001004011840039401184011840118
1602044011730000021006102516021110016011110016000050012800001400194003840126200350320061160100200160000200480000401034003811160201100991001001600001002000010110216224003501600001004010440039401044010440104
1602044012630000024006102516010010016009110016000050012800001400844010340344199730320075160100200160000200480000401174011711160201100991001001600001000000010110216224003501600001004003940127401044010440104
160204401033000002700401492516022510016000010016000050012800001400934005740126200120320076160100200160000200480000400384011711160201100991001001600001000000010110216224003501600001004003940104400394012740039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002440148301000051042451102516009210160082101600005012800000154008040038400992003132001816001020160000204800004009940099111600211091010160000100001002282191621199400960155160000104010040100401004010040100
16002440039300000000827302516009210160082101600005054026330154008040038400992007932007916001020160000204800004009940099111600211091010160000100001002282181621186400960155160000104010040100401004003940100
16002440038299000000109450251600921016008210160000505402633115400804003840099199963200791600102016000020480000400384009911160021109101016000010000100241132616422694009603010160000104010040100401004010040100
1600244009930000000082671102516009210160082101600005054026331154008040038400991999632007916001020160000204800004009940099111600211091010160000100001002282141621198400960155160000104010040100401004010040100
16002440099301000000827101102516009210160082101600005054026331154008040038400381999632012116001020160000204800004009940099111600211091010160000100101002282161621198400960155160000104010040100401004010040100
16002440099300000000826702516009210160082101600005012800001154008040038400992003132007916001020160000204800004009940099111600211091010160000100001002282161621185400960156160000104010040100401004010040100
160024400993000000000450251600921016000010160000505402633115400804003840074200313200181600102016000020480000400994009911160021109101016000010000100241132516422884009603010160000104010040100401004010040100
1600244009930000001800510251600921016008210160000505402633015400804003840038200313200791600102016000020480000400384009911160021109101016000010000100241032916422644009603010160000104014940040401024003940100
16002440099300000060082511102516001010160000101600005012800000154008040039401412003111200181600102016000020480000400384003811160021109101016000010000100241132416422464014513011160000104015140249401124021940100
1600244009930000000082510251600101016000010160000505402633015400804003840071199963203361600102016000020480000400994003811160021109101016000010000100241132616422854009603010160000104010040100401004010040039