Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

TBL (three register table, 16B)

Test 1: uops

Code:

  tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01)cycle (02)03181e3a3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a1a8c2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
200440373000014736872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038

Test 2: Latency 1->2

Code:

  tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030818191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
20204400373000000001020396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000010710121622397870200001004003840038400384003840038
202044003730000000161396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003729900000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397871200001004003840038400384003840038
202044003729900000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
2020440037300000000726396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121623397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121623397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080b18191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a7a8a9acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
20024400373000000000251396872520010102000010200005057176801400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216213978720000104003840038400384003840038
200244003730000001200726396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176801400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000030082396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038

Test 3: Latency 1->3

Code:

  tbl v1.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080b18191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2cfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
2020440037299000000061396872520100100200001002000050057176800400184003740085371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037299000000061396872520100100200001002000050057176800400184003740037371723374952010020220000200600004003740037111020110099100100100001000000000710121623397870200001004003840038400384003840038
2020440037300000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037300000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
20204400373000000000776396872520100100200001002000051157176800400184013440085371723374952010020020000200604984003740037111020110099100100100001000000000710121622397870200001004003840038400384003840087
2020440086301100100061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037299000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037300000000061396872520100100200001002000050057176800400184003740037371723374952025220020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
20204400373000000000103396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000020020560710131622397870200001004003840038400874013440086
202044003730000010104061396872520100100200001002000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001000001000710121622397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)0307181e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010016640316323978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216233978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640316223978720000104003840038400384003840038
200244003730010061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216323978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216233978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223984420000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
200244003729900061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216323978720000104003840038400384003840038

Test 4: Latency 1->4

Code:

  tbl v2.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0039

retire (01)cycle (02)031e373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
202042003915027120611973425201041002000210020000500283898720032200632003917207317497201002002000020060000200392006311102011009910010010000100710021622198760200001002004020040200402004020040
2020420054150050611973925201051002000210020000500283550520020200632005117210317524201002002000020060000200392005411102011009910010010000100710031622198640200001002005220040200642005220064
202042006315034820611973425201021002000510020000500283536520020200392005417195317521201002002000020060000200632006311102011009910010010000100710021622198521200001002006420052200402004020064
2020420063150279120611971825201061002000410020000500283898720020200662006317219317521201002002000020060930200542011011102011009910010010000100710121622198640200001002004020040200402006420040
2020420051150040611971825201011002001210020000500283538920020200392005117195317497201002002000020060000200512003911102011009910010010000100710121622198640200001002005220040200642005220064
2020420039150453120611971825201021002000510020000500283538120032200632006317195317497201002002000020060000200632005111102011009910010010000100710121622198640200001002004020064200522004020052
20204200391506931611971225201021002000510020000500283538120032200632003917207317497201002002000020060000200542006311102011009910010010000100710121622198520200001002005220040200402004020064
2020420039150020611971825201051002001210020000500283718520020200392006317195317512201002002000020060000200542005111102011009910010010000100710121623198760200001002006420040200642005220040
20204200601501260611971825201121002000210020000500283550520020200632005117195317521201002002000020060000200392003911102011009910010010000100710121622198520200001002005520052200402006420040
20204200391506120611971825201061002000510020000500283550520020200392005117195317497201002002000020060000200632005111102011009910010010000100710121622198640200001002005220040200642005220052

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0051

retire (01)cycle (02)031e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
2002420039151420132511974225200161020009102000050283941820032200632003917217317531200102020000206000020054200541110021109101010000100000640216221987420000102006420052200642005220064
2002420051150005611971725200171020007102000050283941820044200392003917232317531200102020000206000020063200541110021109101010000100000640216221986220000102005220064200522006420052
20024200511502105611971525200181020008102000050283718520032200542003917217317519200102020000206000020039200661110021109101010000100000640216221987420000102006420052200522005220064
20024200511500096119751252002310200131020000502835365200202005120039172383175192001020200002060000200572005411100211091010100001000015640316231986220000102006420052200642005220064
20024200631502705611971525200201020009102000050283898720032200512003917229317531200102020000206000020063200511110021109101010000100000640316321986220000102006420052200522006420040
2002420039150005611971725200201020010102000050283941820047200512003917217317519200102020000206000020039200661110021109101010000100000640216221986220000102004020067200402005220058
2002420039151309611975125200231020013102000050283551320020200512003917235317519200102020000206000020054200511110021109101010000100000640216221987720000102004020052200402006720040
2002420039150243525611973425200161020005102000050283718520044200632003917217317519200102020000206000020057200541110021109101010000100000640216221986820000102005220058200642005820052
20024200511500013611971725200171020008102000050283718520035200542005417229317543200102020000206000020063200511110021109101010000100000640216221987420000102006420052200642005220067
2002420066150005611973425200221020012102000050283718520032200542005117229317531200102020000206000020051200631110021109101010000100000640216221987420000102005220064200522005220064

Test 5: Latency 1->5

Code:

  tbl v3.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)03080b191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8a9acc2branch mispredict (cb)cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
20204400373000000007263968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710021622397870200001004003840038400384003840038
2020440037300000000763968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710021622397870200001004003840038400384003840038
2020440037300000000613968725201001002000010320000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000106400710021622397870200001004003840038400384003840038
202044003729900008801043968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710021620397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710121622397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000100000710121622397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000020000726132622397870200001004003840038400864003840038
20204400373011010880613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710121632397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000103000710121622397870200001004003840038400384003840038
20204400373000001200613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710121622397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01)cycle (02)030b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9faccfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
200244003730000000726396872520010102000010200005057176800400184003740037371947337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216233978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640116223978720000104003840038400384003840038
20024400372990000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216323978720000104003840038400384003840038
20024400372990000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400372990000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038

Test 6: throughput

Count: 8

Code:

  tbl v0.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v1.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v2.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v3.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v4.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v5.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v6.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v7.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  movi v8.16b, 9
  movi v9.16b, 10
  movi v10.16b, 11
  movi v11.16b, 12

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01)cycle (02)0308091e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020440063300000342251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511021611400391600001004004340043400434004340043
1602044004230000044251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
1602044004230000044251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
16020440042300000709251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010030511011611400391600001004004340043400434004340043
1602044004230000067251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
16020440042300000199251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
16020440042300000197251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
160204400423000012195251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
1602044004230000067251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
1602044004230000044251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01)cycle (02)0307080a0b18191e1f3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a7a8a9acc2branch mispredict (cb)cfd5d6ddinst fetch restart (de)dfe0? int output thing (e9)eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600244005229900000000070825160010101600001016000050159996004002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201716166040039000160000104009540043400434004340043
16002440042300000000000552925160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201716166040039000160000104004340043400434004340043
160024400422990000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616616040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616616040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616616040039000160000104004340043400434004340043
16002440042299000000000432516001010160000101600005015999601400234004240042199960320022160010201600002048000040042400421180021109101080000100000000005020616613040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201316166040039000160000104004340043400434004340043
1600244004230000000000043251600101016000010160000501599960140023400424004219996032002216001020160000204800004004240042118002110910108000010000000000502016161316040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616166040039000160000104004340043400434004340043
16002440042299000000000432516001010160000101600005015999601400234004240042199960320022160010201600002048000040042400421180021109101080000100000000005020616166040039000160000104004340043400434004340043