Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

TBL (three register table, 16B)

Test 1: uops

Code:

  tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | 18 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d tlb miss (a1) | l1d cache writeback (a8) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
200440373000014736872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038
20044037300006136872520002000200055168040184037403734473377020002000600040374037111001100000073216223787200040384038403840384038

Test 2: Latency 1->2

Code:

  tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204400373000000001020396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000010710121622397870200001004003840038400384003840038
202044003730000000161396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003729900000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397871200001004003840038400384003840038
202044003729900000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
2020440037300000000726396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121622397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121623397870200001004003840038400384003840038
202044003730000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000710121623397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
20024400373000000000251396872520010102000010200005057176801400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216213978720000104003840038400384003840038
200244003730000001200726396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176801400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038
2002440037300000030082396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001000000000640216223978720000104003840038400384003840038

Test 3: Latency 1->3

Code:

  tbl v1.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020440037299000000061396872520100100200001002000050057176800400184003740085371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037299000000061396872520100100200001002000050057176800400184003740037371723374952010020220000200600004003740037111020110099100100100001000000000710121623397870200001004003840038400384003840038
2020440037300000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037300000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
20204400373000000000776396872520100100200001002000051157176800400184013440085371723374952010020020000200604984003740037111020110099100100100001000000000710121622397870200001004003840038400384003840087
2020440086301100100061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037299000000061396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
2020440037300000000061396872520100100200001002000050057176800400184003740037371723374952025220020000200600004003740037111020110099100100100001000000000710121622397870200001004003840038400384003840038
20204400373000000000103396872520100100200001002000050057176800400184003740037371723374952010020020000200600004003740037111020110099100100100001000020020560710131622397870200001004003840038400874013440086
202044003730000010104061396872520100100200001002000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001000001000710121622397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 18 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010016640316323978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216233978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640316223978720000104003840038400384003840038
200244003730010061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216323978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216233978720000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223984420000104003840038400384003840038
200244003730000061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
200244003729900061396872520010102000010200005057176804001840037400373719433751720010202000020600004003740037111002110910101000010000640216323978720000104003840038400384003840038

Test 4: Latency 1->4

Code:

  tbl v2.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0039

retire uop (01) | cycle (02) | 03 | 1e | 37 | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003915027120611973425201041002000210020000500283898720032200632003917207317497201002002000020060000200392006311102011009910010010000100710021622198760200001002004020040200402004020040
2020420054150050611973925201051002000210020000500283550520020200632005117210317524201002002000020060000200392005411102011009910010010000100710031622198640200001002005220040200642005220064
202042006315034820611973425201021002000510020000500283536520020200392005417195317521201002002000020060000200632006311102011009910010010000100710021622198521200001002006420052200402004020064
2020420063150279120611971825201061002000410020000500283898720020200662006317219317521201002002000020060930200542011011102011009910010010000100710121622198640200001002004020040200402006420040
2020420051150040611971825201011002001210020000500283538920020200392005117195317497201002002000020060000200512003911102011009910010010000100710121622198640200001002005220040200642005220064
2020420039150453120611971825201021002000510020000500283538120032200632006317195317497201002002000020060000200632005111102011009910010010000100710121622198640200001002004020064200522004020052
20204200391506931611971225201021002000510020000500283538120032200632003917207317497201002002000020060000200542006311102011009910010010000100710121622198520200001002005220040200402004020064
2020420039150020611971825201051002001210020000500283718520020200392006317195317512201002002000020060000200542005111102011009910010010000100710121623198760200001002006420040200642005220040
20204200601501260611971825201121002000210020000500283550520020200632005117195317521201002002000020060000200392003911102011009910010010000100710121622198520200001002005520052200402006420040
20204200391506120611971825201061002000510020000500283550520020200392005117195317497201002002000020060000200632005111102011009910010010000100710121622198640200001002005220040200642005220052

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0051

retire uop (01) | cycle (02) | 03 | 1e | 1f | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420039151420132511974225200161020009102000050283941820032200632003917217317531200102020000206000020054200541110021109101010000100000640216221987420000102006420052200642005220064
2002420051150005611971725200171020007102000050283941820044200392003917232317531200102020000206000020063200541110021109101010000100000640216221986220000102005220064200522006420052
20024200511502105611971525200181020008102000050283718520032200542003917217317519200102020000206000020039200661110021109101010000100000640216221987420000102006420052200522005220064
20024200511500096119751252002310200131020000502835365200202005120039172383175192001020200002060000200572005411100211091010100001000015640316231986220000102006420052200642005220064
20024200631502705611971525200201020009102000050283898720032200512003917229317531200102020000206000020063200511110021109101010000100000640316321986220000102006420052200522006420040
2002420039150005611971725200201020010102000050283941820047200512003917217317519200102020000206000020039200661110021109101010000100000640216221986220000102004020067200402005220058
2002420039151309611975125200231020013102000050283551320020200512003917235317519200102020000206000020054200511110021109101010000100000640216221987720000102004020052200402006720040
2002420039150243525611973425200161020005102000050283718520044200632003917217317519200102020000206000020057200541110021109101010000100000640216221986820000102005220058200642005820052
20024200511500013611971725200171020008102000050283718520035200542005417229317543200102020000206000020063200511110021109101010000100000640216221987420000102006420052200642005220067
2002420066150005611973425200221020012102000050283718520032200542005117229317531200102020000206000020051200631110021109101010000100000640216221987420000102005220064200522005220064

Test 5: Latency 1->5

Code:

  tbl v3.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | c2 | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204400373000000007263968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710021622397870200001004003840038400384003840038
2020440037300000000763968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710021622397870200001004003840038400384003840038
2020440037300000000613968725201001002000010320000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000106400710021622397870200001004003840038400384003840038
202044003729900008801043968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710021620397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710121622397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000100000710121622397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000020000726132622397870200001004003840038400864003840038
20204400373011010880613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710121632397870200001004003840038400384003840038
2020440037300000000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000103000710121622397870200001004003840038400384003840038
20204400373000001200613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010000000000710121622397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
200244003730000000726396872520010102000010200005057176800400184003740037371947337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216233978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640116223978720000104003840038400384003840038
20024400372990000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400373000000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216323978720000104003840038400384003840038
20024400372990000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038
20024400372990000061396872520010102000010200005057176800400184003740037371940337517200102020000206000040037400371110021109101010000100640216223978720000104003840038400384003840038

Test 6: throughput

Count: 8

Code:

  tbl v0.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v1.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v2.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v3.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v4.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v5.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v6.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  tbl v7.16b, { v8.16b, v9.16b, v10.16b }, v11.16b
  movi v8.16b, 9
  movi v9.16b, 10
  movi v10.16b, 11
  movi v11.16b, 12

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020440063300000342251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511021611400391600001004004340043400434004340043
1602044004230000044251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
1602044004230000044251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
16020440042300000709251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010030511011611400391600001004004340043400434004340043
1602044004230000067251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
16020440042300000199251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
16020440042300000197251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
160204400423000012195251601001001600001001600005001599960140023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
1602044004230000067251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043
1602044004230000044251601001001600001001600005001599960040023040042400421997332000016010020016000020048000040042400421180201100991001008000010000511011611400391600001004004340043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600244005229900000000070825160010101600001016000050159996004002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201716166040039000160000104009540043400434004340043
16002440042300000000000552925160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201716166040039000160000104004340043400434004340043
160024400422990000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616616040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616616040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616616040039000160000104004340043400434004340043
16002440042299000000000432516001010160000101600005015999601400234004240042199960320022160010201600002048000040042400421180021109101080000100000000005020616613040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201316166040039000160000104004340043400434004340043
1600244004230000000000043251600101016000010160000501599960140023400424004219996032002216001020160000204800004004240042118002110910108000010000000000502016161316040039000160000104004340043400434004340043
160024400423000000000004325160010101600001016000050159996014002340042400421999603200221600102016000020480000400424004211800211091010800001000000000050201616166040039000160000104004340043400434004340043
16002440042299000000000432516001010160000101600005015999601400234004240042199960320022160010201600002048000040042400421180021109101080000100000000005020616166040039000160000104004340043400434004340043