Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

TBL (three register table, 8B)

Test 1: uops

Code:

  tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
2004403730061368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
2004403730084368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
20044037300117368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
200440373121159368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
2004403730361368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
20044037305161368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
2004403730061368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
2004403730061368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
2004403730061368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038
20044037303156368725200020002000551680401840374037344733770200020006000403740371110011000073116113787200040384038403840384038

Test 2: Latency 1->2

Code:

  tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202044003730000058439687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100000071003162239787200001004003840038400384003840038
20204400373000006139687252010010020000102203045335718963140018401314018437173337554201002002000020060000400374003711102011009910010010000100220071002162239787200001004003840038400384003840038
202044003730010010339687252010010020000100200005005717680040018400374003737172337495201002002000020060000400374003711102011009910010010000100000071002162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100000217571012162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100000071212162239787200001004003840038400384003840038
20204400372990008239687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100000071012162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100000071012162239787200001004003840038400384003840038
20204400372990006139687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100000071012162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680040018400374003737172337495201002002000020060000400374003711102011009910010010000100000071012162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680040018400374003737172337495201002002000020060000400374003711102021009910010010000100000071212162239787200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002440037299000000061396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316333978720000104003840038400384003840038
20024400373000000000191396872520010102000010200005057176801400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316333978720000104003840038400384003840038
20024400373000000000346396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316333978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176801400180400374003737194337517200102020000206000040037400371110021109101010000100000300640316333978720000104003840038400384003840038
2002440037299000000061396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316333978720000104003840038400384003840038
20024400372990000000613968725200101020000102000050571768014001804003740037371943375172001020200002060000400374003711100211091010100001000001800640316333978720000104003840038400384003840038
20024400373000000144192061396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316333978720000104003840038400384003840038
2002440037299000060061396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100010000640316333978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316333978720000104003840038400384003840038
2002440037300000000061396872520010102000010200005057176800400180400374003737194337517200102020000206000040037400371110021109101010000100000000640316343978720000104003840038400384003840038

Test 3: Latency 1->3

Code:

  tbl v1.8b, { v0.16b, v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020440037299000061396872520100100200001072000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001001071012162239787200001004003840038400384003840038
20204400373000000124396872520100100200001002000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001000071012162239787200001004003840038400384003840038
20204400373000000726396872520100100200001002000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001001071012162239787200001004003840038400384003840038
2020440037300000061396872520100100200001002000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001004071012162239787200001004003840038400844003840038
2020440037300000061396872520100100200001002000050057176801400184003740037371723374952010020020000200600004003740037111020110099100100100001001418071012162239787200001004003840038400384003840038
20204400373000000613968725201001002000010220000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010071671013162239787200001004003840038400384003840038
20204400372990000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010056071012162239824200001004003840038400384003840038
20204400372990000613968725201001002000010020000500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010053071012163339787200001004003840038400384003840038
202044003730010006139687252010010020000100200005005717680140018400374003737172337495201002002000020060000400374003711102011009910010010000100681271012162339787200001004003840038400384003840038
20204400372990000613968725201001002000010020152500571768014001840037400373717233749520100200200002006000040037400371110201100991001001000010069371012163239787200001004003840038400384008640038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200244003730006139687252001010200001020000505717680040018400374003737194337517200102020000206000040037400371110021109101010000100230640216223978720000104003840038400384003840038
200244003730006139687252001010200001020000505717680140018400374003737194337517200102020000206000040037400371110021109101010000100600640216223978720000104003840038400384003840038
200244003729906139687252001010200001020000505717680040018400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
200244003729906139687252001010200001020000505717680040018400374003737194337517200102020000206000040037400371110021109101010000100600640216223978720000104003840038400384003840038
2002440037300022639687252001010200001020000505717680140066400374003737194337517200102020000206000040037400371110021109101010000100100640216223978720000104003840038400384003840038
2002440037299061396872520010102000010200005057176801400184003740037371943375172001020200002060000400374003711100211091010100001005600640216223978720000104003840038400384003840038
200244003730005555396872520010102000010200005057176801400184003740037371943375172001020200002060000400374003711100211091010100001003660640216223978720000104003840038400384003840038
20024400373000726396872520010102000010200005057176800400184003740037371943375172001020200002060000400374003711100211091010100001003700640216223978720000104003840038400384003840038
200244003730006139687252001010200001020000505717680040018400374003737194337517200102020000206000040037400371110021109101010000100330640216223978720000104003840038400384003840038
2002440037314031539687252001010200001020000505717680140018400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038

Test 4: Latency 1->4

Code:

  tbl v2.8b, { v0.16b, v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003915000000106119718252010610020013100200005002835358020047200662006617202617493201002002000820060024200542006611102011009910010010000100000111716001601198750200001002010020040200552006420040
202042003915000000506119751252011210020013100200005002837615020044200512003917217617520201002002000820060024200392005411102011009910010010000100000111716001600198600200001002006720040200402005520040
2020420039150000001306119751252011310020006100208855112839418020035200542003917202617493201002002000820060024200792006611102011009910010010000100040102111716011600198750200001002006720055200672005520040
2020420039150000001306119736252010710020007100200005002843188020047200662006617226617508201002002000820060024200662003911102011009910010010000100000111716011600198870200001002005520067200402004020040
202042006615000000606119751252011310020013100200005002835482020020200392005117217617520201002002000820060024200542003911102011009910010010000100010000710121622198790200001002005220052200402005220055
202042006615000000906119718252010510020007100200005002839418020047200392005417210317524201002002000020060000200542003911102011009910010010000100000000710121622198520200001002005520067200522004020067
20204200391500000013061197482520113100200131002000050028394180200202005420066172223175122010020020000200600002016020078111020110099100100100001000580000710121622198640200001002006420067200402006720067
2020420054150000001306119698252010510020116111200005002837709020035200662003917195317497201002002000020060000200542006311102011009910010010000100000000710121622198790200001002006720067200802006720067
2020420039150000006061197172520109100200051002000050028389870200352005420039172223174972010020020000200600002006620066111020110099100100100001000500000710121622198670200001002010020067200402004020040
202042006315000000506119718252010510020006100200005002838987020035200512006317210317512201002002000020060000200542006611102011009910010010000100000000710121623198760200001002006720040200642008420064

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420039150000000506119734252002210200071020000502837185120047200662003917232317534200102020000206000020063200511110021109101010000100000021006402162219862020000102005220064200522005220052
20024200511510000006061197482520017102001210202955028353651200442006320051172413175432001020200002060000200392006311100211091010100001000000117006402162219865020000102006420052200642005220055
2002420039150000000606119734252001510200121020000502837185120032200512003917241317543200102020000206000020063200511110021109101010000100001015006402162119862020000102005520052200402011520040
200242006315000000050251197172520017102000710200005028371850200322006320051172293175312001020200002060000200542006311100211091010100001000000165006402162219862020000102005220040200642005520040
200242005415000000070611973425200221020012102000050283898712003220051200511722931753120010202000020600002006320039111002110910101000010000003006402162219862020000102006420052200642005220064
20024200511500000006061197342520015102000710200005028371850200322006620039172173175312001020200002060000200632005111100211091010100001000000177006402162219874020000102006720040200642005220052
20024200511500000005061197342520019102000110200005028353560200202003920051172293175312001020200002060000200662003911100211091010100001000000108006402162219862020000102005220040200522004020052
2002420051150000000120611973425200151020001102000050283718512002020039200511722931753120010202000020600002003920051111002110910101000010000000006403162219874020000102006420052200642005220040
2002420039150000000502511971825200161020005102000050283535602004420051200631724131753120010202000020600002005120063111002110910101000010000003006402162219874020000102006420052200642005220052
2002420051150000000120611973425200221020012102000050283718512003220051200511722931754320010202000020600002005420051111002110910101000010000103006402162219862020000102005220040200522004020064

Test 5: Latency 1->5

Code:

  tbl v3.8b, { v0.16b, v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | 19 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | a9 | ac | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204400373000006139687252010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000100710544162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680144001834003740037371723374952010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038
20204400373000008239687252010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000000710503162239787200001004003840038400384003840038
202044003729900077739687252010010020000100200005005717680144001804003740037371727375122010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038
2020440037299000843968725201001002000010020000500571768014400180400374003737172253749520100200200002006000040037400371110201100991001001000010000021710502162339787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038
20204400373000006139687252010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038
20204400373000006139687452010010020000100200005005717680144001804003740037371723374952010020020000200600004003740037111020110099100100100001000000710502162239787200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | bd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104008540038400384003840038
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004003740037111002110910101000010000640216223984720000104003840038400384003840038
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004008540037111002110910101000010000640216223978720000104003840038400384003840038
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004003740037111002110910101000010000640216223978720000104003840038400384003840038
2002440037300000613968725200101020000102000050571768014001840037400373719433751720010202000020600004003740037111002110910101000010100640216223978720000104003840038400384003840038
20024400373000001453968725200101020000102000050571768014037840464404743721640376732122620213162064920405754032681100211091010100001021801307986125664021220000104042640526405214057440716
20024405243042116139687252001010200001421368615717680140018400374003737194337517200102020000206000044785400371110021109101010000100020640216223978720000104003840038400384003840038
20024400373000006139687252001010200001020000505718066140018400374003737194337517200102020000206000040037400371110021109101010000106100640216223978720000104003840038400384003840038

Test 6: throughput

Count: 8

Code:

  tbl v0.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v1.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v2.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v3.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v4.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v5.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v6.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  tbl v7.8b, { v8.16b, v9.16b, v10.16b }, v11.8b
  movi v8.16b, 9
  movi v9.16b, 10
  movi v10.16b, 11
  movi v11.16b, 12

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204400633000000044251601001001600001001600005001599960140023400424004219973320000160100200160000200480000400424004211802011009910010080000100018200511031633400391600001004004340043400434004340043
160204400423000000044102160100100160000100160000500159996014002340042400421997332008816010020016000020048000040042400421180201100991001008000010000000511031623400391600001004004340043400434004340043
16020440042300000008625160405100160000100160000500159996014002340042400421997332000016010020016000020048000040042400421180201100991001008000010000000511031633400391600001004004340043400434004340043
16020440042300000006525160100100160000100160000500159996004002340042400421997332000016010020016000020048000040042400421180201100991001008000010000000511034433400391600001004004340043400434004340043
16020440042300000004425160100100160000100160000500159996004002340042400421997332011816010020016000020048000040042400421180201100991001008000010000000511031633400391600001004004340043400434004340043
16020440042300100004425160100100160000100160000500159996004002340042400421997332002916010020016000020048000040042400421180201100991001008000010000000511031633400391600001004004340043400434004340043
160204400423000000010725160100100160000100160000500159996004002340042400421997332000016010020016000020048000040096400421180201100991001008000010000000511021633400391600001004004340043400434004340043
16020440042300000004425160100100160000100160000500159996004002340042400421997332000016010020016000020048000040042400421180201100991001008000010000000511031633400391600001004004340043400434004340043
16020440042300000004425160100100160000100160000500159996004002340042400421997332000016010020016000020048000040042400421180201100991001008000010000000511031623400391600001004004340043400434004340043
16020440042299000004425160100100160000100160000500159996004002340042400421997332000016010020016000020048000040042400421180201100991001008000010000000511031633400391600001004004340043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cd | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ea | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002440052299000000012725160010101600001016000050159996001400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160322400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160033400390160000104004340043400434009540043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160022400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000030050202160222400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160222400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160022400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160232400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160223400390160000104004340043400434004340043
1600244004230000000004325160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160122400390160000104004340043400434004340043
1600244004229900000008525160010101600001016000050159996000400234004240042199963200221600102016000020480000400424004211800211091010800001000000000050202160032400390160000104004340043400434004340043