Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

DMB (NSHST)

Test 1: uops

Code:

  dmb nshst

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst barrier (9c) | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1004303523010002940030202009100010001000600013403027303532885100010003026303511100110000001000000073116113032100030273036302730363028
10043027220000090030202009100010001000600014203035302732884100010003027303511100110000001000000073116113024100030363028303630283036
10043035230000000030202019100810081000627214983303531626299610081008306630642110011000022100000775085124113138100031553036312231563070
1004316522001112788030122001100810081008604814981302630353299810001000316230692110011000002100012738273124113023100031483115316430733157
10043063231001114788031522009100810081000640014985313431503296310081008315130272110011000000100800493273124113254100030273156302731163153
100430272300111120031342073100810001008600004965314630636288410001000311631142110011000003100010845084124113032100031623159317930283155
10043035240011013888030202009100010001000600004203035302732884100010003027303511100110000001000000073116113032100030283036302730363028
1004302723000103003011208310081008100860001340312031096296610001008307431302110011000420100010710073116113063100031463156310431613061
10043155240011116888031372111100810081008640014975315031576300510081008314430351110011000000100820745285116113135100030363201314131433165
10043154230011113588030642083100810081008635214970313931506299510081016314330351110011000220100810725084124113130100030363146315131553167

Test 2: throughput

Code:

  dmb nshst

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.9044

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102042913521700128829012189231010010010000100100005005980049260552903529135627738101002001000020029035232951110201100991001001000010000100000031117170160029132100001002903529136290442913629037
1020429036218000029020190141010010010000100100005005980049259552913529027627738101002001000020029035232951110201100991001001000010000100000031117170160029132100001002903629136290282913629035
10204290342180012029120190141010010010000100100005005980049260552904229135627839101002001000020029135232161110201100991001001000010000100004061117170160029132100001002903629029291362903529136
1020429135217000029120190141010010010000100100005005980049263492913529028627739101002001000020029036232951110201100991001001000010000100000001117170160029022100001002913629045291362904429136
1020429135217000029120190141010010010000100100005005980049260552902529135627839101002001000020029135232231110201100991001001000010000100000001117170160029031100001002903729136290432913629044
1020429043218000029019190141010010210000100100005005980049260552902529135627839101002001000020029135232231110201100991001001000010000100000001117170160029132100001002903629136290452913629035
1020429034218003029120190141010010010000100100005005980049260552902529135627839101002001000020029135232231110201100991001001000010000100000001117170160029031100001002913629036291362904529136
10204291352181027029028189141010011910000100100005005980049259452903429135627839101002001000020029135232221110201100991001001000010000100000001117170160029132100001002903729136290432913629036
1020429035218000029120190141010010010000100100005005980049259452913529035627746101002021000020029043232951110201100991001001000010000100000001117170160029024100001002913629043291362904429136
102042913521700120290101892210100100100001001000050059800492595529135290441527747101002001000020029043232951110201100991001001000010000100000001117170160029031100001002904429136290362913629044

1000 unrolls and 10 iterations

Result (median cycles for code): 2.9951

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 18 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002429919241100120129936198311001010100001010000506010714926785301713006332868110010201000020299512986611100211091010100001001000003000641624552986410000102986829952298682995229866
100242986524110000129936199151001010100001010000505998204926871298652995132868110010201000020299512986611100211091010100001001000000000641516452986310000102995229867299522986829952
100242995122311000129849198311001010100001010000505998214926787299512986732859710010201000020298662995111100211091010100001001000000000641516442986410000102995229866299522986629952
1002429951231110150129852198321001010100001010000505998214926871298662995132868110010201000020299512986611100211091010100001001000000000641516552994810000102995229868299522986829952
100242995122411000129936199151001010100001010000505998204926871298642995132868110010201000020299512986711100211091010100001001000000000641416432994810000102986529952298672995229868
100242986722411000129936199151001010100001010000505998214926871298662995132868110010201000020299512986611100211091010100001001000000000641516552994810000102986829952298682995229867
100242986622511000129936199151001010100001010000506004514926871298652995132868110010201000020299512986611100211091010100001001000000000641516552994810000102986829952298682995229866
100242986522411000129936199151001010100001010000505998214926785299512986432859610010201000020298672995111100211091010100001001000000000641516452986310000102995229866299522986729952
100242995122411000129852198301001010100001010000505998214926786299512986732859610010201000020298672995111100211091010100001001000000000641516452986210000102986829952298682995229867
100242986622411000129852198291001010100001010000505998204926871298662995132868110010201000020299512986511100211091010100001001000000000641516552994810000102986729952298672995229868