Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

DMB (NSHLD)

Test 1: uops

Code:

  dmb nshld

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst barrier (9c) | st unit uop (a7) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1004302723030202009100010001000600013303027303532885100010003026303511100110001000073116113022100030363028303630283036
1004303522030112001100010001000600014203035302532885100010003025303511100110001000073116113032100030273036302830363028
1004303522030122001100010001000600013303027303532893100010003035302711100110001000073116113032100030263036302730363028
1004302623030202009100010001000600004203035302632885100010003026303511100110001000073116113023100030363027303630283036
100430272213530202009100010001000600003303035302732884100010003027303511100110001000073116113032100030283036302830363027
1004303523030122001100010001000600003403026303532893100010003035302711100110001000073116113032100030283036302830363027
1004302623930112000100010001000600013403027303532893100010003035302511100110001000073116113024100030363028303630283036
1004302723030202009100010001000600013403026303532893100010003035302711100110001000073116113024100030283036302730363027
10043035230301120011000100010006000134030353027328841000100030273035111001100010001573116113023100030363027303630283036
1004302522030112001100010001000600014203035302532885100010003025303511100110001000073116113032100030273036302630363028

Test 2: throughput

Code:

  dmb nshld

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.9043

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020429036217000000290201890610100100100001001000050059800049259552913529027327843101002001000020029135232071110201100991001001000010000100000007910173222991925100001002996929884299772999229956
10204297752251177933704299251961210185118100621251005661162000049267673008229940262870310188200100562022990729987811020110099100100100001000210032044133710014011290400100001002913629028291362903729136
102042927321800006029027189001010010010000100100005005980004925963291352903432773510100200100002002902823295111020110099100100100001000010000000710011611290240100001002913629043291362904429136
102042903621800000029120190081010010010008100100005005980004926055290342913532774410100200100002002913523214111020110099100100100001000010000000710011611290330100001002913629044291362919629136
1020429042226000030291201900810100100100001001000050060864049260552903429135327843101002001000020029135232221110201100991001001000010013010000100710011611290400100001002913629028291362903729136
10204290352190000302902818908101001001000012110000500598000492595529135290273277421010020010000200290352329511102011009910010010000100001000000071046511611291320100001002904529136290442913629044
10204290362720000002902718900101001001000010010000500598000492595529135290443277511010020010000200290432329511102011009910010010000100001000001260710011611291320100001002904429136290352913629044
10204291352170000002902818908101001001000010010000500598000492596329135290433277431010020010000200290352329511102011009910010010000100001000001350710011611291320100001002902929136290352913629028
102042913521700000029120190081010010010000100100005006126104926055290272913532784310100200100002002913523221111020110099100100100001000010000000710011611290320100001002913629037291362904329136
102042913521700000029120190081010010010000100100005005980004925948291352903432775210100200100002002904223295111020110099100100100001000010000000710011611290310100001002913629036291362904529136

1000 unrolls and 10 iterations

Result (median cycles for code): 2.9951

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002429888224000000002985119830100251010000101000050599824926871298662995132859610010201000020298662995111100211091010100001000010000060006405165529948010000102986529952298672995229868
10024299512230000000029850198311001010100001010000505998249267872995129865328595100102010000202986729951111002110910101000010000100000930006406166629861010000102995229868299522986729952
1002429867224000000002985219831100101010000101000050599824926871298672995132868110010201000020299512986411100211091010100001000010000030006406166529948010000102986729952298672995229868
100242995122400000000299361991510010101000010100005059984492687129866299513286811001020100002029951298671110021109101010000100001000001530006406166629864010000102995229865299522986729952
1002429867224000000002985219832100101010000101000050610434926786299512986632859510010201000020298652995111100211091010100001000010000000006406166529948010000102986629952298652995229868
1002429951223000000002985219829100101010000101000050599824926871298642995132868110010201000020299512986611100211091010100001000010000060006404166629864010000102986629952298662995229866
1002429867224000000002993619915100101010000101000050599824926785299512986432859610010201000020298672995111100211091010100001000010000000006406166629948010000102986829952298672995229868
1002429951224000000002993619915100101010000101000050599824926871298662995132868110010201000020299512986711100211091010100001000010000060006406165529862010000102995229867299522986829952
1002429867224000000002993619915100101010000101000050599824926785299512986532859710010201000020298662995111100211091010100001000010000060006405166629948010000102986829952298672995229868
1002429951223000000002985119915100101010000101000050608594926871298642995132868110010201000020299512986711100211091010100001000010000000006406165529948010000102986829952298682995229867