Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

DMB (ST)

Test 1: uops

Code:

  dmb st

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst barrier (9c) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10043035230030112009100010001000600042302730353288410001000302730351110011000100060073216223024100030363028303630273036
10043035220030122009100010001000600042303530263288510001000302630351110011000100000073216223023100030363028303630283036
10043035220030202001100010001000600042303530273288410001000302730351110011000100040073216223022100030283036302830363027
10043026220030202009100010001000600033302730353289310001000303530261110011000100050073216223032100030283036302830363027
10043026220030202009100010001000600042303530273288510001000302730351110011000100040073216223032100030273036302730363027
10043026220030202009100010001000600034302630353288410001000302730351110011000100000073216223032100030283036302730363036
10043035230030202001100010001000600042303530263288510001000302630351110011000100000073216223032100030283036302830363027
100430262301530202009100010001000600034302730353289310001000303530251110011000100000073216223024100030363028303630283036
10043035230030112009100010001000600034302730353289310001000303530261110011000100000073216223024100030363028303630283026
10043025230030112001100010001000600034302630353289310001000303530261110011000100000073216223032100030283036302830363036

Test 2: throughput

Code:

  dmb st

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.9044

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | st unit uop (a7) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204290282180000290281890610100100100001001000050059800149265282902529135627732101002001000020029025232951110201100991001001000010000100006181117170160029132100001002904529136290442913629136
10204291352180000291201891610100100100141251000050059800049259552913529027327742101002001000020029035232951110201100991001001000010000100000990007101161129132100001002913629043291362904429136
102042913521800002902919008101001001000010010000500598000492605529042291353278431010020010000200291352321611102011009910010010000100001000001260007101161129132100001002913629026291362903629044
1020429043219000029029189001010010010000100100005005980004926055290352913532784310100200100002002913523222111020110099100100100001000010000100007101161129040100001002913629035291362913629036
1020429035219000029120190081010010010000100100005005980004925963291352903532784310100200100002002913523216111020110099100100100001000010000100007101161129132100001002904529136291362913629036
10204290352180000290211889910100100100001001000050059800049260552902529135327843101002001000020029135232231110201100991001001000010000100003130007101161129132100001002904429136291362913629028
1020429027218000029120189081010010010000100100005005980004925962291352904332774210100200100002002903523295111020110099100100100001000010000030007101161129132100001002913629029290262904429136
10204291352170000290121897510100100100001001000050059800049259482913529034327752101002001000020029042232951110201100991001001000010000100004421580007101161129033100001002913629044290362904429136
10204291352170000291201890810100100100001001000050059800049260552903529135327743101002001000020029036232951110201100991001001000010000100004060007101161129032100001002913629035291362913629044
10204290432180000291201900810100100100001001000050059800049259542913529043327751101002001000020029261293471110201100991001001000010000100003930007101161129132100001002904429136291362913629035

1000 unrolls and 10 iterations

Result (median cycles for code): 2.9951

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | l1d tlb miss (a1) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024298672240299361983110010101000010100005059982049267872989729867328681100102010000202995129867111002110910101000010010000000640516552994810000102995229868299522986629952
10024299512240298511991510010101000010100005059982049267872995129867328597100102010000202986629951111002110910101000010010000000640516552994810000102995229868299522986829952
10024299512240299361982910010101000010100005059982049268712986529951328681100102010000202995129866111002110910101000010010000000640416452994810000102995229866299522986729952
10024299512230298511991510010101000010100005059982149268712986729951328681100102010000202995129867111002110910101000010010000000640416452986210000102995229868299522986829952
10024299512230298521991510010101000010100005059982149267872995129867328596100102010000202986629951111002110910101000010010000000640416442994810000102986829952298682986529952
10024299512230298521991510010101000010100005059982149267872995129867328681100102010000202995129867111002110910101000010010000000640316442994810000102995229891298682995229867
10024298662240299361982910010101000010100005059982149268712986729951328681100102010000202995129865111002110910101000010010000000640516552986410000102986529952298672995229868
10024298672240298521982910010101000010100005059982149268712986729951328681100102010000202995129865111002110910101000010010000000640516442986210000102995229866299522986529952
10024299512240298521991510010101000010100005059982149268712986529951328681100102010000202995129866111002110910101000010010000000640316432994810000102995229866299522986529952
10024299512230299361991510010101000010100005059982149267862995129865328597100102010000202986429951111002110910101000010010000000640416342986210000102986729952298682995229867