Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

DMB (OSHLD)

Test 1: uops

Code:

  dmb oshld

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst barrier (9c) | st unit uop (a7) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1004302623030202009100010001000600003430273035328851000100030273027111001100010000073216113023100030363028303630273036
1004303523030112001100010001000600003330263035328931000100030353035111001100010000073116113032100030283036302730363027
1004302623030112001100010001000600003430273035328851000100030263026111001100010000073116113023100030363027303630283036
1004303522030122001100010001000600003330273035328931000100030353027111001100010000073116113032100030273036302730363027
1004302623030122001100010001000600003430273035328931000100030353035111001100010000073116113022100030363028303630283028
1004302723030202000100010001000600003430273035328931000100030353027111001100010000073116113023100030283036302830363027
1004302623031712001100010001000600003430263035328931000100030353035111001100010000073116113024100030363027303630273036
1004303522030122001100010001000600003430253035328931000100030263026111001100010000073116113023100030363028303630273036
1004303522030112000100010001000600003430263035328931000100030353027111001100010000073116113032100030283036302830363027
1004302623030122009100010001000600004230353027328841000100030273027111001100010000073116113032100030283036302830363027

Test 2: throughput

Code:

  dmb oshld

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.9044

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | st unit uop (a7) | l1d cache writeback (a8) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102042913521700000002902018917101001001000010010000500598001492596329135290353278431010020010000200291352329511102011009910010010000100100000071021622297730100001002913629035291362902629136
102042913521700000002912019008101001001000010010000500598000492595429135290253278431010020010000200291352329511102011009910010010000100100000071021622291320100001002902829136290372913629026
10204290252180000000290281948010178123100601241005661062856149260552904329135327843101002001000020029135232951110201100991001001000010010055028932104223073221100001003050329617310493086330537
102043051223601115141332132029120202371012310010000100100005005980004925945291352903532775010100200100002002904223214111020110099100100100001001000042071021622291320100001002903629136291362903629136
102042913521700000002912019008101001001000010010000500598000492596329135290433277431010020010000200290352321611102011009910010010000100100000071021622291320100001002913629044291362903529136
102042913521700000002912019008101001001000010010000500598001492595529135290273277421010020010000200290352329511102011009910010010000100100000071021622291320100001002903529136290442913629037
102042903621900000002902018906101001001000010010000500598000492605529033291353278431010020010000200290342321511102011009910010010000100100000071021622291320100001002903629136290292913629036
102042903521800000002912019008101001001000010010000500598000492596329135290433277431010020010000200290352329511102011009910010010000100100000071021622291320100001002902929136290352913629028
102042902721800000002901018916101001001000010010000500598001492605529034291353278431010020010000200291352322111102011009910010010000100100000071021622291320100001002902829136290372913629136
102042904321800000002902918899101001001000010010000500598000492605529025291353278431010020010000200291352321511102011009910010010000100100000071021622291320100001002903529136290262913629036

1000 unrolls and 10 iterations

Result (median cycles for code): 2.9867

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100243103723311187105670413086020555100761210064121006455619101492791731142311512729781100922010032203102531094101100211091010100001021002424679047564963430991210000103119031184312663114731112
100243103022910099119779203117220637100841210064121008671627391492810430304311141229755100842010024203095331102101100211091010100001021009212730807575403331036210000103119530867308823103131146
10024311582340005101320880030229200231007612100271210008605998214926871298672995132868110010201000020299512995111100211091010100001001000000006402162329864010000102986729952298682995229868
10024298672240000000029851198301001010100001010000505998214926871298672995132868110010201000020299512995111100211091010100001001000000006403163329862010000102995229867299522986829866
10024298652250000000029852198311001010100001010000505998214926871298662995132868110010201000020299512986611100211091010100001001000030006402162329948010000102986629952298652995229868
10024298672250000000029936199151001010100001010000505998214926784299512986632859610010201000020299512995111100211091010100001001000000006403163329948010000102986829952298682995229866
10024298652240000000029851198291001010100001010000505998214926871298662995132868110010201000020299512986511100211091010100001001000000006403162229948010000102986829952298682995229866
10024298652240000000029936199151001010100001010000505998214926871298662995132868110010201000020299512995111100211091010100001001000000006402163329948010000102986829952298682995229867
10024299512230000000029852198311001010100001010000505998214926871298662995132868110010201000020299512986511100211091010100001001000002006402162229948010000102986829952298682995229867
10024298662240000000029936198311001010100001010000505998214926871298672995132868110010201000020299512986711100211091010100001001000000006402163329948010000102986729952298672995229868