Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

DMB (OSHST)

Test 1: uops

Code:

  dmb oshst

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst barrier (9c) | st unit uop (a7) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10043027220030202009100010001000600003330263035328851000100030263035111001100010000373116113024100030363027302730363028
10043027220030202001100010001000600004230353027328851000100030273035111001100010000073116113024100030363036303630273036
10043035230030202001100010001000600003330273035328931000100030353027111001100010000073116113032100030283036303630283036
10043035220030202009100010001000600004230353027328851000100030273035111001100010000373116113032100030283036303630283036
10043035230030202001100010001000600003430263035328931000100030353026111001100010000073116113032100030283036303630273036
100430352208830112009100010001000600004230353027328851000100030353027111001100010000073116113024100030363027302830363028
10043027230030202001100010001000600003330263035328931000100030353027111001100010000073116113024100030363027302730363027
10043026220030202000100010001000600003330273035328931000100030263035111001100010000073116113024100030363027302730363027
10043026230030202001100010001000600003230263035328931000100030353027111001100010000073116113032100030283028302730363028
10043027230030202001100010001000600004230353027328851000100030273035111001100010000073116113023100030363036303630273036

Test 2: throughput

Code:

  dmb oshst

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.9135

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204291002180000000291201900810100100100001001000050059800049260552903529135327742101002001000020029035232951110201100991001001000010001000010000071011611291320100001002903529045291362904429136
10204291352170000000291201900810100100100001001000050059800049259632913529034327735101002001000020029028232951110201100991001001000010001000020000071011611291320100001002902929044291362903529136
1020429135218000004140290211900810100100100001001000050059800049260552903429135327843101002001000020029135232221110201100991001001000010001000040000071011611290400100001002903629043291362904429136
10204291352170000000290131889910100100100001001000050059800149260552904329135327843101002001000020029135232081110201100991001001000010001000030000071011611290320100001002913629136290362913629044
10204290432180000000291201928910100100100001001000050059800049260552903529135327743101002001000020029036232951110201100991001001000010001000000900071011611290390100001002903629043291362904429136
10204291352170000000291201890610100100100001001000050059800149260552904329135327843101002001000020029135232141110201100991001001000010001000000000071011611291320100001002913629136290352913629028
10204290272180000000290271900810100100100001001000050059800149259542913529025327744101002001000020029034232951110201100991001001000010001000020000071011611290320100001002913629136290282913629035
10204290342180000000290281891710100100100001001000050059800149260552903529135327843101002001000020029135232211110201100991001001000010001000010000071011611291320100001002913629136290452913629044
10204290432180000000290201900810100100100001001000050059800149260552904329135327843101002001000020029035232951110201100991001001000010001000030000071011611291320100001002913629036291362902829136
10204291352180000000291201900810100100100001001000050059800049259552913529044327751101002001000020029043232951110201100991001001000010001000000000071011611291320100001002903629136290292913629036

1000 unrolls and 10 iterations

Result (median cycles for code): 2.9867

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst barrier (9c) | 9f | st unit uop (a7) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002429951224029936199151001010100001010000505998214926871298672995132859610010201000020298672995111100211091010100001010000006403162629863010000102995229866298682995229866
1002429865224029936199151001010100001010000505998214926787299512986732859610010201000020298662995111100211091010100001010000006403163529864010000102995229868299522986829952
1002429951223029851198291001010100001010000505998204926871298652995132868110010201000020299512986611100211091010100001010000006403163529862010000102995229867299522986729952
1002429951224029852198301001010100001010000505998214926871298652995132868110010201000020299512986611100211091010100001010000006403162329864010000102995229868299522986629952
1002429951224029851198301001010100001010000505998214926871298652995132868110010201000020299512986611100211091010100001010000006402163429948210000102995229868299522986729952
1002429951224029849198311001010100001010000505998214926871298672995132868110010201000020299512986711100211091010100001010000006402162529864010000102995229866299522986629952
1002429865224029936199151001010100001010000505998214926787299512986732859710010201000020299512986611100211091010100001010000006403163729862010000102995229867299522986729952
1002429951224029851199151001010100001010000505998214926785299512986532859710010201000020298662995111100211091010100001010000006403162329948010000102986829952298682995229867
1002429866224029936199151001010100001010000505998214926787299512986732859510010201000020298672995111100211091010100001010000006403163429948010000102986729952298662995229868
1002429867225029936199151001010100001010000505998214923741298672995132868110010201000020299512986411100211091010100001010000006403163529948010000102986729952298672995229867