Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (single, D)

Test 1: uops

Code:

  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.010

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.010

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
66005293172190111210005100466828836000169016010401020004000200010000476725229862915229275310600020004000200012000291902906611610011000100002003342006012200040202013107929268353042151200543110382514394128549163651343814797200040002928929285292422926629313
66004292422200101111008000461328891200169226012401020004000200010000476800229982907329226310600020004000200012000290842919611610011000100002002342004012200000242012906917468783128039200973135382517323828424164441324414754200040002925329342293312935329353
66004292202190100110108100460528865200169316010400420004000200010000476200230542909429288310600020004000200012000292202915511610011000100002002302003002200240262012967908769263032041200923067382812424728412162761335114910200040002927229280292392932429224
66004292492190000010004000461828817000169956000400420004000200010000475380229982922529297310600020004000200012000291962920111610011000100002000062000103200040040012993913468733044039201083073382617373428429164651318514915200040002928929271293132931929333
660042929122001001001014000454028867000169176010401020004000200010000476060230682918929320310600020004000200012000291672920011610011000100002003462003024200060202113065913668643143040200883100382618414128537163651340715173200040002938229278292752930629251
66004292932190110110004100463428855002168536010401020004000200010000476181229782914129329310600020004000200012000291872919911610011000100002004442004104200240262112935932768983018040200353039382911454228465164261331214789200040002935729339293002930929378
66004293292190100011006000456428865000169316004401020004000200010000475825230322918729253310600020004000200012000291502915511610011000100012003362006122200040242012949937469903048042201533114382915403928520162871334714943200040002930129276293002926329315
66004292572190100011005100456128825000169086010401020004000200010000476540229952907129265310600020004000200012000291122921011610011000100012004302004002200000242112872908568833067042201613055382515424228372164061332115009200040002936029185293362928629284
66004292742192101111108000459328794000169896010401020004000200010000476125230202914029331310600020004000200012000291672919511610011000100002004242002022200040262212815910768923136049200293125382716404028520163291331914957200040002932629323291932930629276
66004292392190111101115000457228856000169206004401020004000200010000475960230372911729312310600020004000200012000291342915911610011000100002003362003022200440262113000932669363088040201463100382814424728541163111330714719200040002922729290292772927429343

Test 2: throughput

Count: 8

Code:

  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802051600611199111110013100160025212127995625480108100320008160000100320000160000500800222227154841160034016004016005379947038002248010020016000032000020016000096000016004616006111802011009910010080000800000100160000000160024000016002461027000005110217221600501662160000320000100160054160041160054160054160057
48020416004011980000110301001600380012799532548010810032000816000010032000016000050080000022715484116002101600401600537995003800354801002001600003200002001600009600001600611600461180201100991001008000080000010016001212420160051001501600386013421310005110217221600590992160000320000100160062160113160047160062160047
48020416004611991010000571001600312160799612548010810032001616000010032000016000050080121722716936016004201600611600617995503800434803722001600003200002001600009600001600461600611180201100991001008000080000110016001414420160013041511600386150421300005110217221600600902160000320000100160062160062160047160062160062
48020416004611981111000570021600462160799612548011610032001616009010032000016000050080075322716888116004201600611600617995503800234801002001600003200002001600009600001600461600611180201100991001008000080000010016001414420160013001121600006151421310005110217221600580992160000320000100160062160047160099160062160062
48020416006111991010000000116003800079953254801001003200001600001003200001600005008009792271268801600370160056160056799340380040480100200160000320000200160000960000160056160040118020110099100100800008000001001600000270160210010351600320132350000051102172216005001002160000320000100160078160057160057160055160057
4802041600561199000000001001600382012799402548010810032000816000010032000016000050080037122715484016002101600561600407995003800384801002001600003200002001600009606001600531600531180201100991001008000080000010016000002701600240002416002461320000005110217221600580002160000320000100160062160047160062160062160062
4802041600461199111000013002160046201679961254801161003200161600001003200001600005008007532271688801600420160061160061799550380028480100200160000320000200160000960000160061160061118020110099100100800008000001001600121500160052000501600006150421300005110217221600580900160000320000100160062160047160062160047160062
48020416006111991111000570121600460161679961254801161003200161600001003200001600005008007532271688801600270160061160061799400380043480100200160000320000200160000960000160061160061118020110099100100800008000001001600141342016005200057160038615001220005110217221600580902160000320000100160062160062160062160062160062
48020416006111991000000570121600682161679961254801161003200161600001003200001600005008000422271688811600420160046160061799550380043480100200160000320000200160000960000160061160061118020110099100100800008000011001600131300160013000511600386150421210005110217221600430092160000320000100160047160047160047160062160062
4802041600611198100000012010160025200799582548022210032000816000010032000016000050080037122715484116002101600531600567993403800384801002001600003200002001600009600001600561600531180201100991001008000080000110016000002701600310003216003260240000005110217211600580902160000320000100160062160062160064160062160062

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4800251600611199110010057010216004621607996302548002610320016160000103200001600005080074722716888016004216006116006179955380043480010201600003200002016000096000016004616006111800211091010800008000001016001312428160013000511600006151421300050200021701116005899216000032000010160062160047160062160062160062
4800241600611199111000056010216004601616799610254800261032000816000010320000160000508007472271688801600421600611600617995538002848001020160000320000201600009600001600631600461180021109101080000800000101600121400160050001531600000013421210150200011701116005890216000032000010160047160062160062160063160062
480024160061119910100005701001600462160799610254800261032001616000010320000160000508007532271688801600271600611600617994038004348001020160000320000201600009600001600611600611180021109101080000800000101600131400160052011501600386150421220050200011701116005899216000032000010160047160062160062160062160062
48002416006111991110000130002160031216167996102548001810320016160000103200001600005080071722716888016002716004616006379955380043480010201600003200002016000096000016006116006111800211091010800008000001016001313420160052001511600376151421210050200011701116005899216000032000010160062160047160062160062160047
48002416006111981110000570002160046216167994602548002610320016160000103200001600005080004222716888116004216004616006179955380045480010201600003200002016000096000016006116006111800211091010800008000011016001213420160052000501600386150421310050200011701116005899016000032000010160047160063160062160047160047
480024160061119911100005701021600462161679961925480018103200161600001032000016000050800053227168880160027160284160046799551080043480010201600003200002016000096000016006116004611800211091010800008000011016037313420160142030631600386151421320050200011701516005899216000032000010160047160062160047160062160062
48002416006111991000000639010016004621616799610254800261032001616000010320000160000508007662271688801600421600611600637995538004348001020160000320000201600009600001600611600461180021109101080000800000101600131400160014002541600386050421330050200031701116004399016000032000010160062160099160062160062160062
480024160063119810000005601021600312161679961012448001810320016160000103200001600005080073722716888116004216028516006179940380043480010201600003200002016000096000016006116006111800211091010800008000001016001413420160014011541600386112421200050200011701116005890216000032000010160047160047160062160062160062
4800241600461199110000413264000160046216167996302548015610320064160540103200001602705081594122724588016035416022816006179955380028480010201600003200002016000096000016006116006111800211091010800008000001016001414420160051000131600386050421200050200011703116004399016000032000010160062160047160062160062160062
4800241600611199101003057000216004600167996102548002610320016160000103200001600005080004622716888016002716006116006179955380028480010201600003200002016000096216016006116006111800211091010800008000001016001415420160052043541600006151421310050200011701116004399216000032000010160047160062160290160062160047