Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (single structure, D)

Test 1: uops

Code:

  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.010

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.010

retire (01) | cycle (02) | 03 | 04 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
66005293172190111210005100466828836000169016010401020004000200010000476725229862915229275310600020004000200012000291902906611610011000100002003342006012200040202013107929268353042151200543110382514394128549163651343814797200040002928929285292422926629313
66004292422200101111008000461328891200169226012401020004000200010000476800229982907329226310600020004000200012000290842919611610011000100002002342004012200000242012906917468783128039200973135382517323828424164441324414754200040002925329342293312935329353
66004292202190100110108100460528865200169316010400420004000200010000476200230542909429288310600020004000200012000292202915511610011000100002002302003002200240262012967908769263032041200923067382812424728412162761335114910200040002927229280292392932429224
66004292492190000010004000461828817000169956000400420004000200010000475380229982922529297310600020004000200012000291962920111610011000100002000062000103200040040012993913468733044039201083073382617373428429164651318514915200040002928929271293132931929333
660042929122001001001014000454028867000169176010401020004000200010000476060230682918929320310600020004000200012000291672920011610011000100002003462003024200060202113065913668643143040200883100382618414128537163651340715173200040002938229278292752930629251
66004292932190110110004100463428855002168536010401020004000200010000476181229782914129329310600020004000200012000291872919911610011000100002004442004104200240262112935932768983018040200353039382911454228465164261331214789200040002935729339293002930929378
66004293292190100011006000456428865000169316004401020004000200010000475825230322918729253310600020004000200012000291502915511610011000100012003362006122200040242012949937469903048042201533114382915403928520162871334714943200040002930129276293002926329315
66004292572190100011005100456128825000169086010401020004000200010000476540229952907129265310600020004000200012000291122921011610011000100012004302004002200000242112872908568833067042201613055382515424228372164061332115009200040002936029185293362928629284
66004292742192101111108000459328794000169896010401020004000200010000476125230202914029331310600020004000200012000291672919511610011000100002004242002022200040262212815910768923136049200293125382716404028520163291331914957200040002932629323291932930629276
66004292392190111101115000457228856000169206004401020004000200010000475960230372911729312310600020004000200012000291342915911610011000100002003362003022200440262113000932669363088040201463100382814424728541163111330714719200040002922729290292772927429343

Test 2: throughput

Count: 8

Code:

  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4802051600611199111110013100160025212127995625480108100320008160000100320000160000500800222227154841160034016004016005379947038002248010020016000032000020016000096000016004616006111802011009910010080000800000100160000000160024000016002461027000005110217221600501662160000320000100160054160041160054160054160057
48020416004011980000110301001600380012799532548010810032000816000010032000016000050080000022715484116002101600401600537995003800354801002001600003200002001600009600001600611600461180201100991001008000080000010016001212420160051001501600386013421310005110217221600590992160000320000100160062160113160047160062160047
48020416004611991010000571001600312160799612548010810032001616000010032000016000050080121722716936016004201600611600617995503800434803722001600003200002001600009600001600461600611180201100991001008000080000110016001414420160013041511600386150421300005110217221600600902160000320000100160062160062160047160062160062
48020416004611981111000570021600462160799612548011610032001616009010032000016000050080075322716888116004201600611600617995503800234801002001600003200002001600009600001600461600611180201100991001008000080000010016001414420160013001121600006151421310005110217221600580992160000320000100160062160047160099160062160062
48020416006111991010000000116003800079953254801001003200001600001003200001600005008009792271268801600370160056160056799340380040480100200160000320000200160000960000160056160040118020110099100100800008000001001600000270160210010351600320132350000051102172216005001002160000320000100160078160057160057160055160057
4802041600561199000000001001600382012799402548010810032000816000010032000016000050080037122715484016002101600561600407995003800384801002001600003200002001600009606001600531600531180201100991001008000080000010016000002701600240002416002461320000005110217221600580002160000320000100160062160047160062160062160062
4802041600461199111000013002160046201679961254801161003200161600001003200001600005008007532271688801600420160061160061799550380028480100200160000320000200160000960000160061160061118020110099100100800008000001001600121500160052000501600006150421300005110217221600580900160000320000100160062160047160062160047160062
48020416006111991111000570121600460161679961254801161003200161600001003200001600005008007532271688801600270160061160061799400380043480100200160000320000200160000960000160061160061118020110099100100800008000001001600141342016005200057160038615001220005110217221600580902160000320000100160062160062160062160062160062
48020416006111991000000570121600682161679961254801161003200161600001003200001600005008000422271688811600420160046160061799550380043480100200160000320000200160000960000160061160061118020110099100100800008000011001600131300160013000511600386150421210005110217221600430092160000320000100160047160047160047160062160062
4802041600611198100000012010160025200799582548022210032000816000010032000016000050080037122715484116002101600531600567993403800384801002001600003200002001600009600001600561600531180201100991001008000080000110016000002701600310003216003260240000005110217211600580902160000320000100160062160062160064160062160062

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d0 | icache miss (d3) | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4800251600611199110010057010216004621607996302548002610320016160000103200001600005080074722716888016004216006116006179955380043480010201600003200002016000096000016004616006111800211091010800008000001016001312428160013000511600006151421300050200021701116005899216000032000010160062160047160062160062160062
4800241600611199111000056010216004601616799610254800261032000816000010320000160000508007472271688801600421600611600617995538002848001020160000320000201600009600001600631600461180021109101080000800000101600121400160050001531600000013421210150200011701116005890216000032000010160047160062160062160063160062
480024160061119910100005701001600462160799610254800261032001616000010320000160000508007532271688801600271600611600617994038004348001020160000320000201600009600001600611600611180021109101080000800000101600131400160052011501600386150421220050200011701116005899216000032000010160047160062160062160062160062
48002416006111991110000130002160031216167996102548001810320016160000103200001600005080071722716888016002716004616006379955380043480010201600003200002016000096000016006116006111800211091010800008000001016001313420160052001511600376151421210050200011701116005899216000032000010160062160047160062160062160047
48002416006111981110000570002160046216167994602548002610320016160000103200001600005080004222716888116004216004616006179955380045480010201600003200002016000096000016006116006111800211091010800008000011016001213420160052000501600386150421310050200011701116005899016000032000010160047160063160062160047160047
480024160061119911100005701021600462161679961925480018103200161600001032000016000050800053227168880160027160284160046799551080043480010201600003200002016000096000016006116004611800211091010800008000011016037313420160142030631600386151421320050200011701516005899216000032000010160047160062160047160062160062
48002416006111991000000639010016004621616799610254800261032001616000010320000160000508007662271688801600421600611600637995538004348001020160000320000201600009600001600611600461180021109101080000800000101600131400160014002541600386050421330050200031701116004399016000032000010160062160099160062160062160062
480024160063119810000005601021600312161679961012448001810320016160000103200001600005080073722716888116004216028516006179940380043480010201600003200002016000096000016006116006111800211091010800008000001016001413420160014011541600386112421200050200011701116005890216000032000010160047160047160062160062160062
4800241600461199110000413264000160046216167996302548015610320064160540103200001602705081594122724588016035416022816006179955380028480010201600003200002016000096000016006116006111800211091010800008000001016001414420160051000131600386050421200050200011703116004399016000032000010160062160047160062160062160062
4800241600611199101003057000216004600167996102548002610320016160000103200001600005080004622716888016002716006116006179955380028480010201600003200002016000096216016006116006111800211091010800008000001016001415420160052043541600006151421310050200011701116004399216000032000010160047160062160290160062160047