Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 4H, post-index)

Test 1: uops

Code:

  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.008

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.008

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
66005293602202601210117104668288230001695570081000400820001000400020005000100004752452297129065292393107000200040003000800029242291261161001100010001200062002002004400412890917768243063106920064310838181060572838210001564613374143972000400010002931229359294012924429389
6600429357219220020000400454728850020169477008100040082000100040002000500010000476045229562908429235310700020004000300080002926429190116100110001000120004200000200040061279393166842327595820262326338101667612839510001583013138143102000400010002924829504293962940829305
6600429269219220021000510457428959001168977008100040082000100040002000500010000476083230212926129383310700020004000300080002921629142116100110001000020004200000200040041292192636837322876220130312638131464562843810001635113222142942000400010002927029461293152936429324
66004292772202000210006004674288700001687970081000401220001000400020005000100004756422299029099292623107000200040003000800029213292551161001100010000200062000002004400612857943970283233106620172304438151754582849310001593113266144182000400010002927329381291712933129383
66004294142202000230006104557288900001682770161000401620001000400020005000100004752622296929155292523107000200040003000800029221291701161001100010000200062000022002400612897970270293066165420079313638131266582842310001628312963144022000400010002939729237293332936029283
6600429159220240021000810469129023020168387008100040122000100040002000500010000475645229672909529348310700020004000300080002922729130116100110001000020006200200200040001293892086856325577020172305238151760602841010001634513249142862000400010002933829426292332930529244
6600429266219190023000600457828819020168457012100040002000100040002000500010000476252230222906629306310700020004000300080002919529167116100110001000020000200010200040261293191656874306586720087310038111455642843110001631313204144822000400010002917029323293272943029269
66004294042202400210002104880289470001702370121000401220001000400020005000100004757852293529095293253107000200040003000800029126291881161001100010000200002000122004404412862926868263096116519975306738201659622844110001633713263144072000400010002929029369293272942729268
66004292612212600180000004567288090021700070121000400820001000400020005000100004759052294429167293903107000200040003000800029152291871161001100010000200002000022000404613092916870283035116120100303738181658602842510001643913150140312000400010002928629237291942927529406
66004292262202400250018004566289120221686270001000400820001000400020005000100004740872295829124293383107000200040003000800029112291111161001100010001200042000002000400412988915070703053107520087302938141757622848210001630412998143652000400010002930529392293562933429312

Test 2: throughput

Count: 8

Code:

  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0009

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
48020580069600101100056000280032277502556013480106320064160008801083200321600164805379611965760268108005008006980069307405601482001605503200322002400246400648006980070118020110099100100800008000001001600121143160013000101600026150431111115116016018006680006013160000320000801008004980048800708007080275
480204800696001010100550102800542605812556021080106320076160008801083200241600164805379612321088092000800290800698006900954056014820016001632003220024002464006480069800691180201100991001008000080000010016001311431600520111031600416050011111151171160080676800061313160000320000801008007080070800488007080070
480204800486001000000560002800332065025560174801063200761600088010832002416001648053896119257602680080050080069800480074056014820016001632003220024002464218480069800751180201100991001008000080000010016001312431600531001116004161504311111151160160080045800061313160000320000801008007080049800488007080070
4802048006959910010005601028005420650255601908010632007616000880108320024160016480537961192576026810800290800478006900617560148202160016320032200240024640064800538004811802011009910010080000800000100160012114316005203150160041005043111111511611620800668000600160000320000801008007080070800498004880048
48020480069599100000011000280054077002556013880106320096160008801083200241600164805379611965440284108005008006980069307405601562001600163200322002400246400648005080060118020110099100100800008000001001600151243160053121101600020150011111151160160080045800061313160000320000801008007180070800708004980049
48020480069600112100023010180033066502556021080106320876160008801083200241600164805379611961088092010800280800488006930719560148200160532320032200240219640064800478004811802011009910010080000800000100160013104316001404310160002614943112111511611600800458000600160000320000801008078780070800708007080070
4802048006960010100001101038003327700255602108010632009616052880108320024160016480537961196108810561080050080069800483064056014820016001632003220024002464006480079800691180201100991001008000080000010016001411431600521001116000200104310011151170160080066800061313160000320000801008004980070800708007080049
4802048004859911011005501028003226000255601788010632002016000880108320024160016480538961182108809240080050080069800693029195601482001600163200322002400246400648007280069118020110099100100800008000001001600141143160052001111600026050431101115117065008006680006013160000320000801008007080070800488007080049
48020480069600100000011000380054000502556019080106320076160008801083200321600164805379601135760268008005008006980069007195601482001600163200322002400246400648006980069118020110099100100800008000001001600131101600131025016056161504311011151171160080066800061313160000320000801008007080070800708007080070
480204800696001001000560003800332665025560138801063200201600088010832002416001648053796011357602680080050080069800693071856015620016001632003220024002464006480054800691180201100991001008000080000010016001513431600120005016004100504310011151170160080066800061313160000320000801008007080070800498004980070

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0009

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4800258006960011100001101008005406002556003480010320248160000800103200001600004800489611075760016008005080069800690033056001020160000320000202400006400008006980069118002110910108000080000010160012124301600490012021600396050431100005019000617538006680000000160000320000800108007080049800708007080048
4800248006959911000005600028005400652556007480010320096160000800103200001600004800499611125440032108005080069800690035256001020160000320000202400006400008004780069118002110910108000080000010160011110016005010050160039015043111000501900051653800668000013130160000320000800108007080070800708007080070
480024800696191010000560100800542705255600308001032002416000080010320000160000480048961101108807521080050800698006930329560010201600003200002024000064000080047800691180021109101080000800000101600111143016004900050160039605043111000501900031635800668000013130160000320000800108007080049800498007080049
480024800476001010000560102800542666255600348001032009616000080010320000160000480049961112576001610800298006980069003305600102016000032029220240000640000800698004811800211091010800008000001016001212001600100034916003901494310100050190005163580066800000130160000320000800108007080049800708007080070
480024800696001011100560002800542700255601068001032009616000080010320000160000480048961112108807521080050800698006930329560010201600003200002024000064000080069800691180021109101080000800000101600101100160010001101600396149011100050190007165580044800001300160000320000800108004880070800498004980070
480024800696001000100110002800542665255600868001032002416000080010320000160000480049961108576001610800508004780048003525600102016000032000020240000640000800698004711800211091010800008000001016001111001600501115016003960504311000050190005165380044800001300160000320000800108007080070800498007080070
4800248006960010000005500018003300652556008680010320076160000800103200001600004800499600295760016108005180069800473035256001020160000320000202400006400008006980047118002110910108000080000010160012114301600490021016003901500101000501900051653800668000013130160000320000800108004980049800488007080049
4800248006959910100001000038003300652556007080010320024160000800103200001600004800489600295760016108005080048800690035256001020160000320000202400006400008006980048118002110910108000080000010160011110016005101011160039615043112000501900041645800668000013130160000320000800108007080049800708004980070
4800248006960011100001101028005420652556008680010320064160000800103200001600004800489611385760016108005080048800690035256001020160000320000202400006400008006980048118002110910108000080000010160011114301600111004916003901504311100050190005165580066800000130160000320000800108007080070800498007080049
48002480048599111014410000380054066025560074800103200761600008007332051616000048004896110810880752108019080069800693046533056001020160136320000202400006400008004880069118002110910108000080000010160011110016004900049160039615043111000501900051653800458000013130160000320000800108007080070800708007080070