Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, three registers, 16B)

Test 1: uops

Code:

  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 07 | 0a | 0b | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
63005285262142928011815073283032302326630003000300015000716159281562835531030003000300028470283051161001100010000300003004003004300613192968871623289119119665331138131464652794114617122091373130002867428429285342842828501
630042829121328290110048792823920323226300030003000150008161512802528399310300030003000282482817911610011000100003000630041430000079133141019871703208146219718324738191167702797714650121861367130002846728469285982862128325
63004283792143228011905134282012232325530003000300015000616199280552848431030003000300028377282041161001100010000300093004033004507613785941171933338156919657325838151264592792915110122671375330002849028626284492830628562
63004285292122426011005045281720032337330003000300015000316158280452839131030003000300028420284711161001100010000300063007043000003913717101047159345686519825332038121669642789514478125621361030002841728515284012823928375
630042838921229250006052002822123023243300030003000150004161472813728485310300030003000284562823311610011000100003000930040730074009136121008070333372136419594327938122262592814614518121771420830002831828601284612845328315
630042834021229240000051442809323223442300030003000150006161702811028680310300030003000283712829111610011000100003000930000730034016137301015871783400156619803330138121253612801114943124061370330002835128301284812822428351
6300428275212342900010051362821800323290300030003000150024161582798028262310300030003000283332827611610011000100003000930040330015149134671004670943371146919863331938091357592803114451121271342530002813828372283552840428360
6300428210213292701013150662807822323206300030003000150005161512802328364310300030003000284052835111610011000100003000930040030074036139421016270963267156319684322238091454552787714434122171369330002833428516283132836528383
63004283832133231000110501628138223231923000300030001500041614028198283823103000300030002817428252116100110001000030009300700300750761369310258720533958531962333153813866652789614721125261378530002843828472285322839028331
6300428556213332801013050062809823323102300030003000150004161592802628357310300030003000283922825311610011000100003000930040030043176135581029871713394136520090330238121357652798114338120031373230002821328479283192839328303

Test 2: throughput

Count: 8

Code:

  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  ld1 { v0.16b, v1.16b, v2.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2402058007059900000004400008004601019252401001002400001002400175003503030180015800658006549993650017240116200240024200240024800408006111802011009910010080000800000100240000043024004000039240038604100011151160160080058014072400001008004180066800668006680041
24020480062600000000044000180050211192524010010024000010024001650035041880800408006480064499686499922401152002400242002400248006580061118020110099100100800008000001002400000430240000010542400396159440011151160160080063001442400001008006380062803728027880041
24020480216602110000078000080050001221252401001002400001002400145003518162180044800658006549969650013240116200240024200240024800658006111802011009910010080000800000100240000000240000000024003950404400111511601600800580131302400001008007080070800668007580071
240204800685990010000210001800462100252401001002400001002400005003551453180015800658004049967350023240100200240000200240000800698006131802011009910010080000800000100240000003024000000210332400390104401000511011611800620141472400001008007080062800668006980066
240204800406020001000720001800542012125240100100240000100240000500351001418003680044800654996735002724010020024000020024000080069800651180201100991001008000080000010024000000024003800042240039014144000005110116118006600002400001008007680065800628006680041
2402048004059900010007900008004901016252401001002400001002400005003506992180015800658006549963350019240100200240000200240000800648006111802011009910010080000800000100240000043024000000062400406100000005110116118006200072400001008006680066800668006680066
24020480065599000000000001800492112025240100100240000100240000500354239018001580040800404996334999824010020024000020024000080040800401180201100991001008000080000010024000004302400400000240000013900000051101161180062001472400001008004180066800668006580065
2402048006160210001001260001800250112025240100100240000100240000500351984018003980065800654998435001924010020024000020024000080040800611180201100991001008000080000010024002120430240039000382400000159000000511011611800620131372400001008006580041800668006680066
240204800405990000000440001800502010252401001002400001002400005003505448180040800408006149988349998240100200240000200240000800618006111802011009910010080000800000100240000043024000000002400390040000000511011611800370141472400001008006280066800418004180066
24020480066599000000045100180025012121925240100100240000100240000500351854308004180040800404996335002324010020024000020024000080040800401180201100991001008000080000110024000000024000000040240039610440000051101161180062001072400001008006680066800658004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0009

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2400258007160010011100670100800543902025240010102400001024000050352711208004580070800694999335004924001020240000202400008006980069118002110901010800008000011024001919432400601751622400395160431910502020160613800451305240000108007080049800488007180048
240024803486001011000066000080032079192524001010240000102400005035015750800238007080446500373500492400102024000020240000800708007011800211090101080000800000102400191943240059250260240000011901900502061601368006613130240000108007080049800708007180196
240024800695991011000021000180036077192524001010240000102400005035215820800458004880048500153500502400102024000020240000800698006911800211090101080000800000102400192043240019111212400400059018105020131601388007013135240000108007080070800488004880302
240024800705991010000021000080054307025240010102400001024000050350946518002380047800705001435002724001020240000202400008004780069118002110901010800008000001024002019432400601516324004100194319005020816074800660130240000108007180048800708007080662
24002480048599101100201000002800542072025240010102400001024000050350946518004580072800705001535005024001020240000202400008006980069118002110911010800008000001024001919432400601581622400005119019005020616145800461305240000108007480071800708007080186
240024800695991011000067010080055207202524001010240000102400005035015751800228006980069500143500492402072024000020240000802028007511800211090101080000800000102400212043240060101287240040005901900502010160658006713135240000108004880070800708007080209
2400248004760011010000670002800323771925240010102400001024000050350017618004580070800705001535002824001020240000202400008006980047118002110901010800008000001024001920432400581412124004151190190050206160658006613135240000108007080070800718007080212
2400248007060010100000670002800573701925240010102400001024000050351150018002280070800704999335004924001020240000202400008007380047118002110901010800008000001024002019432400600216124004151194319105020616056800661305240000108004880048800488004980206
2400248007059910110000670001800323094125240010102400001024000050352354518004480084800485001535002824001020240000202400008004880069118002110901010800008000001024002021024005904712324004151194319105020716077800671300240000108007180071800488004880664
240024800716001101000022200038005400719252400101024000010240000503521582080022800698007050015350050240010202400002024000080069800471180021109010108000080000010240019194324006012812124004051604319005020616145800441305240000108007180070800718004880166