Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 8B, post-index)

Test 1: uops

Code:

  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.016

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.016

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
6600528591213122011810100801508728230221618070081000401220001000400020005000100004754600022995282852841131070002000400030008000283342837911610011000100002004462003001420004242213927103517235336857219127337338211750522805310001443512170127282000400010002824828426283752840328428
660042857921302000160010020050962829402160367004100040002000100040002000500010000474361000230592825528650310700020004000300080002811528346116100110001000020034420040034200042422139031010971423379105019344321738121749512808010001452212020126052000400010002842128589285502838628353
6600428306211119011610000601513228144201615370161000400420001000400020005000100004755690023007281702844431070002000400030008000283842843311610011000100002003462005001220004262213912103757149340995619206343438211050552798510001444611837126452000400010002841228368282862837728166
66004283772111180118201006015009281440215951701610004016200010004000200250051000047610400230592847428395310700020004000300080002842028211116100110001000120043420031012200042622140701006171843361125518972339138201661562801910001455711832127292000400010002844928585283862846528380
66004283162111171015100006015041280182015956701210004016200010004000200050001000047552800230132826928231310700020004000300080002837228245116100110001000120034420060004200042620137321036271663188104519106322038201249462806510001440011712125982000400010002830828494283892832628333
6600428431212122111910000600523528162201577870161000401220001000400020005000100004761240023063282312837831070002000400030008000283692832511610011000100002003362004012420024262213973102367218329275019147334638191150562797010001397311952125512000400010002832428410286262863328420
6600428386212113001710000601502528191221600770161000401620001000400020005000100004746040023029281912832131070002000400030008000281932839911610011000100002003542004102220004242113709100727149335776019226335938181453542794510001469012068127382000400010002845628413283912836528473
6600428213212116111910000801514428174221603470161000401220001000400020005000100004758871023014283912851931070002000400030008000282902837511610011000100002005362004001420024462113876101397192342075419240323138151451572798410001429012073128252000400010002833128455285152840028519
6600428415212118112001000801515428057221603370121000400420001000400020005000100004760271023041283442844631070002000400030008000284612825711610011000100002003462006012220024462113704992571783401114719147334138151356492809410001458211905125422000400010002823728430283992818828466
66004285282141221019011008015144281072215895701610004012200010004000200050001000047612100023023283262833631070002000400030008000283252827511610011000100002004402004000220024462113526101007186336785319072330438171151452802010001456311828127592000400010002828528284284652843428409

Test 2: throughput

Count: 8

Code:

  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire (01) | cycle (02) | 03 | 0b | 0e | 0f | 1e | 22 | 24 | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
480205800656000002810800410141225560156801003200561600008010032000016000048049996018983200041800238005680056033856010020016000032000020024000064000080044800561180201100991001008000080000110016000001600210211600216102505109117118005308000066160000320000801008005780057800578005780057
4802048005660000028008004121212255601568010032005616000080100320000160000480499959996832000418003780056800560338560100200160000320000200240000640000800568004211802011009910010080000800001100160000251600220221600006102505109117118003918000006160000320000801008005780057800578005780057
48020480056599000281080041212122556015680100320056160000801003200001600004804999601794159992180037800568005603385601002001600003200002002400006400008005680056118020110099100100800008000011001600002516002212216002200212505109117118005318000066160000320000801008005780057800578005780057
4802048005660000027008004121412255601568010032005616000080100320000160000480499960174832000418003780056800560338560100200160000320000200240000640000800568005611802011009910010080000800000100160000251600220616002201212505109117118005318000066160000320000801008005780057800438005780057
4802048005660000027108004121212255601568010032000016000080100320000160000480499960185832000418003780056800567338560100200160000320000200240000640000800568005611802011009910010080000800000100160000251600220251600220122005109117118005308000066160000320000801008005780057800578005780057
4802048004260000027108004101412255601008010032005616000080100320000160000480499960185415999218003780056800560338560100200160000320000200240000640000800568005611802011009910010080000800000100160000251600220221600226121005109117118005308000060160000320000801008005780057800438005780057
48020480056600000281080041012122556010080100320056160000801003200001600004804999601858320000180037800428005603385601002001600003200002002400006400008004280056118020110099100100800008000001001600002516002102216000001222505109117118005318000006160000320000801008004380043800578005780057
48020480056599000281080041212122556015680100320056160000801003200001600004804999601858320004180037800428005603385601002001600003200002002400006400008005680056218020110099100100800008000001001600002516002202216002261212505109117118005318000066160000320000801008005780057800578005780057
48020480056599000301080041212122556015680100320056160000801003200001600004804999601858320004180026800468005603385601002001600003200002002400006400008005680056118020110099100100800008000001001600002516002102116002160212505109117118005318000066160000320000801008005780057800468005780057
48020480056599000281080041012122556010080100320056160000801003200001600004804999599964159992180037800568005603245601002001600003200002002400006400008005680056118020110099100100800008000011001600002516002202216002260222505109117118005318000006160000320000801008005780057800438005780057

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 04 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 67 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
480025800565990000000000010008005021212002556008280010320000160000800103200001600004800499601798320004010800378005680060003385600102016000032000020240000640000800568004211800211091010800008000001016000002501600300000160000602225000501951705680053080000100160000320000800108006180043800438004380061
4800248005660000000000035000008004130000255600788001032002016000080010320000160000480049960019108801520108002980065800470034756001020160000320000202400006400008006580065118002110910108000080000110160011110016004710047160036604840100050195170448006208000099160000320000800108006680066800668004980048
4800248004860001100100011010028005021414105056006680010320072160000800103200001600004800499600718320004010800378006080060003245600102016000032000020240000640000800608005611800211091010800008000001016000002501600220002216002260213300050197170658005718000006160000320000800108006180061800618006180061
480024800606000000000003600001800272121200255600668001032000016000080010320000160000480049960179832000401080041800568004200342560010201600003200002024000064000080042800561180021109101080000800000101600000250160030000301600300029000050195170558005718000000160000320000800108006280061800618004380061
4800248004559900000000035000008004521200025560082800103200721600008001032000016000048004996018283200040108004180060800600033856001020160000320000202400006400008004280056118002110910108000080000110160000000160021000211600006022000050195170458005708000066160000320000800108006180043800618006180061
480024800605990000000020000018002701501025560030800103200201600008001032000016000048004996087410880152010800468004880047003305600102016000032000020240000640000800658004711800211091010800008000011016001011001600460021016000061460001501961705580057080000010160000320000800108004380047800618004380061
4800248006060000000000036000018002721200025560066800103200001600008001032000016000048004995999641599920108002380060800560033856001020160000320000202400006400008004280056118002110910108000080000110160000025016002900001600306022330005019517066800390800001010160000320000800108004380043800438006180043
480024800425990000000002800000800452121200255600108001032005616000080010320000160000480049959996832000401080023800428004200324560010201600003200002024000064000080060800421180021109101080000800000101600000001600210002216003060223300050196170468005718000066160000320000800108004380057800618006180061
48002480042599000000000001000800272121200255600108001032007216000080010320000160000480049960419960000001080041800608006000342560010201600003200002024000064000080060800561180021109101080000800001101600000250160000000291600006100000501951706680057080000106160000320000800108006180061800578006180061
4800248005659900000000035000008004520000255600668001032000016000080010320000160000480049960330415999201080037800428005600338560010201600003200002024000064000080042800561180021109101080000800000101600121040016004900211160000004840101050195170558006208000009160000320000800108006180043800618004380061