Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 2D)

Test 1: uops

Code:

  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.000

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
7200729145234124222602239618100467129045031489712001800040048000400051905716059246372921929194310120004000800080002000029006289801161001100010004005701400411740004840308813287931069443115860189863198381219675928546156791252512833400080002896429010290042939529117
720042930023412511231000500477729065001534712000800040008000400451893715998246812867628841310120004029803080002000029273293001161001100010004005500400402740004041376132789380680830671464196183274381326687328638159851299913146400080002968229195294392924029158
7200429235233121002310005004835287183014740120128001400080084000519007160682460629152301373101200040008000800020000288862901911610011000100040055014004124400048410132109550698730871267187463229381526726928439154851243512516400080002905728914289322896728865
72004290692331220126100940047532894903145111200080004000800040005189771616122470628774289273101200040008000800020000287932880111610011000100040055121400401440004041648129829237687531251261190513210381214766928222152461235211981400080002860628644288872891628699
720042863222813310300000500479429464441513012001800040008000400051894715922247522938229740310120004000800080002000029391295011161001100010004004500400410440004041315123708117645226961465194773352382014716328744163451293313237400080002962629592295712958329446
720042953823712410271001794123605453330237001552412184809940418108403453064724661250252989330189753381212140528080812220195301343019516161001100010004031101214004014400048410131519338700631141262194763176381414726328980160591292612954400080002942329458295802957229539
7200429388237123102610004004664294330115342120008000400080004000519047161012475929365295283311200040008000800020000294332957211610011000100040045814005014400040410129539568687231361568195343344381721706429201163441294713051400080002961229561296622974629847
720042985323912610251000500464329834441466012000800040008000400051891716051124957292382902731012000400080088008200002873328934116100110001000400551214004014400040410130899633690931651256187853151382018626328273155811233412403400080002888128831288082877928802
720042879522312110251000510461328691441446012000800040048000400051895716044246752874628726310120004000800080002000028839288581161001100010004011580400401440004841013194951869873140135618707317638188616228199158351238712360400080002871528792287372888428713
7200428780223117103210004004695287310014497120008000400080004000519137162412470828528288423101200040008000800020020288422878711610011000100040065814004004400048410131619507688031191354187933271381514616528307156391235812531400080002878028800287472877828854

Test 2: throughput

Count: 8

Code:

  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.4999

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ac | af | atomic or exclusive succ (b3) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
960207201957155900000129300080820020007516161406171482510466411007259853200001006407143200005009274424108261600201852199762202292416123407319601002003200006402402006400001600000200949199761118020110099100100800008000010032043417024658332049600085653200020163414005109117111997430320000640000100198562199440199358200101200051
96020419914515340000002000885660200795161614561703125104465610072557332000010064000032000050091305351072734502003422001441988253743834164596010020832096064000020064000016000001983302002581180201100991001008000080000100320015153425847032001678103593200620163614005109117112011830320000640000100198028199538199495200727199260
960204200125154511000121020008997311986450161469177972510469731007293853200001006400003200005009296744109828510200173202075201107399423389399601002003200006400002006400001600000197375201252118020110099100100800008000010032001515362408203200761005232000202340005120117111997140320000640000100202023199996199381198912199642
9602042006221553000002150008595202015321616162016770251041199100727276320000100640000320000500928814710571901020288620160720027140510133980896010020032000064000020064000016000002006821993751180201100991001008000080000100320014143625315132001801023732000002340205109117112005110320000640000100199708198799198940200458198844
96020420034915980000012200087334020108216161421164582510442701007264673200001006400003200005009283340107366730200101200488197824377143433599601002003200006400002006400001600000200871199416118020110099100100800008000010032001414362618703200141101513200020163414105109117112001690320000640000100199671200779199990200502198814
9602041987941603101100180008179711994891601506157192510465251007323213200001006409523200005009285411108137440200709199673201345412363405419601002003200006400002006400001600000199650201968218020110099100100800008000010032001515362613013200140005432000200360005109126112016620320000640000100202917200997201361201815198761
96020419705216051000092000087210119700516161376166112510438861007291973200001006400003201085009270970108430570201964202167199715406254294228196148420032036064096020064096016018002014372010635180201100991001008000080000100320074170256063320256000300732018202340005154244121999390320000640000100200349198343198191201108199634
9602041990891609000000200081555019926216161638169722510454551007305893200001006400003200005009271225107285020200001201596201417405753399379607922003200006400002006400001600000201033201186118020110099100100800008000010032001415362677203200160004032000202340015109117111989920320000640000100199083200734199934197557201082
960204198996161500000128000815630199389161616741702425104790110072614132000010264000032000050091873201081179802006571996521999844071533983596010020032000064000020064000016000002000351992481180201100991001008000080000100320015153626460032001600012532000202340005110117211979190320000640000100200687199482198485201291200191
9602042002731626111000170009396011990841601361168472510460481007227743200001006400003200005009412037107866480201486201932202046415663401439601002003200006400002006400001600000200210198628118020110099100100800008000010032001614362555703200160104232000202340005109117112020030320000640000100200256200748201996201180196973

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.5015

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
9600271975661562000001383100906652003730161402170122510513951072491032000010640000320000509320882107124900201171200327200002378091640025960010203200006400002064000016000002003112012151180021109101080000800001032000004226232032000205320002242200501911711201501032000064000010200864200646201910200408197813
96002420073115410000009110085781199954161615001729825104020910724018320000106400003200005093449421083220501984061989541992463895633993396001020320000640000206400001600000201447202056218002110910108000080000103200600025903032000201091320002242000501911711201314032000064000010198757200748202399199531200763
9600242005121546000000300088619199113161615891672425104746810729430320000106400003200005092701541068047901997102004861992093980534043196001020320000640000206400001600000200721199557118002110910108000080000103200000422596703200020332000220000501911711197724032000064000010198892199959198193200948199898
96002419843015470000012400089712197321016146215999251049053107205313200601064000032000050920317510799525120050120032519895641398340515960010203200006400002064000016000002118431992562180021109101080000800001032007004225551032000012320000042000503211711201793032000064000010201605201934199107200275198779
96002419934415410000003010848902001431616141717168251049623107307063200001064000032000050928714010736502020096720020220048139939339769960010203200006400002064000016000001998662007641180021109101080000800001032000004225742232000218320002242000503123511199629032000064000010200616201064202373198630201873
960024199394155011034264440000806811999761616121317302132103920910725894320180106407143202165092575541089454902003261993952007103875533908596001020320000640000206400001600000199728197980118002110910108000080000103200000024463032000202320002242000501911711198996032000064000010196414201040198685198178201588
960024201195152900000184000829791996831616141016627251051765107277393200001064000032000050916483810734418019977020276120031340940339380960010203200006400002064000016000002020931979551180021109101080000800001032000004225112032000202320002242000501911711199601032000064000010199469198265201533199554200926
96002420191415480000003000840621990891616172217030251049677107179033200001064000032000050924190510717062019958519994820179442014339387960010203200006400002064000016000001994451994501180021109101080000800001032000004225693032000205320002242000501911711199972032000064000010199760198260200121201957198768
9600242001191551000000300088056197103016144316916251047490107253093200001064000032000050919204010865590020020820209920047039988338701960010203200006400002064000016000002001952008451180021109101080000800001032000004225178032000212320002242000501911711201392032000064000010198452201637199431200666200657
9600241997881555000000300086049200316161615971557725104703910729858320000106400003200005092364941074861702009821972762001143959435605596001020320000640000206400001600000198733198686118002110910108000080000103200000024181032000202320002242010502011711201046032000064000010199625201665197647200360201127