Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 4S)

Test 1: uops

Code:

  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.000

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
72007296132382210170000004729295260015216120008000400080004000519007161001224646293322951131012000400480008000200002951829548116100110001000400001204000004904000012001322794936929323066119571332938109595228756164331306413112400080002944829489295402947829436
720042949323722001600000047652946204153891200080004000800040005189471602052470829572297121231120054000800880002000029585295452161001100010004006012240010015604005112023113139944069173151848196063312380821595328856165901287613272400080002952029632296362959029716
720042959523918002001008814648295950015345120088004400480004008520207173015248542931829691849120244012801680082004029638296183161001100010004006080400400933400800025413267945268783108552199213309381711565728757161391303313212400080002962829557294792955029683
7200429393238200023000392146112943900153761200080004000800040005190971623052472429346295873101200040008000800020000296092976911610011000100040000804000001400008097813063930169433124853202373267380815504928679163311291113051400080002932729390294342937930041
7200430198241210016000270045302964000158581200080004000800040005190871607082468229308295723101200040008000800020000295893034131610011000100040000004000001400018001312093486974310744719469336138138525528627163901286613117400080002980429712298272947029384
72004300402412000210002710804678296063015218120008000400080004000519007160905246432930829374310120004000800080002000029403293641161001100010004000000400010040000000130759424690331291256195413330380418605428583161241287012916400080002930729398293632942829379
72004292852351600210002410460429374001503112001800040008000400051896716080924659291642964431012000400080008000200002958229496116100110001000400000040000204001080012927938069193156650193673309381023505629239159771282712816400080003013529898300053017429377
720042938523616002000091046443012000160241200080004000803540195273572412092501429474294178025912060403981168084202353015929781216100110001000404248040261086434026080013030943569693169657191763375381212555628646160091271813075400080002941529380293932940329398
7200429407237200019000010465829316401509712000800840008000400051903716310524712293172948631012011400080008000200002936729400116100110001000400008040000004000000013270924269793118749194223297381512525128796161911289312896400080002955929515293342953829471
72004294242361800190000104652293020015068120008000400080004000519637161801246722917529439101012012400080008000200002958029681416100110001000402428040223233914013080013072927169493068855199273175381612535229286159451276813044400080002966029552293382932629775

Test 2: throughput

Count: 8

Code:

  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.5054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
96020720138215360000211900075826020113115161344172942510458291007300813200001006400003200005009313121107999380200042200693200984411043396189601002003201206400002006412001600600198975197673318020110099100100800008000010032025403625912332019600199232000216344005131238611198968320000640000100198475200244199643200091200364
960204199274153201000800087990020059716161166169132510392941007264833200001006400003200005009254584108755320200535199397199779383293383559601002003200006400002006400001600000198930198191118020110099100100800008000010032000000253630320002005320002234000510911711199291320000640000100200291199078199694200599199523
9602041999001558000062100881530199150161613021670325104352610072338632000010064000032000050092553091071776402018281995222015214036333989996010020032000064000020064000016000002009821996871180201100991001008000080000100320000034254960320002205320002234000510911711199596320000640000100201255203080199282199494201638
9602042002671547000090000794810199350001556177682510463741007261113200001006400003200005009209300107490720201078197539199253378263407269601002003200006400002006400001600000198549200111118020110099100100800008000010032000003424431032000200232000220000510911711198491320000640000100198001199497201166218655200874
9602041999641550000002000837590199680161617731647125103920410072565132000010064000032000050091908501067936302008782004011977334072034085996010020032000064000020064000016000002006692008401180201100991001008000080000100320000034266740320002108320002236000511021711200625320000640000100200897199442201098201140199816
96020420078215530010518010809220201035161614651688825104104610072959832000010064000032000050092951571073254711989132007372007054056533879696010020032000064000020064000016000001974912008711180201100991001008000080000100320000034258060320002002320002234000511011711201100320000640000100199997200893199018199359201065
960204201792155700000200086168020033816161287172671331046130102727501320240104640950320324500919502110918333020136520057219968141695465300496148420232048064096020064072016024001991742016515180201100991001008000080000100320249234255217320242041915320002036000510911711201663320000640000100201059198675201561201144198941
960204200803157000000900087592019932116014651782225104600010073161132000010064000032000050090888291090019901993892016482012514056934020896010020032000064000020064000016000001995722001071180201100991001008000080000100320000034248970320000002320002034000512221711220376320000640000100198467201863202331200728199487
9602042009171547000068000845460199835161612231631052105069810072605732000010064000032000050092420901087942702010472012612018203962133859696010020032000064000020064000016000001989771966721180201100991001008000080000100320000034252760320002001160320002236000510911711199018320000640000100201219200685198508200593201405
9602041997571553000030200087853019914701615081648525104910910072931832000010064000032000050093155691079189202023472007272015773942012142292963910200321200642400200642640160659519949020005291802011009910010080000800001003206600342678203207221010067320482234000510912611198324320000640000100201300200302198648201181201434

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.5004

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
960027200631159761207444256111438208780801994931616160316976251040550107219053200001064000032000050930106610831380199729199177202012412353399549600102032000064000020640000160120020000820212111800211091010800008000010320000342559203200001014320002234000501912171481992930032000064000010199574199721198756202124201143
9600242013311587000000122082447019685616013481640325104640210730092320000106400003200005092337371072556719959019869220186438870339537960010203200006400002064000016000002012322006331180021109101080000800001032000034268610320002102320002034000501961713720184516032000064000010200362199964200692202421201190
9600242009231566000000122191135019705416011041746925104921410733089320000106400003200005093746551078900520203819697920067040276340553960010203200006400002064000016006001986361979111180021109101080000800001032000034252550320240202320002234000501912177131992620032000064000010199261200452201226200267202585
96002420053815520000000208379602009761616143816598251042814107232033200001064023832000050925900110778012199969200645201724407793410769600102032000064000020640000160000019767720027711800211091010800008000010320000342638103200020023200022340105019121712122020292032000064000010199524199227197969201342199412
9600241992911555000010050842130200245161618051720025105331210731334320060106400003200005092661231081957019929119957819775941619341876960010203201206400002064000016000002007452000611180021109101080000800001032000034248910320002105320060034000501981712121997450032000064000010198851197840200579197795198744
960024201037156300000012208719302011491601559171255210485111072255632000010640000320000509253346107628501999481990932008464032034000696001020320000640000206400001600000199351201145218002110910108000080000103200003625772032006210827320002200005019121712121990800032000064000010197816200244200987202061198624
9600242007611559000000090093124019996116161344161425310460511072980432000010640000320000509091147109806582007261988751988723988333827996035620320000640000206400001600000200176198320118002110910108000080000103200603424486032000200173200022340005019131712520164816032000064000010200537201054198031197871199490
960024201117156200000108081903019656916161371165932510498111072203632006010640000320000509319342107535822002401968502112653634034046296001020320120640000206400001600000200915200546118002110910108000080000103200000260330320062102320002234000502012171371999040032000064000010200029200954198998201149198724
9600241996161544000000090085259020236116161565171271301039521107361013201801064095232054050938438410752324198469202593200185409073238577961394203250406414402064096016030001973972000995180021109101080000800001032018034272232320180024353732012223401050671644178200381102032000064000010197970199342198217200325199698
9600241996201517000000080793680200674160167916719251043993107283973200001064000032000050924692810938181199588197980200246408653414129600102032000064000020640000160000019984819930111800211091010800008000010320000342568103200020033200022340005019121712122003060032000064000010201430199784200220199727199140