Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STEORL (32-bit)

Test 1: uops

Code:

  steorl w0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.006

Integer unit issues: 1.006

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7300532942249212021001008110602832597002064230051005200010002000110121691630049300423261632743313300010002000200040003267625831171001100010000200430210061001025200441222116503120191843437780572336739644443124750324511005176431536715451200010003277632809327943322333234
7300433219249011111011008110568932691002072730061004200010002000110081685450049301933265833191312300010002000200040003285925761171001100010001200342010061001025200351222316367118191823540451492284936204450164848324881004175861499716366200010003282733283332763275233213
7300432860245011101111008110595232752002079530061006200010002000109981692130049301313305632873312300010002002200240003270125681171001100010000200332110071001026200441202215721119952845940390462329737334445154843325071006177511486914933200010003323232660327543283833287
7300432800245011110011008110590432704002110430051006200010002000110481691760049296913267633190313300010002000200040003288826391171001100010000200332210051001016200441222116470120842849640880472311237884449124141325351006175281481115687200010003279632699331663267533214
73004326532450111110010041105716326690020838300410062002100020001123517108500492964632757329201212300010002000200040003269725791171001100010000200332210051001116200441122216351122111847536611472331240534440203645324521005160711472616144200010003283732793329273257532754
7300432751246011111001010110619732797002064430091006200010002000110121691040049300033290333164313300010002000200040003266826121171001100010000200432310061001026200441222116360121611825837141492314240534443124548324781005164421489515245200010003284332848329203269932762
7300432763245011110001007100586532608002072830061006200010002000110021690660049300873258532717310300010002000200040003282426131171001100010000200332210061001027200441022116418119052845040910412326639854443124544324921006175531477415564200010003273733235327273280032798
7300432696246011101001008100598832636002077930071006200010002000110071691320049297833267933187313300010002000200040003283525841171001100010001200232210061001018200661022216357120782842540340452293139274442143646324211009174331457616464200010003271033091331873273833277
73004331332470111110010101105961329790021174300810062000100020001109616908110049300793265232797313300310002006200040043309925712171001100010000200342010101001018200981222115398111181842340670462286740164444184657323111006165181455315288200010003271132769328093280433155
730043286524801111100100811060513310801207483008100820001000200011227169191000492961232703331473103000100020002000400033122258411710011000100002003232100510010111200451222116324120382841037920452291939964440194951326501006164981507714926200010003258533056328863276732733

Test 2: throughput (address in x6 advances by 4 bytes per store)

Code:

  steorl w0, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0069

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020770070525000000527521431053148700541856413857682254067320324200002010020000204551233211100144496700007002970069645183650364010020200200003020040000700816111202011009901001000010100100001002020945318019872103111003039324072109202170531123428013101161169857204459620000201007007670071700707007070070
402047008352500010052291166105410070054134632305739825403122054920000201002000020448993319670118449669890700417006964508365027401002020020000302004000070079651120201100990100100001010010000100202116411911988510217100321431937011720287167282494013101161169856203576620000201007007070070700707007070070
4020470069525000000522521621044128700542086444056822254044920425200002010020000204555033211880115496700407004770082645013650274010020200200003020040000700746111202011009901001000010100100001002025860213519872102781002515316630123202720652106428013101161169871204286620000201007007070070700867007370070
4020470069525000000527821911836176700542186314257920254037220404200002010020000204511133214441213496698907003970070645223650264010020200200003020040000700636111202011009901001000010100100001002024466814419872102261006033021596131202540490126448013101161169851203176620000201007006170070700707008670070
4020470069524000000524622341040070054164636455835925403882058920000201002000020452393319810013749669890700437008464507365027401002020020000302004000070069611120201100990100100001010010000100202306321501987110218100262031878890202071532108433013101161169856202806620000201007007470071701257007070070
40204700895250000005201216710476870066209536415805125404122048820000201002000020453823320470199496699007002970073645073650274010020200200003020040000700726111202011009901001000010100100001002021257016619876103141002719322186166202690680114479013101161169877203059620000201007007070070700647007470070
4020470070525000000526922081037180700542276353057311254037120303200002010020000204468933209911207496698907002970081645103650414010020200200003020040000700696211202011009901001000010100100001002021063915519872103061000518325430159203121551108529013101161169857203236620000201007007070071700717006470070
402047006952500001052481146104464700541816403557311254041220727200002010020000204480333209711127496699907003070070645073650274010020200200003020040000700676111202011009901001000010100100001002016855914419876103211006131318364120202140612105592013101161169857203986620000201007007070061700867007670070
4020470069525000000534122481060070056213535425738725405572038620000201002000020450493321892153496698907003970070645073650294010020200200003020040000700646111202011009901001000010100100001002023950318819872102101003225321972125202000771165494013101161169864203526620000201007007070085700707007070075
40204700695250001005259321410571887005419954238570512540438204292000020100200002045900332091009149670080700297007064508365027401002020020000302004000070065611120201100990100100001010010000100202285341361987210265100121831948012020181061293591013101161169856202196620000201007007170071700837008670070

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0069

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002770091524000000053421242100410700541952353057467254037120647200002001020000204433333213921974966989700457006964529365049400102002020000300204000070080611120021109010100001001010000010202650634158198711020010020213194821152024006018834801270011161210698652031266020000200107007070071700707007070070
4002470070525001001052162172100506870054173135495676725401732025720000200102000020448513320717182496699470042700696452936504940010200202000030020400007007261112002110901010000100101000001020210052618819873102791005120324276852024305229448101270014161111698602019366020000200107007870070700707007070079
400247006952500000005268121410052927005414223839576152540464204062000020010200002043753332078411424966990700297007364529365049400102002020000300204000070085611120021109010100001001010000010202420492160198741021910086173211749420295062110739901270011161211698712033566020000200107007170070700867007070075
4002470080525000000059812192106453228700552292293957002384037020219200002001020000204448933209011211496698970030700836452936504940010200202000030020400007007061112002110901010000100101000001020182047416919869102611002923321111211920239041210147601270010161112698482036066020000200107007070070700707007070074
4002470081525000100056862186101444013270054216141435714025402472067220000200102000020443543321110118149669897004170082645493650504001020020200003002040000700696111200211090101000010010100000102021706381761987210267100382032255811020345060110630401270013161212698722034866020000200107008270087700707008270070
4002470069525000000152442215100446470055194228355774825402292040620000201592000020446633320834188496698970040705666453236504940010200202000030020400007008665112002110901010000100101000001020225055017719872102291003621316770952021705131296200127005251112698562034266020000200107007170085700707007070070
40024700635250000000525111871040514870054179132335775225405832030320000200102000020441793321055110249669897002970073645293650524001020020200003002040000700726111200211090101000010010100000102020906181471988710186100191631976487202620463823370127005161212698572045566020000200107007370084705597007070071
400247057352400000005176223010052887056619113132565602540523203392000020010200002044457332092201234966989700337007064543365049400102017320000300204030171061611120021109010100001001010000010202020440174202761024710035253257601162024305429236601270010161212698612030666020000200107007070074700707007070087
400247006952500000005229216010047124700541393294557616254028520255200002001020000204485233214561179496698970029700696452936504940010200202000030020400007007064112002110901010000100101000001020197033812619871101691002317319136952024005429241501270013161111698702018866020000200107007270070700617007170082
4002470069525000000052091163100412047005417233438571342540359203192000020010200002044440331982616949669897002970069645293650494001020020200003002040000700706111200211090101000010010100000102022604861861987210254100761932181896202450563945870127005161111698572031866020000200107007470071700707007070075

Test 3: throughput (address in x6 unchanged between stores)

Code:

  steorl w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 22.0131

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3020522019916490000110027263010122011100122022142550884286672250210100200008689891105264090511492170720220147220132215399621630230100161016611020020000202004000022015086011102011009910010000100100000100200000332427008100001016477362291682901931001117161123901600021942816027439131302000010100220135220140220123220149220134
30204220129164800000000279221500022011749020188525507072726423022101002000085754211052551105234921705202201102201522154586216350301002059170710203200052020640009220117926111020110099100100001001000001002000003329269271000100162883658416866117000111716100680161002194951742745201002000010100220165220181220108220109220126
3020422013316490000000027376000022011510020197425506932746123001101002000084217061052801214574921707302201572201412155426216331301001488206610203200052020640009220134884111020110099100100001001000001002000000027017100000016941371471728602131001117161103801600021940817327553131302000010100220137220122220125220155220113
30204220143164900000000272690001220159012020186634506272728423259101002000086212481052480505114921703202201092201212155286216330301001821215510203200052020640009220136904111020110099100100001001000001002000003327273141000000174123714817307150332101117161074001600021940920028041131302000010100220155220153220153220145220106
302042201201649111100002724814101220106030201958255048827467231001010020000855658410526829147649217061322011522013321555662163633010017661733102032000520206400092201298821110201100991001000010010000010020000029182734010001001691536741176271421312211117161096801600021938419126198141002000010100220123220144220186220110220145
30204220119164900000000270080000220124150202169255078227385234921010020000868136110526184048049217015022011722010721551362163303010017731940102032000520206400092201179102110201100991001000010010000010020000032232692410000001685036770169560153100111716111150160002193842072742701302000010100220161220098220100220149220123
302042201061648111000002748116100220090045202197255068527306228021010020000834141810524734052649217005022011622011121544762163803010020972250102032000520206400092201289251110201100991001000010010000010020022210422575310000501670237025171980033001117161107501600021942020527320131302000010100220141220170220145220096220145
3020422013216491101000027284140012201474121120206325507862725722885101002000084083021052432804904921706402201472201512154656216315303381899189210203200052020640009220142869111020110099100100001001000001002002321310268711001401173933721817488150292111117161124501600021944820627643131302000010100220147220142220117220121220128
302042201401650100100002745500012201420902020452550258272302309910100200008649097105263000470492170310220147220141215448621634330100186319811020320096202064000922012117861110201100991001000010010000010020000000267131000000171323753717851000001117161070601600021944214827481131302000010100220129220119220125220145220109
3020422011216491011000027605161012201220372018292550362273762335510100200008426852105244280536492170100220121220177215473621637130100192819091020320005202064000922013095711102011009910010000100100001100200232333342703710016021691936028174791522342211117161044801600021943323027617101302000010100220094220142220146220084220128

1000 unrolls and 10 iterations

Result (median cycles for code): 22.0255

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3002522007016500001103189411012202383151420069525528633286920069100102000010688738105258930121949217175022022822024321532632164763001099114100202000020020400002202117041110021109883241010000101000001020027272242317331001500217734175421727142832261006401531941544219331032969992000010010220245220301220291220297220259
30024220282165011000131621141032202390181720090525529053282820057100102000010682461105247990139349217166022024422031221541032164733001012913810020200002002040000220203733111002110956836101000010100000102000002726315271000100215134151121523102700006401520831644219439032825002000010010220238220240220244220226220266
30024220253164900000031487100122025631402014742552793326872011710010200001057479710526091013924921718502202692203012154533216475300101501511002020000200204000022025572411100211093092710100001010000010200000310314201000100215114143621550102700006401527041644219407032794662000010010220230220206220238220230220194
30024220241165000000031509000122023620020154825528183280620079100102000010625676105245430139049217173022028222026921537432164993001013914710020200002002040000220261704111002110956505101000010100000102000003128315141000110214994151121495126000006401528841634219402032747992000010010220202220272220263220251220230
3002422022116500000003148810012202743151320117134529293277620077100102000010675396105234320131249217155022024922022321536832164753001013413110020200002002040000220249658111002110949961101000010100000102000003129314391000100214404146121478002700006401514841534219345232789062000010010220189220226220206220234220228
300242202611650000000316120101220202301320084525529253274620055100102000010680681105229520136949214135022027122024021534032165023001013212110020200002002040000220189658111002110961952101000010100000102000002729315791000100215574160621565102700006401531841644219397132802602000010010220260220220220277220255220248
30024220241165000000031608100022025231313202021255285932801200931001020000106660491052338101306492171650220257220235215330321647330010141130100202000020020400002202476482110021109604171010000101000001020000031283148010001002152441524216011252700006401520241644219437232790662000010010220246220306220260220246220310
30024220242165000000031450010122023621213201302255281032733200601001020000106140131052806901393492171590220258220325215405321652730010174151100202000020020400002202796501110021109354341010000101000001020000031293153810001002155641517215351290000064015387416432193411327601062000010010220236220256220266220318220180
30024220199164900000031479100022024831202013402552914327362009610010200001061246410524256014144921719302202772202552153583216501300101421621002020000200204000022024970211100211094152310100001010000010200000003140310000002144141449215831262700006401516231544219359032747062000010010220302220218220202220214220286
300252202771650000000314820000220230213152011912552864328432006010010200001064283010526328013264921717002202692202472154323216495300101291151002020000200204000022024770821100211096369410100001010000110200000003150310001002146141592214880292700006401545841643219412232869662000010010220212220232220262220234220268