Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STEORL (64-bit)

Test 1: uops

Code:

  steorl x0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.005

Integer unit issues: 1.005

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 61 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
73005332162482040120010100710615132825002086830011005200010002000110901691020492980232984328583133003010002000200040003274525801171001100010000200442010081001118200420220016165116891840438692572293638574435125756323441005163471493915328200010003281732603329593265632682
73004327602490111011010100610596632826002068230061003200010002000110431701740492979632630329793133000010002000200040003278525801171001100010000200422210071001028200351022216019118243842540942502286041124445164850324331005164051435315691200010003272332916330953298732851
73004327022460121011101100910609732726002066930061006200010002000109971693640492976432950334423133000010002000200040003281025721171001100010000200240210071001004200551222216269117681850339961532284740154443155451323841005168991468314821200010003274233064328043324633325
7300432886245012102000010191061853267900206363004101020021004200011029171020049296463269332968313300301001200220004000326812577117100110001000020042221009100101620038122211582612186184394015052228073985444495154323901008161551466416029200010003272632843329793290733055
7300432957246001002000010160060223271900207583004100620001000200010993169350049297663265532885313300001000200020004000327552623117100110001000020000221003100000720022022001638211576184864062154231603969445095551324491004165831496915450200010003287932645327633280932704
73004332052460020020100101500596632851002082530051004200010002000109221690220492996032741326753133000010002000200040003261825891171001100010000200002210041000207200120220016377118411848541000462282740464441155348323281002164391472415158200010003279132732327333283532707
73004327392440030030000100201587432803002081530061003200010002000109251697900492980432540327673133000010002000200040003291626121171001100010000200002210021000207200320220016042119051843640351502284539164444185555324931002166931509115679200010003289332903330983289532809
73004327952490020020110100600593732764002077830011004200010002000109991690800492968732623326793133000010012000200040003269726141171001100010000200002210041000002200550020016430123052847440271472294540374441114550324691006161101495614769200010003246632699329793272932945
7300432896245002002010010060060363264400206673005100420001000200011023169340049297393283632740313300001000200020004000329822636117100110001000020000021006100000320051002001632311980285424096152228844009444284948323301003161591467415192200010003321933019326773310632682
73004326972460010030000100501605532786002058630041003200010002000109971692540492965432543327083133000010002000200040003266825941171001100010000200002210031000002200120020016372120161850639773522301140094444135451323971005163181533116011200010003277432986331323255532544

Test 2: throughput

Code:

  steorl x0, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0130

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020970128525100007437083818001021567011182117170147567422540744207502000020100200002048071332255502014967047070090701276456836508640100202002000030200400007013269112020110099010010000101001000001002091821193911199321085610237289193532117321114520014431096101310116116991820638101020000201007013370132701337013070131
4020470129526222017267278817761271487011782732156161569352540947209852000020100200002048614332262311554967048070091701326456836508440100202002000030200400007012569112020110099010010000101001000001002095821152930199411085410257335393734109921188522024171192201310116116991320664101020000201007013070138701297014270125
4020470130525200007318282614961211567011584324150156576782540775209382000020100200002049025332323811574967054070090701306457036509240100202002000030200400007013270112020110099010010000101001000001002096541218920199331087410271299191140118121153521124451144101310116116992220872101020000201007013570138701317012970140
40204701305252200073831832272810915670115820181411445602225407522085020000201002000020481693323829013849670550700877012864566365090401002020020000302004000070134133112020110099010010000101001000001002093001288874199471089910256289195134109621161519913911176101310116116992020852101020000201007012770129701267013470129
402047012752522000745428441744941567011882128135157566992540686207262000020100200002048097332356811164967046070090701346456736508740100202002000030200400007013069112020110099010010000101001000001002092216119189319938108521025928618973610462111451951405984221310116116991720630101020000201007012870128701297013170128
402047012452622000744918421696901567011283530153147561212540838208042000020100200002048123332253201344967539070085701306457236509340100202002000030200400007013368112020110099010010000101001000001002090421197882199301087010259297192934106721149521924141120101310116116991320700101020000201007013270125701267012270125
4020470136525220007369283717361251447011583017145183557112540689207782000020100200002048651332363011154967052070093700996457236508440100202002000030436400007013068112020110099010010000101001000001002092491276888199281090010268289188932109421141519114141128241310116116991720828101020000201007012670127701307013170128
402047013552520000747129241728114927011380343142147562862540701206792000020100200002048232332249512034967056070089701276456736509240100202002000030200400007012968112020110099010010000101001000001002088421209934199481092010257295394736114921157522324111040221310116116992320598101020000201007013470127701307014170127
402047012952510000716618281728941327011381427147137565712540795207192000020100200002048970332286911374967051070096701306457336508440100202002000030200400007013670112020110099010010000101001000001002090911267952199321087110249293391134110021117518714691160101310116116990920689101020000201007013070138701307013170131
402047012752510000766118481728105112701168072414116356355254078220847200002010020000204825533237881147986705007009670130645683650914010020200200003020040000701327011202011009901001000010100100000100209042133888619930108671026928938913210512111462192453968221310116116991920847101020000201007013270129701347012770132

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0121

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4003070136525100007256183216808516070105798141271405686925406042063720000200102000020480233323470027949670447008070118645803651064001020020200003002040000701206811200211090101000010010100001020928311938211993010892102302973885801129211312236140510081012702161169908208436620000200107012270126701257012070124
4002470117525100007277178816969516070107784101201395673425405982071520000200102000020478613323815013149670287008470122645753651034001020020200003002040000701246811200211090101000010010100001020882212427891992910890102442894874421162211492238140310480012701162169909206026620000200107010670118701237012670118
40024701235251010073011807169681160701088051212613157099254080120529200002001020000204711433232951231496704270084701216458536510440010200202000030020400007011766112002110901010000100101000010208851119681119924108451025429338811581101212102247139810721012701161169912207646620000200107012970120701287012070121
4002470123525101007385180017049023670108779111301345706925408002065520000200102000020478993323443111449670377008470127645783651024001020020200003002040000701206611200211090101000010010100001020944113038031992910846102452903877581101211842231146910161012701161169906207586620000200107012572105701337012070122
40024701235251100073561812166475104701047581412213857549254061020842200002001020000204714833233081494967038700837012064570365112400102002020000300204000070119671120021109010100001001010000102092311208861199231098010263293487788114321118223114149601012701161169912205546620000200107012370131701287012770121
4002470120525110008187183417448110470103788101341365693725405192073120000200102000020479923323094113049670437006970121645603651024001020020200003002040000701296911200211090101000010010100001020940212468231992710915102392794911441201211352240139810481212701161169911207986620000200107012170125701237011070130
400247012152510100739318361712831527011577614124142570332540631205902000020010200002047621332361118549670487008270126645803651094058220020200003024840000706236721200211090101000010010100001020940111698121992810918102322963915561165211172223143110421012901161169909205756620000200107012270128701217012470123
40024701205261000074611803171293164701097811512515156957254081220678200002001020000204796933241910202496704270080701176458036510340010200202000030020400007011868112002110901010000100101000010209192107380719928108911025429738831561097211743242144511121012701161169914206986620000200107012270128701237010370119
400247012152610000744717911696771247010481314133122571932540729207292000020303200002059975332347612644967049700867012765011365102400102032520000300204000070124681120021109349101000010010100001020893211718281992910893102242954879801113211102212137910081112701162169906205906620000200107009070123701217210070120
400247011252510000729818081544671447011078416122119569782540582207292000020010200002048289332415702794967047700827011864578365101400102002020000300204000070118681120021109010100001001010000102090011072810199271088410237293487934109821116220714529441012701161169912206276620000200107012570124701237012070124

Test 3: throughput

Code:

  steorl x0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 22.0133

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
302062201901649101011002733411002201531642020102550577278632299210100200008661227105252141536492170490220131220145215433621633130100181617831020320005202064000922014090411102011009910010000100100001100200000312627140100010016812367001761011832011171610453425442194351852739413002000010100220132220100220133220108220143
3020422013316490000000027054010122010005020205925507502765722753101002000083612511052207014884921703702201312201572154416216300301001844221710200200002020040000220122929111020110099100100001001000001002000003202657710000201695436661178781032011172210080424442194311532708013012000010100220164220132220148220148220144
3020422014216490000000027759010122010413420229525506262753723037101002000085148151052673815484921708902200932201272153876216328301001683186610200200002020040000220138874111020110099100100001001000011002000003202737610000001759837587174161210011172210981424442193822412717514002000010100220162220134220162220175220114
302042201531649000000002858201012201280502022782550666277332293410100200008415994105281331489492170470220131220118215436621634130100191019051020020000202004000022017591011102011009910010000100100001100200000300270311000000168423727017261118330111722105744254421941538127361141002000010100220144220109220161220147220132
302042201391648000000002776800012201560782018382550821276992273310100200008679097105250091575492170540220159220170215449621634530100173517711020320005202064000922010790611102011009910010000100100001100200000332426909100010017195369771704802100111716101641161121945519027803131002000010100220129220122220162220116220116
3020422012316481010000027463110022010414320211825507352757823084101002000083717201052530515114921702102201092201432154886216400301001764183210203200052020640009220095874111020110099100100001001000001002000000202724710001001659137201171671233301117169672116112195211522706110012000010100220150220137220116220155220200
302042201221649101000002731910002200922402019072550809277092256710100200008621130105270270410492170810220131220143215489621633830100189617341020320005202064000922015788211102011009910010000100100001100200000302127253100011017388371771709001830011173010552116112194231992683201012000010100220118220084220092220138220132
302042201531649101000002725610012201100702023702550555267122352210100200008467758105236500493492170510220112220137215439621634530100199420331020320005202064000922012690911102011009910010000100100001100200000310271391000100174023737717541100011171610925116112194531962725013002000010100220123220130220132220116220120
302042201461649101000002751300012201140062018272550486274952309210100200008380003105253730459492170390220135220133215447621635930100212319681020320005202064000922013483811102011009910010000100100001100200000312627029100000017135371351713600320111716105031161121944115027663141002000010100220138220142220165220128220152
302042201371649101000002744410002201030972021592551050278512326410100200008647540105245021505492170330220163220121215461621636530100277219491020320005202064000922012591811102011009910010000100100001100200000330269471000000174563717616983120310111716105401161121943217026777141002000010100220121220142220134220130220167

1000 unrolls and 10 iterations

Result (median cycles for code): 22.0258

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
300262201971649101103165014001220212013142006462552821328262006610010200001069914810522473013724921713902202372201892153453216475300101241221007720000200204000022028671211100211095967610100001010000010200272824031598100160121539415582152915252526206401534621622219413132774062000010010220200220266220212220242220232
300242202451650000003148101032202230016201138255296932798200661001020000106254481052575211396492171760220240220277215352321649830010145143100202000020020400002202376671110021109519401010000101000001020026293441315951001602216244160821594142930261064015307216222194402326931062000010010220304220244220270220266220244
30024220297164900000314290103220256017020140825528603271820114100102000010594660105262991137149217178022026622023321540732164863001015613710020200002002040000220239669111002110943680101000010100000102002627343931472100160121511415732148216263226206401525421522219369032756062000010010220258220258220232220288220226
3002422019816500000031501110122025100020140225528943275420089100102000010646333105274030134049217166022024922024221537632164813001013412410020200002002040000220250704111002110966127101000010100000102000003126315531000100215134149121522126310006401529421622219406232828992000010010220267220222220297220233220227
300242202521649100003153215000220264900201621255295732801201851001020000106419271052376011388492171380220275220273215376321651330010141122100202000020020402182202866671110021109500551010000101000001020027280423154610016002160141490215211402225206401535821523219417032756992000010010220274220250220261220261220252
300242202581650111003150715001220232315020133825528163273920085100102000010610601105268910136549217217022027222026321542214216444300101471281002020000200204000022028164811100211096929110100001010000010200262522423163010016002166641628217001428026106401544521622219434032861092000010010220251220210220238220287220263
30024220314164910100316161510122020401112201250255292832871200491001020000106793631052368801338492171530220223220269215376321645930010140141100202000020020400002202657161110021109697271010000101000001020000031293159110000002163941560215460300262064015247215222193191327561062000010010220272220240220230220250220252
300242202871650000113147410022202439017201546255293432843200551001020000106854681052488811335492171780220237220249215463321652130010146138100202000020020400002202296691110021109709821010000101000001020000031283157310000002151441575215391292700064015262215232193092327901062000010010220266220222220214220274220242
30024220253165000000322351001220247001720094125529593289820083100102000010681920105236940131849217149022021722022721540032164283001013312510020200002002040000220225654111002110967898101000010100000102002727334131574100160121656416192160614272226106401529621622219411132848062000010010220228220238220282220256220218
30024220195165000001316170103220198001620088925529143282720055100102000010694921105247490131849217182022026422023221541932165023001011611110020200002002040000220259711111002110943855101000010100000102000003127314451000000214984147421504123300006401519821622219366732734662000010010220244220258220250220234220302