Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STEOR (64-bit)

Test 1: uops

Code:

  steor x0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.003

Integer unit issues: 1.003

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)0e0f1e22243a3f464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f6061696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbbl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
73006330682461901700100310584232772102079730051001200010002000109151685970049296243286833010310300010002000200040003274825681171001100010000200002210031000002200240216304120650839340477582291639284437114043323221003167401450815354200010003321633046329223275433159
7300432739246200130010010056833295610207273003100420001000200010916168075004929750326733317431030001000200020004000331602590117100110001000020000221000100000520050221640212008083493680941228553862443893338323291004165551453915267200010003311033097332213269932841
73004328842471201600100310605732802102083130001001200010002000109541690940049296453257632792310300010002000200040003302525641171001100010000200002210031000103200320016371121851843238778332330938254436134038322991003161981482416187200010003295733020329253286032993
73004327962451101000100310573732633112093730031000200010002000109471685650049296943300433087310300010002000200040003305725661171001100010000200000210031000000200302216370113561817137499362328838584438103137324021002170231464716597200010003280832803329073290832684
73004333362491501200100300597532633112107130011003200010002000109551685270049296783269932597310300010002000200040003265425891171001100010000200002210031000002200222216191119760850340168392282939504440104334323911003163361451315035200010003303732963329533299233227
7300432717248120150010030060803254001207853003100320001000200010952168465004929671326843274831030001000200020004000325412566117100110001000020000221001100000120020211579412008081953809437226803953443884038322701003167201411615619200010003288132981328633284632735
73004327452451201400100410581532678002082230011001200010002000109561685250049296923261332874310300010002000200040003292825961171001100010000200002210021000102200222216426118350823837026382277539714434114237324371003167551557314910200010003293933060332423275332769
73004327122461001000100110600332794112094430031001200010002000109561685560049296003271132870310300010002000200040003312525931171001100010000200002010011000002200331116098118420844140716472284239184436114141323671003163331441515332200010003319332699329583318532712
73004327842481501300100810606332613102089330041003200010002000109821685050049297023261732785310300010002000200040003266926351171001100010000200002210001000002200222216218120190821639068382321438214436114339323401003165151452416242200010003314132854328223281032758
7300432767246150100010081060723256211208453003100320001000200010952168203004929723331183317231030001000200020004000326822563117100110001000020000121003100000220023211653011960083293973840233313687444163736323691001160941405016308200010003305332971327473294232659

Test 2: throughput

Code:

  steor x0, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0071

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0f1e1f2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4020930071225101105855880817121061523005680833203231131602541134212212000020104200009160501412727133549269910300713007124824725025401042020420004302064001630071691120201100990100100001010010000010021002312051013109361031930338714413132126418310106791017170011113160160030068210599920000201003007230072300723007230072
4020430071225101006047780217681141483005680721200233131912541179211672000020104200009160571412662134649269910300713007124823725023401042020820004302064001630071661120201100990100100001010010000010021034191302101410968103133185943541116213191731777531072141011113170160030068211189920000201003007330072300723007230072
402043007122511000601978321720104963005680922214188131522541117211542000020104200009158861412618135249269913300783007124819625023401042020820004302064001630071681120201100990100100001010010000010021026171287100910923103162610909521325213051628997261090141011113170160030068210369920000201003007230072300723007230072
40204300872251001058527848176899116300568182221824113200254114221265201052010020000916164141752003644926991030071300712482072502340104202042001230212400083007168112020110099010010000101001000001002102817121999010901103412913905401265212811637777201077142011113180160030068210899920000201003007530072301923007230072
402043007122511000581978491704113100300568172826022113207254109721139200002010420000916582141262003244926991030068300712482172502540108202042000430206400163007169112020110099010010000101001000001002098816134195910938103343005924421289212862036676441067131000013101161130068211939920000201003007230072300723007230072
402043007122511010597378341776991043005681222239247132192541229211502000020100200009166121412649033349269910300683007124814325029401002020020000302004000030071691120201100990100100001010010000010021016201256100710942103312996907441300212761736577031040140000013101161130072210449920000201003008830072300723007230072
402043007122510000576178461720120152300567752019521713227254118221198200002010020000916726141275203664926991030068300712481332502940100202002000030200400003007168112020110099010010000101001000001002099520131397610976103252786939461212212911135177351061130000013101161130068210999920000201003007230072300723007230072
402043007122610100602178202720108160300568121921622113154254107821142200002010020000916676141270703584926991030068300712481332502940100202002000030200400003007168112020110099010010000101001000001002101419119610371093110338253392552139121264183719728960142000013101161130068211529920000201003007230072300723007230072
40204300712261000060767848173610714430056805181962021317810741073211112000020100200009163481412791031249269910300713007124816325029401002020020000302004000030071681120201100990100100001010010000010021007191227106210981103332953900561226213251731486881146141000013101171130068211649920000201003007230072300723007230072
402043007122510010586688081712115112300568082325923913237254114321202200002010020000916465141255103514926991030071300712481732502840100202002000030200400003007169112020110099010010000101001000001002100916124497710930103122834910401242212471537876601086130000013101161130068210359920000201003007230072300723007230072

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0071

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk data (08)l2 tlb miss data (0b)0f18191e1f2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4002930090226000000588427711696821323005680432215263132092541097211862000020010200009159281412593034549269913007130071248370325051400102002020000300204000030071701120021109010100001001010000010210150123510330109611032727818754410922122623328074810960001270416223006821111101020000200103008830072300673007230072
40024300712250000005798077817841141043005679444252250132252541252211312000020010200009157801412623034249269913007130071248350325051400102002020000300204000030071701120021109010100001001010000010210290135210610108841032028819295211802127818370071810960001270216223006821115101020000200103008430072300723007230072
4002430071225000000584608381776120144300547863922021013168254116121071200002001020000916102141262403114926991300713007124837032505240010200202000030020400003007169112002110901010000100101000001020991013649850109481033430919153412562121417341073612240001270216333006621058101020000200103007230073300723007230072
4002430071225000000588608331688124152300568282922622613129254122421097200002001020000915859141262013574926991300713007124837032505140010200202000030020400003007169112002110901010000100101000001020999013819930109031033729218793011432125016356160110460001270217223006821153101020000200103006730072300723007230072
4002430071225000000589117811704951523005681241182246131908041175211742000020010200009162811412654036549269913007130071248380325051400102002020000300204000030071691120021109010100001001010000010209890117111170109221032925119593211372141723315079610221001270316223006821162101020000200103007230072300723007230072
40024300712250000005719081617201151523005676641196205131892541176210812000020010200009161571412612030149269913007130071248370325052400102002020000300204000030071681120021109010100001001010000010209680132510260109051031829119454412732122923356174911080001270216223006821145101020000200103007230072300723007230072
4002430071226000000593607861704931083005678325228195131882541044210972000020010200009162251412654033349269913007130071248400325051400102002020000300204000030071691120021109010100001001010000010209580125810490109911031728219273213252126320400161410320001270316343006821094101020000200103007230072300723007230072
40024300712250000005912080717281261563005679225215187132332541116210932000020010200009160741412704034349269913007130071248360325051400102002020000300204000030069681120021109010100001001010000010209680127910790108711032626119073412052121424311075910720001270216223006821070101020000200103007230072300723007230072
4002430069225000000592618361704134104300567882822027013201254104321194200002001020000916477141266103264926991300713007124840032505140010200202000030020400003007170112002110901010000100101000001020972011531025010938103392751897341235212391830207009960001270316223006821126101020000200103007230072300723007230072
40024300712250000005849181817681131163005678526218225132225341094211932000020010200009160481412614030249269913007130071248370325051400102002020000300204000030071681120021109010100001001010000010209780124810940109681033026619653212092124718342177710350001270216223006821066101020000200103007230072300723007230072

Test 3: throughput

Code:

  steor x0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.2645

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f181e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafbbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
302051101188210021018518000010310101010852462541370205242040410100200004812643499275001564910051801044341037579856269869630100704855102002000020200400001001768051110201100991001000010010000110020000027019455100010103312886079050210111722243242210250252136801002000010100102548103377103343104283103784
302041063998010000019632100110148027081786254128620692201441010020000447586848617130149499971201052601048659743569996830100743891102002000020200400001053645742110201100991001000010010000110020000350022319100000958730014101210220111722222252210312125199611414020000101009969099485101753101833104603
30204104414783000001864001011017931014857302541734203992008210100200004615248493388801234910042401022811025609967789717630100107612921020020000202004000010275459011102011009910010000100100000100200003429018966100010101382889583950263111172226225221033611119800141402000010100104495103886103224103420101013
3020496764718000001982910021041900608583925448652195220245101002000045848784935331022049983180101895101963999856100123301006385581020020000202004000010483057111102011009910010000100100001100200003500200331000108996290151041812301117221922522103013111863601002000010100105357104882104926104499103730
302041021007750000019958100110469100118364425395281897120870101002000044824534838649018249101157010448010376498138696426301001471146010200200002020040000972425621110201100991001000010010000110020000343101906010001089463007013146029341117220224221037372019769141402000010100106567106539106106104638103321
30204103479781000001817311029772719138442425404472011420471101002000042298924767826114349961030101372101353969136989373010010021182102002000020200400001035835761110201100991001000010010000110020000025019599100010919029936985610351117221722500997953419414141002000010100103079101918102686103447102764
302041026917631000319353105101986790898343911640787191642033610100200004800621499177901784910133301018581024329772069895630100897100610203200052020640009101414508111020110099100100001001000001002000003401880510001111120284991042900341117162101600102349820999000200001010010158910089010017510056599989
30204101206763000001969501001050081098551925396822259720369101002008946173504901904114249101996010239210251197667139359930848133710991025520195202064000910081155811102011009910010000100100000100200000260209461000011023929230102720293111171719016001040032206461310020000101009695898621994469883496635
302041006887530000018115010099977039812741564185218987211691010020000418883648214381296491016580104814104889100983710140630100595968102032010720206400091027075961110201100991001000010010000110020000343202030410000095243060110637124341117172101600101507320556141402000010100102896103849103720104485104700
3020410437678500000191180001104335089834122540557196762023010100200004630829496473312204999730010410210335510037161003663010081384310203200052020640009105061375111020110099100100001001000001002000033270203521000101095830532984902701117172101600102170619267141002000010100103439103928103497101799103826

1000 unrolls and 10 iterations

Result (median cycles for code): 11.1629

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
3002511184383610001100237331510011178681817918072544837240422014010010200005458330531905201027491085370111515111430107230310770530010354210020200002002040000111595505111002110928242101000010100000102002826380253631001702137783509515796142902610640302162211146912269113132000010010111221111428111789111693111527
30024111769836100111002278215103111653818179200425444472635820095100102000054726265316764088049108192011131011128410703331078393001019261002020000200204000011182953211100211092923010100001010000010200282731532383710017011370433507134621631302520640552152211122032501313132000010010111849111666111753111603111528
300241117268371100000023410140011116620181891659254619427187200351001020000549981253271350913491085930111542111591107214310774530010494710020200002002040000111795395111002110927052101000010100001102002827054234671001400143733324212996153037261064050215221111685258871302000010010111539111765111648111496111569
30024111657837100100002221714000111691018189121125434112236620011100102000054715225314728099749108193011154511146610679531080213001034181002020000200204000011182239411100211092919910100001010000110200282636024458100140215985351431240415029251064052216221112360245161302000010010111838111854111684111693111476
30024111806838110100002276314000111504818091574254586323304200411001020000545690353155270118249108362011155511162310727731079713001033361002020000200204000011146249111100211092257010100001010000010200272637532206410018001315132835134441531382620640562152211132612725013132000010010111680111823111847111853111613
3002411162983610010000226991400111150091617916702546822241962021110010200005450697531039001123491086870111827111720107169310775130010272510020200002002040000111658492111002110928769101000010100000102002627002388710016021357233010143721629342610640542162211130002527613132000010010111745111827111830111828111662
30024111769836110100002304514001111320818189171925445452677520083100102000054492695317365012264910840601111851113961072423107654300102212100202000020020400001113303941110021109250661010000101000001020025263754234591001500137703445415677143002600640512162211148302506613132000010010111670111737111550111616111674
30024111667837110100002132116101111525618189167025465122504820002100102000054851865327970093049108405011165211154510746931079403001029481002020000200204000011162449211100211092902010100001010000110200272737512335710016001611735202152501530382600640562162211125712356813132000010010111472111596111643111654111624
300241118068361001000025079140031115930171791601254578124127200891001020000544569253146720104249108439011173811143610725431079383001012910020200002002040000111672503111002110927109101000010100001102002727002402610017021465333527151741432282610640562161211101412440213132000010010111537111811111520111647111632
30024111467836100100002373915001111649801891676254495424633200011001020000547608553260320107649108439011178911160410723531079383001040521002020000200204000011166051111100211092153410100001010000110200272730532268310016011508234809154671531312620640572162211113412373213132000010010111209111217111676111541111678