Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STXR (64-bit)

Test 1: uops

Code:

  stxr w0, x1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 1.000

Issues: 2.945

Integer unit issues: 0.000

Load/store unit issues: 2.943

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)191e223a3f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f606164696a6d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int store (96)inst ldst (9b)l1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive fail (b4)l1d cache miss st nonspec (c0)c2cfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)f5f6f7f8fd
7100545448340141200121574543630735293529252939147050220100049423724546545455478710793294629285900454484547610011710011000100029420294129390629321000100002202259135064209711846632271445286888934536820700205441873210004545845461454564545445458
710044546734101010002133454323071029572943294514715022010004942360454544545947951078229372932588645466454881001171001100010002937029422933002931100010000220755736504920899045282300445287195994534120734205631876010004545345461454614545745460
710044545734010010002176454393072829452939294014685422010004942373454534545048061080929342933589645445454761001171001100010002931029162936002950100010000220695827507221309046612295445586691884535420712205671875110004544945461454584545445457
7100445433341110002102155454403070329452946293314640022010004942370454514545348181082729542948588845452454841001171001100010002938029402931132938100010000220655789507221159144292280445187489894536920668205301875610004544745447454514544345446
710044546334010000002125454473064029402943294914695522010004942357454614545948191081129502952590045443454741001171001100010002947029462938002930100010000220475819506820928144292270445286189834532320701205611874710004548345454454614546145463
710044544634011010002181454383066629412933296014740622010004942387454634545747891084429402947589845446454871001171001100010002949029352932002936100010000220815833510921138945222299444886897874534920710205431870810004545645432454454544245448
710044544834011010002130454393072829362928294514695022010004942365454544544547901081829482938589445459454871001171001100010002939029352946002923100010000220645792507821028745572267444786791924536620704205281874910004545245454454504546645459
710044545634001000012198454253073929452941293714750222010004942383454544545948011080829512929588645464454711001171001100010002935029312945002936100010000220415790508021019443262279444887388854535420714205651877910004545745459454514545245456
710044547234010010012165454403073629372947294614705022010004942371454494545448011082729522946588245461454801001171001100010002933029412954392924100010000220575779508421018345552287445086991814534220686205291870010004545945454454634546645453
7100445459340010100121574543830745293929322937147450220100049423804545945447485610822294329335874454524546610011710011000100029380294229290362937100010000220805747506520919045262291444887191944534820703205331874710004546045446454594545345647

Test 2: throughput

Code:

  stxr w0, x1, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9250

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f202224293a3e3f404f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606467696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive fail (b4)bcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cdcfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)ea? ldst retires (ed)? int retires (ef)f5f6f7f8fd
202093891482915150031002045808432072016424038911357606940255850629892525957678880069748712744742117170161100000493861710389250389251834675908318130514862597944717786007944961557334389130389128100011202011009910010000101001000010023052713533123306922298421109593620019230625100000134400020945616010934556438919429142601000010100389254389254389253389128389253
202043891282915010030002021608101052019128838923769806927855850229892725957878877469748712744601117169650100000493861730389127389250834675908818129314864317944737786657944731557212389127389255100011202011009910010000101001000010023059003481225807262298625418517820016230522100000131201020946316110978596438906729142301000010100389254389130389254389251389128
20204389127291415005000202310826105761641443891145810694045585022989252595787887976974871274466411716895110000049383127038925438912983532592031811741486265794497778608794504155720838912938925010001120201100991001000010100100001002305438301832930761229881609127820007230551100000144000020946226010979556438919229142301000010100389253389251389130389129389250
202043891272916040030002017108171071215484389114696069404558500298926259578788801697487127448011171773301000004938604803892523891308352959201181296148628579447377860479449715573383892542916510001120201100991001000010100100001002305900294843320697229850708655220017230567100000156811020945815710970586138923429142301000010100389129389128389198389260389128
2020438912729150500500020222085510848170204389235645069279558502298927259576788797697487127446571171773601000004938617103891273891288346759206181174148626179449677860479448015573443891282429710001120201100991001000010100100001002305174351582440674229866828874220025230548100000121600320945716010978556438919329142301000010100389129389128389251389253389130
20204389251291501004000201810826104961711483892377130694045585282989262595787887746974861274458711717718110000049386047038925138925583531592021811711486274794475778600794473155733838925129713210001120201100991001000010100100001002306370292922940685229852809044420028230468100000146400020945515910978586438923629142501000010100389130389129389253389198389128
20204389253291514102100202400823106561762123892366180694025585042989242595787887976974881274476511717615110000049383102038925138925283527592121812911486260794470778664794499155720038917238912710001120201100991001000010100100001002305665388332600685229840919158820038230621100000142401020946015810970586138906829142401000010100389255389129389251389252389128
2020438925029151500500020190090510576177184389237572069279558500298925259576788773697486127447421171768911000004938605003892533892228346759210181307148618679450077866879450515573463892553851110001120201100991001000010100100001002306132289022640691229873509128220013230542100000148801020945615710976576438906829142601000010100389130389131389130389128389255
2020438912729151210500020682720893106961691643891145730694045585022989242595777888016974871274464411717110010000049386050038925238925283530592101811711486262794501778608794481155733838925238925610001120201100991001000010100100001002305050316882750725229857508948020019230548100000163200020945615810975586038907129142401000010100389256389255389253389132389254
202043891282914150021002025408531078418915238923866306927955850229892825957778880269752612744762117169220100000493861700389208389128834675920418130414862607945007786077944731557224389130389252100011202011009910010000101001000010023052113295630006952298671119645020038230537100000130400020946316010972586438919329142401000010100389252389254389250389282389253

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9947

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)18191e1f2022293a3e3f404f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606467696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive fail (b4)bcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
20029389968292122200201577805170495156389929598470088550218290554259664760252697577126899331176511101000004938686638996438995570828598991888531457832760825759183760832151836438994626994210001120021109101000010010100001023062773537230106872299597090432200202306101000001352000201090551097854563898872897941000010010389937389950389951389950389949
200243899492921111002022708801744114204389934579070088550204290546259656760248697567126893921176476101000004938686838994738993770814598881888581457820760822759183760820151832838994738994810001120021109101000010010100001023067213593725706832299376088752200042306841000001472030200997551097455623898762897951000010010389939389937389938389938389937
200243899362921000002017878101624112280389944578370153550216290552259666760248697576126898431176526601000004938687638995839002270822599031888741457830760826759176760816151835238995744100011200211091010000100101000010230616733517294074622996712388134200202306731000001368002200998531097655573898762898011000010010389948389938389938389937389937
200243899362921000002020208531488120332389923689070089550204290545259664760247697603126894381176485201000014493868643899463899457082059901188857145782376082375916976082215183483899574410001120021109101000010010100001023062482711329506422299365092240200212306401000071696100200998561097053583898882897951000010010389947389937389938389979389941
2002438995029210000020317076215609196389924531070088550200290546259656760243697566126895141176475311000004938686738995138995170820598951888671457813760810759164760814151833638993638993710001120021109101000010010100001023063103133226907792299487094756200042306241000071368100200997571097052573898992898171000010010389946389945389956389947389947
200243899552922110002015407951544106112389932512070101550204290544259661760254697569126893831176480611000004938685838996038994870815598901889111457804760814759171760815151833438993738994810001120021109101000010010100001023061402712330706642299335095264200912306351000071632100200999561097656563898892898021000010010389946389958389959389958389946
20024389945292110100202747761265612610038994159427008855021629055225966476026169757612689886117652550100000493868563899383899407081559890188846145781276082275922276080915183203899844410001120021109101000010010100001023062993701829206772299396091140200142305941000001464002200997581097855583898882897951000010010389950389940389953389950389948
20024389947292110100204100851175210815238993352607009955020229054425965676025069757812689770117652780100000493868643899443899927083259889188857145783776081675917676082215183563899454410001120021109101000010010100001023059673516920206332299281289476200202306521000071520100201001571097654553898972898021000010010389946389956389945389959389957
200243899562921111002013078162792122132389921573070088550224290544259658760240697566126893461176477211000004938384538993738993670811598901888611457825760812759171760818151832838993638993610001120021109101000010010100001023065223794224107002299398289450200162307001000001440000200997581097654543898762897941000010010389951389952389939389952389949
2002438995029210010020266082216881071123899335840700905502002905442596577602446975661268941511764712110000049386856389950389947708375989518884714578117608127591717608241518350389941389937100011200211091010000100101000010230682837746257073222993215088890200132306231000001496000200997541097654573898762897931000010010389938389950389949389949389951

Test 3: throughput

Code:

  stxr w0, x1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0107

retire uop (01)cycle (02)03mmu table walk data (08)18191e1f22233a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606467696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive fail (b4)bcl1d cache miss st nonspec (c0)c2cdcfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
102053801122847000199980100380092070271529272106975185751069791573654384104618630100000493770270380107380180130437599492489789264331910093844319100187688638010738010710001110201100991001000010010000100458866045886600458866001999845886610000010000010172636112911061130130380040119710000100380108380108380108380116380108
10204380107284700019998000181380092070271529272106975185751069791573654384104618630100000493770270380107380107130437599632489819264331910093844319100187688638010738010710001110201100991001000010010000100458866045886600458892202000145886610000010000010172648112911072137129380040146810000100380108380108380108380130380109
10204380107284700019998010181380379070271529252106975185811070291573654384104618630100000493770270380107380107130440599772489829264331910093844319110187688638010938010710001110201100991001000010010000100458868045886600458866001999845886610000010000010172638112911072130129380040119710000100380113380374380108380108380108
10204380107284700420571000181380092070271529272106975185751069791573654384104618630100000493770270380107380107130564599482489799266541910093844319100187688638010738010710001110201100991001000010010000100458870045886600458866001999845886610000010000010172636113011064130130380041119710000100380121380108380108380108380108
10204380109284700420003000181380092070271529284106975185751069791573654384104648220100000493772670380107380107130437599552489929264331910093844319100187688638010738010710001110201100991001000010010000100458874045886600458866101999845891010000010000010172636112911072130130380040119710000100380424380110380243380108380109
10204380107284704019998000180380093070271529272106975185751069791573654384104618630100000983770270380107380107130437599472489929264331910093844319100187688638032338010710001110201100991001000010010000100458874045886600458866102000145886610000010000410172638112911072130130380234119710000100380108380108380108380108380223
10204380107284812420010000181380368070271529272106995185751069791573654384104638920100000493770280380108380107130451599952491589264331910693877819104187688638010738010710001110201100991001000010010000100458882045886600458866101999845887210000010000010172636113011061129129380104119710000100380109380405380108380109380108
10204380107284700020007000181380377070271529272106975186471069791573654383104618630100000493770270380107380107130437599602489809264331910093844319100187688638010938010710001110201100991001000010010000100458874245886602458866002143645887210000010000010172638113011072130130380040119710000100380108380108380109380108380109
102043801072847000200100001803800920702715325341069751857710697915736543841046186801000004937727503801073801071304375995024898192643319100938443191001876886380107380323100011102011009910010000100100001004588720458866290458866422000145886610000010000010172685112911061130130380263119710000100380114380108380342380108380108
10204380424284704019998000180380092070271529272106975185491069791573654384104618680100000493770270380108380107130437599532489799265801910093844319103187688638010738010710001110201100991001000010010000100458866045886600458858001999845886810000010000010172638112911156130130380040119710000100380170380112380108380108380393

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0011

retire uop (01)cycle (02)031e22233a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6064696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d cache miss st (a2)a4st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive fail (b4)l1d cache miss st nonspec (c0)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1002538001128462000410237999607026851973210675186651067915805542410470139010000493769310380011380011138348599472493279168721910953875191019077543800113800111000111002110910100001010000104589564589560458956020018458956100001000000169678541114870723801241171000010380012380012380012380012380012
1002438001328471999810237999607026751973210675186651067915805542410470139010000493769310380011380011138348599392493299168711910953968191019077583800123800111000111002110910100001010000104589564589560458956019998458956100001000000169676561108069703799461171000010380012380012380012380012380012
1002438001128461999800237999707026751973210675186651067915805542410470274010000493769310380011380011138348599452493809168721910953884191019078163800113800111000111002110910100001010000104589564589560458956019998458956100001000000169680721108071723799931171000010380012380012380012380012380014
1002438001328461999810237999607026751973110675186651067915805542410470081010000493738830380011380034138348599402493309168711910953875191019078043800113800111000111002110910100001010000104589564589560458956019998458956100001000000169678711108070703799441171000010380012380013380012380012380051
1002438001128461999810237999607026751973210675186651067915805542410470139010000493769310380011380011138348599472493409168721910953876191019077583800113800111000111002110910100001010000104589564589560458956019998458956100001000000169678651108072713799441171000010380012380012380012380014380012
1002438001128461999810237999607026951973210675186651067915805542410470154010000493769310380011380011138348599492493379168721910953876191019078363800113800111000111002110910100001010000104589564589560458956019998458956100001000000169678561107071723799441171000010380012380014380012380012380012
1002438001328461999800237999607026751973210675186651067915873542410470154010000493769330380011380011138348599492493409168721910953875191019078843800113800111000111002110910100001010000104589564589560459006019998458956100001000000169679571107472713799711171000010380012380012380012380012380034
10024380011284620004002379996070267519732106751866510679158055424104701540100004937693103800113800111383485994124934091687219109538751910190777038001138001110001110021109101000010100001045895645895604589587220004458956100001000000169680671106970693799441171000010380048380013380012380012380012
1002438001128461999800237999607026751973110675186651067915805542410470154010000493769310380011380034138348599472493269168721910953876191019077803800133800111000111002110910100001010000104589564589560458956019998458956100001000002169681631108171713799441171000010380012380012380013380012380014
1002438001128461999800237999607026751973210675186651067915841542410470139010000493769310380011380011138348599392493279168721910953875191019077583800123800111000111002110910100001010000104589564589560458956019998458956100001000000169676511106970723799451171000010380012380012380012380012380012