Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLXP (64-bit)

Test 1: uops

Code:

  stlxp w0, x1, x2, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 50.673

Integer unit issues: 0.000

Load/store unit issues: 50.673

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 19 | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100538001284101010199815103798607267506725067487816100358811000493492138001380021314759492441487818914382743383800238001100111001100010004498115449672449810120124496610001410001016595122062673791710003800238002380023800238002
10043800128410101019981410379860726750673506728781610036061100049349213800138001131475949244028783891438274314380013800110011100110001000449801444967044981002012449671000141000101659572062773791710003800238002380023800238002
10043800128411101019981510379864726750672506738781610036031100049349213800138001131475937244028781691438274314380013800110011100110001000449811644967144981002012449671000141000121659582062873791710003800238002380023800238002
10043800128410010019981410379860726750672506738781610036031100049349213800138001131475943244128784891438274314380013800110011100110001000449821544966044981012012449661000141000121659552062553791710003800238002380023800238002
10043800128410100020071410379860726750672506738781610036031100049349213800138001131475937244028781591438274314380013800110021100110001000449811644967144981012012449661000141000121659552062573791710003800238002380023800238002
10043800128410001019981410379860726750673506728781610036061100049349213800138001131475941244038783891438274314380013800110011100110001000449821644966144980012013449661000141000111659572054553791710003800238002380023800238002
10043800128410001019981410379861726750673506728781610036060100049349213800138001131485937244028783191438274314380013800110011100110001000449821544966044981022012449661000141000101659582054783791710003800238002380023800238002
10043800128510000019981410379864726750673506728781610035950100049349213800138001131475937244028781691438274314380013800110011100110001000449811544967144981002013449661000141000111659572061553792510003800238002380023800238002
10043800128410001020071410379862726750672506738781610036031100049349213800138001131475937244028782891438274314380013800110011100110001000449821644966144981012012449671000141000111659572062553791710003800238002380023800238002
10043800128410100019981410379860726750672506738781610036030100049349213800138001131475937244858781691438274314380013800110011100110001000449821544967044980002012449661000141000121659572061773791710003800238002380023800238002

Test 2: throughput

Code:

  stlxp w0, x1, x2, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9265

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2021438927529162002002215472229122561141243892552026469280558524298939259605788783697541127460961171836001000004938618838926938914383543592031813141486310794517778684794521233603138926844100011202011009910010000101001000010023217773270515610188223031291025134820278232205100007164810002094531561097933593891542914331000010100389167389276389266389144389137
2020438926829161010002252672217123601211243892552029469279558520298937259595788798697508127459131171858211000004938606838926638926483543590831811831486278794508778672794497233586038927344100011202011009910010000101001000010023220082958615720188223027982125228020207232159100007162814002094531551097653593890772914471000010100389269389146389268389268389138
20204389142291522000022143722111235210512438924821074692795585242989442595937888186974991274647011718348010000049386058389268389138835395907918131114863047945177786127944892335875389149441000112020110099100100001010010000100232203213691615530188623028571024815020272232152100007156010002094531381097853593890792914381000010100389263389147389266389138389148
2020438926229151010002257872199122961191243892502099369279558541298936259593788817697500127453381171858201000004938618238927238913983536590791811791486312794605778687794494233604338913844100011202011009910010000101001000010023219573557615500189223029480125176620227232194100007156810002094531551097253593890822914351000010100389267389264389137389139389272
20204389270291511010022630722281231212112438925720293692795585382989372595907887906974971274566011718273110000049386185389185389276834835920218131414863197945207787037945052335839389267441000112020110099100100001010010000100232178837657164401912230309670249711220240232169100008138412002094531551097653593892162914351000010100389144389145389263389266389146
20204389141291420000022194722321232011711638924721613694025585282989392595887887906975081274545811717575110000049386060389263389270834745920218118814863157945057787567945212335833389141441000112020110099100100001010010000100232185154124316310187823027372224946220277232216100008161611002094551551097653593892012914391000010100389263389142389145389143389142
20204389264291510000022045722241233610512438912520012694025585322989652595957888176975021274529511718423010000049386189389268389237835505907918130714862957945057786167944852336436389145441000112020110099100100001010010000100232225153859115960188223027576124845220238232169100007174811002094551551097853593892062914471000010100389269389270389264389265389143
202043892652915111044223983592247123441261243891232160369404558526298943259586788813697499127454711171817201000004938606338914038913883473592011811751486306794510778624794505233585738926344100011202021009910010000101001000010023216073281215740189423027880025275420241232160100007165210002094531551097553593890862914401000010100389264389143389141389141389141
202043892682914111100219237221612336120124389248203636946055853629893825959178881069750112745393117183440100000493861843891423892608347959080181181148631379451077868979451723360313891401871221000112020110099100100001010010000100232161153684616570189123030588125375420250232185100007155211002094541551097853593890942914341000010100389271389269389142389274389260
2020438914329151110002200072230123121332363891282137369402558534298964259591788811697497127455241171820601000004938618438914038913883534592061811821486303794486778616794505233585138914544100011202011009910010000101001000010023218393602516299188023030497025145420233232201100007160010002094531551097853593890782914381000010100389262389262389151389138389140

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9955

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200343901912921410142204972261219609002523899312184470099550232290557259685760276697581126920601176527611000004938688103899463899647082459898188864145788276083175923476083122775733901854410001120021109101000010010100001023226624371711627019092303047902506102202482322521000001708000200995451109540473900742897981000010010390234389952389960390220389955
2002438995529214001421953022732134494023639021521640702765502062905542597677602866975701269065511765170110000049386864038994438994470827599091889391457817760927759187760818227807738994938994410001120021109101000010010100001023226044406681561019122303268562514178202422322011000001760000201060491097643453898832898021000010010389955389940389954390193389950
20024389958292331410222137240022096101024438992821250700995502202906192596637603976975701268949711765087010000049387114038998938994370849598991888511457831760821759181760823227754638994039015810001120021109101000010010100001023236840428991636019252302257902572104202042322231000001720040200997451097641483898762897931000010010389955389951389958389953390239
2002438995729203031321885022382212089324838993921770701005502042905492596617602506975711269002211765343010000049386868038995338995370822598921888511457845760823759200760816227759438994935959310001120021109101000010010100001023225916400301572019332303486632481206202512322131000001656060201000481097745403898932898041000010010389957389961389945389943389948
2002438994529213130321740022522212879062038994221200700995502062905542596647602706975711268973511764967010000049386874038994138994370832598901888611457842760836759183760831227754338993935958810001120021109101000010010100001023230716385591628019312302916932536240202292321851000001500000201003461097745403898952897991000010010389951389953389959389949389946
2002438994129213000321908022582216878023638992421720700885502062905492596617602546975691268963111764923010000049386879038995838994570829598951888461457833760834759177760826227757038994438995110001120021109101000010010100001023225724384841623019402303168562510126202632321991000001708000201002461097641473898892898011000010010390192389965389953389956389943
2002438995329213130021873022282208892022438993321790700995502142905702596747602686975711268957011765051010000049386874038995838994670829598881888581457833760852759194760844227756738998838995410001120021109101000010010100001023227224380861632019362303488202476108202522322481000001704030200996471097045433898992897961000010010389960389938389963389958389958
2002438995729213001021748022542216889024838994320590700995502142905632596657602666975771268954311765004110000049386858038994538995870826599011888561457847760841759185760842227814038994038995410001120021109101000010010100001023223027407741553019072303568802477158202062322381000001728000200997391097443443898782897981000010010389957389945389943389953389938
2002438995729213131321796022502211271024038993521060700995502142905512596647602636975731268951211765081110000049386869038994538994170849599001888471457811760810759193760855227757938995838993710001120021109101000010010100001023226430377701572019272301908102570164202062322321000001578000200997461097237453898782898081000010010389944389953389952389963389942
2002438994329213100022007022462208875012439016921940700885502202905512596657604876975721268995511765158010000049386859038995438994570838598881888471457811760820759171760832227755238995334471010001120021109101000010010100001023226016276151607019542303437702513152202052322461000001772000200999461097841473898822898011000010010389938389988389942389954389956

Test 3: throughput

Code:

  stlxp w0, x1, x2, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0107

retire uop (01) | cycle (02) | 03 | 1e | 22 | 23 | 24 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10205380107284819998000038009207027152927210697518575106979157365438410461863110000493770270380107380109130446599472489899264331910093854419100281532938010738010710001110201100991001000010010000100458866458866004588660019998458866100001000004172682113611070129129380040119710000100380110380108380109380109380108
10204380107284720001100180380092070271529272106975185751069791573654384104618631100004937702703801073801071304375994524897992644319100938443191002815329380107380107100011102011009910010000100100001004588664588660045886600199984588661000010000010172636113011072130130380040119910000100380108380108380776380109380108
10204380108284719998100180380116070271529272106975185751069791573654384104618631100004937702703801073805211304375994724897992643319100938443191002815329380108380107100011102011009910010000100100001004588664588660045886600200014588661000010000010172638113011071130129380040119710000100380108380109380108380108380108
10204380107284819998000180380118070271529272106975185751069791573654384104618631100004937702803801073801071304375994724897992643319100938443191002815329380107380107100011102011009910010000100100001004588664588680045886600202084588661000010000010172638113011072130130380040119710000100380108380108380108380108380108
10204380107284719998100181380113070271529272106975185751069791573654384104618631100004937702803801103801071304375996024897992643319100938443191002815329380107380107100011102011009910010000100100001004588664588660045886600199984588681000010000010172636113011069130130380040119710000100380108380108380108380108380108
10204380107284719998100181380109070271529272106975185751069791575854384104618680100004937702703801073801071304375995624900292643519100938443191002815329380107380107100011102011009910010000100100001004588664588660045886604200044588661000010000010172639113011059129129380138119710000100380110380108380108380108380108
1020438010728471999801018138010607027152927210697518587106979157365438410461863110000493770270380109380108130437599512489799264331910093844319100281532938010838010810001110201100991001000010010000100458866459168004588660019998458866100001000001017263811301107013043380054119710000100380108380109380108380108380108
10204380107284720010000181380111070271529272106975185751069791573654384104618631100004937703703801073801071304405994724898792643319100938443191002815329380109380107100011102011009910010000100100001004588664588660045886600199984588661000010000010172640112911062130130380040119710000100380108380108380108380108380108
10204380107284719998010181380101070271529272106975185751069791573654384104618630100004937702703801073801091304375995024898992643319102938507191002815329380107380107100011102011009910010000100100001004588664588660045886610199984588661000010000010172638113011072130130380040119710000100380108380109380110380108380108
1020438010728471999801018038009207027152927210697518575106979157665438410461863110000983770340380107380107130437599472489879264331910093844319100281532938010738010810001110201100991001000010010000100458866458866004588660019998458866100001000001017263611301106213041380042119710000100380108380108380108380108380108

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0011

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100253800112847000000199980037999607026851973210675186641067915805542410470486010000493769310380011380011138348599392493279168721910953892191028616613800113800111000111002110910100001010000104589560458956004589560019998458956100000100000001696465611080434137994411701000010380012380012380012380014380012
100243800132846000000201720137999607026751973210675186651067915805542410470399010000493769310380011380011138348599472493379168721910953892191028616703800123800111000111002110910100001010000104589560458956004589560019998458962100000100000001696464811078404037994411701000010380012380012380014380012380012
100243800122846000000199980037999607026751973210675186651067915805542410470336010000493769310380011380013138348599472493279168721910953892191028616673800213800111000111002110910100001010000104589560458956004589560019998458956100000100000001696466211058414137997111701000010380012380012380013380012380012
100243800132846000000199980037999607026751973210675186651067915805542410470394010000493769310380013380011138348599392493279168721910953892191028616733800113800111000111002110910100001010000104589560458956004589560019998458956100000100000041696465711068414137994411701000010380013380012380012380012380013
100243800112846000000199980037999607026751973210675186651067915881542410470394110000493769320380012380011138366599392493309168721910953892191028618653800113800111000111002110910100001010000104589560458956004589560019998458956100000100000001696464811080424237994411701000010380012380012380012380012380012
100243800122847000000199980037999607026751973110675186651067915805542410470394010000493708560380011380011138348599432493379168721910953892191028619853800113800111000111002110910100001010000104589560458956004589560019998458956100000100000001696505011079404137994411701000010380035380014380023380012380012
100243800112847000000199980037999607026751973210675186651067915805542410470394110000493769310380011380012138348599472493279168721910953892191028619373800113800111000111002110910100001010000104589560458956004589560019998458956100000100000001696485811083414037994411701000010380012380035380027380012380012
100243800112846000000199980037999707026751973210675186651067915805542410470995110000493769310380011380011138348599392493279168721914956485191428622463800133800111000111002110910100001010000104589550458956004589560019998458956100000100000001696465111070424237994411701000010380012380012380012380012380012
1002438001128460000001999801379996070267519732225551866310679158055424104703940100004937693103800113800111383485994124933091687219109539001910286167338001138001110002110021109101000010100001045895604589560045895863019998458955100000100000001696465411078404037994411701000010380012380012380012380036380012
100243800112846000000199980037999707026951973510675186641067915804542410470394010000493769330380011380011138348599452493279168721910953892191028616763800123800111000111002110910100001010000104589560458956004589560019998458956100000100000001696466311078414137994411701000010380012380014380012380012380013