Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STXR (32-bit)

Test 1: uops

Code:

  stxr w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 1.000

Issues: 2.938

Integer unit issues: 0.000

Load/store unit issues: 2.941

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 1e | 22 | 3a | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 61 | 64 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
7100545467341140500121784543330771292229312931147909110100049423534544145435478610840292529085786454304541910011710011000100029082917290000293510001000220955768508821000114453622764445867888964526720737205211878010004544645432454364545345460
71004454463416000012187454383073429312923293914685011010004942374454424545548441079929162942587445433454331001171001100010002935294329340029351000100022095572650652091090423822924438865828964532420648205321871410004543445453454484544445453
71004454433405000002154454233070629062928290514770422010004942366454414545248281084229242928589245450454461001171001100010002943295129400029491000100022091577850442081090433422884440869847764533720679205371874810004545045450454404543345439
71004454513414000002165454283064929392955295514640522010004942366454544545048261084829352934584245448454341001171001100010002940293729260029361000100022100573250322064083423422794438863868264531220681205091874010004543145443454404544145457
71004454343404100012131454283080929342943291314735522010004942366454454545448271083829402935590645448454401001171001100010002940293329490029291000100022106573350442075082425622634439867798364534620710205821881610004545645441454394543645438
710044543734040000021304542430723294029362953147419110100049423674543745446483110823294529385888454394543110011710011000100029452922293544029381000100022092576850522081085433222754436868828164535120669205271872810004545245437454394545445438
71004454473406100012158454403075329482941293914600722010004942375454574545248251083929392955589645443454431001171001100010002942293529510029541000100022118573850232102084446122794439863887964532920647205461870310004543945436454444548045448
71004454563416000012168454313072229462944293514725011010004942356454454544548021082029152945582045440454491001171001100010002923290929060029411000100022118577350762096084446222654437867818964534220667205371877310004544845447454404545345452
7100445451340310055802152454333072229222932293414905522010004942369454394546248491086529442939588245434454401001171001100010002905295429410029671000100022141570750492089085433422644439864888464531420663205051872410004544945447454494545645441
71004454393405100012160454223065029332932294114635322010004942359454474543848261082729402939588245441454491001171001100010002931293929460029361000100022122570250222054089419522744436863858364531520645205481871210004546045436454364544545447

Test 2: throughput

Code:

  stxr w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9189

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020738916029161100002003002962001201523892421850694025585042989242595767887966974861274465911718631010000049386047038925238912883467590881812931486259794471778665794471155733038912838925210001120201100991001000010100100001002299601273382202422297007030834199982297281000001424202094521551097853363891892914231000010100389253389129389253389253389128
202043891262915100300199980840096720389113210692805585002989242595767887736974861274460511716871010000049386172038925038925083464590881813041486256794472778667794473155733038925238912710001120201100991001000010100100001002299822313934102962297263210428200042300191000001440132094521551095053593891922914231000010100389128389253389128389129389251
202043892522915100100200040312102881330389112960694025585002989242595897887736974871274465711716969010000049386172038912738912783528590861811741486257794498778664794499155733038912738912710001120201100991001000010100100001002299900306583402772297241134130200012297201000001968002094521551097553593891922914231000010100389251389129389128389251389252
20204389129291610010020037033820232136563892371980694045585002989242595767887976974861274455111716933010000049386048038912838912783467590791812951486256794497778600794474155732838925738912710001120201100991001000010100100001002299580284533002672297162033152200012299691000002112102094521551097053593891902914261000010100389251389254389251389129389128
20204389252291511000020039029910104128243891121490694045585002989242595817888046974861274467411717689010000049386048038912838912883464590881813021486260794496778665794473155720038912738925010001120201100991001000010100100001002299612299183402652297204129046200042299731000001088102094521551097653593891922914251000010100389128389314389251389253389255
202043892562916100000200550321101441300389237560692795585002989242595767887736974861274520611717639010000049386048038924938924983468592021811701486259794498778604794470155720838912738925210001120201100991001000010100100001002300280299203402942297253032396200102299921000001232002094521551097253593891922914231000010100389289389253389268389261389128
202043891262915200100200200335103121310389112350692795585002989242595767887726974861274468811716917010000049386047038925038925083528590811812971486259794470778604794473155721038925038912710001120201100991001000010100100001002299791280824602512297233036040200132299891000001232002094521551097853593890682914251000010100389128389127389253389128389254
202043892522915110100200280299102641261163892351450694045585002989242595767888006974861274468811717615010000049386170038912838925083467592041811681486257794513778605794473155733438913638925210001120201100991001000010100100001002299511317592302492297160133338200042299971000001216102094521551097653593890712914231000010100389251389129389128389251389251
20204389250291511010020004130120011228389243158069280558500298924259577788772697486127446591171692201000004938604703891273892508352759201181304148628279447177860479449615572003891273892501000112020110099100100001010010000100229963834696302902297203032562200072299891000001248002094531551097953593890672914231000010100389253389128389128389253389251
20204389252291510010020039024210208124243891121740692785585002989242595767888006974861274466411717702010000049386047038925238912783466592061812981486260794498778663794473155720838912638925010001120201100991001000010100100001002299742277163102352297024130882200072299741000001456102094521551097953593891892914231000010100389128389127389250389128389129

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9947

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002738995529211110002002903101030480268389922237070099550200290544259656760244697566126893871176473801000004938686803899483899497081359892188857145780676083975922576081015183223899363899361000112002110910100001001010000102301049336092929622980833375482000423007910000720800020099400371097834373898782897931000010010389937389948389948389948389937
20024389976292110010020027026110884243899221580700995502002905442596567602416975661268939211764732110000049386858038993738994870811599001888581457812760909759177760814151832838994738994710001120021109101000010010100001023003410291985827822980200348341999822978410000021762020099400341097732433898882897931000010010389937389939389939389950389937
200243899372921200200201377336202240038992170701005502002905442596567602406975661268938711764799110000049386856038993638997770809599091888571457811760812759168760810151832238994738993610001120021109101000010010100001023006542625641952298113035824200072300821000009922020099400341097232343898782897931000010010389937389948389949389949389937
20024389947292122200020043028100280320389921135070089550200290544259656760242697566126894071176471111000004938685603899483899367082359906188857145780676081475916976081215183223899493899361000112002110910100001001010000102300388284666828322980610362522000723008310000011521020099400341097832343898782897931000010010389979389960389950389937389939
2002438998529211100001999802921027265180389932171070088550218290552259656760242697568126893601176477301000014493868690389949389936708145990918885714578057608107591707609941518336389947390160100011200211091010000100101000010230088102957052257229801603371302000723009410000016480020099400371097032343898882897931000010010389937389948389940389937389938
200243899482921220200200340252200722438997319607009955020029054425965676024669756612689375117648431100000493868690389948389948708165990018884614578107608127591727608141518322389936389938100011200211091010000100101000010229730252732027222980132303402000723001510000012800020099400361097234353898882897931000010010389949389949389948389937389937
200243899362921020000200640299102648116389933174070088550202290544259656760242697566126893921176471101000004938686903899373899477081659903188857145781076080975920376081415183223899363899471000112002110910100001001010000102300808262665127922980630357382000423008110000013760020099400341097032343898872897941000010010389937389937389937389949389949
2002438993729210000002004402382006624389933150070099550200290544259656760244697566126897641176505111000004938685603899483899497081459900188857145780476081275916776081115183243899363899471000112002110910100001001010000102300800299323328422980820376422000123010410000011840020099500341095232343898872897931000010010389948389938389944389948389948
2002438994729210000002002803261024069196389921181070090550200290544259656760239697566126894201176476111000014493868680389947389947708095990918884514578087608097591697608101518328389949389936100011200211091010000100101000010230019026615212292297944027598200072299991000009762020099400341097032343898932897931000010010389937389937389937389949389948
20024389952292110000020016022420568028389921137070099550214290546259656760244697566126893881176473501000004938381403899363899367081459903188851145782976090075920676080915183283899373899361000112002110910100001001010000102300630266212726522980030346442000423003310000019042020099400361097032343898772897931000010010389948389937389948389948389948

Test 3: throughput

Code:

  stxr w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0107

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020538010428470020001010181380092070271529272106975185731069791573654384104618631100004937702738011138010713043759959249018926433191009384431910018768863801073801071000111020110099100100001001000010045900604588660458866001999845886610000010000010172638113111059131131380048119710000100380108380108380108380108380108
1020438010928470019998010181380092070271529272106975185751069791573654384104618630100004937702738010738014613043959948248980926433191009384431910018768863801073801071000111020110099100100001001000010045886604588660458868001999845886610000010000010172551112911066130130380040119710000100380108380116380108380109380108
1020438010728470019998010181380092070271529299106975185811069791573654384104618630100004937704438010738010813043759947248992926433191009384431910018768863801073801071000111020110099100100001001000010045886604588660458866001999845886610000010000010172638113011072130130380040119710000100380110380108380108380110380108
1020438010728470019998010180380092070271529272106975185751069791573654384104618681100004937705238010738010713043759955248995926433191009384431910018768863801073801081000111020110099100100001001000010045886604588660458866001999845886610000010000010172636113011062129129380040119710000100380108380108380108380108380110
1020438010728470020715010181380094070271529272106975185751069791573654384104618631100004937704438010738010713043759960248987926433191009384431910018768863801073801071000111020110099100100001001000010045886604588660458866201999845886610000010000010172638113011072129129380040119710000100380108380108380108380108380108
10204380107284700199980101813800920702715292721069751857510697915736543841046186311000049377318380107380107130437599492489809264331910093844319100187688638010738010710001110201100991001000010010000100458866045886604588689302000145886610000210000010172636112911072130130380040119710000100380108380108380108380108380109
1020438012428470020127010180380092070271529272106975185751069791573654384104618631100004937702738010938012413047259951248979926433191009384431910018768863801073801281000111020110099100100001001000010045886604588660458874001999845886610000010000010172636113011066130130380042119710000100380108380108380108380108380108
1020438010728470019998000180380092070273529272106975185751069791573654384104618630100009837703238010738010713043759951248979926433191009384431910018768863801073801071000111020110099100100001001000010045886604588660458866001999845886610000010000010172640113111072131131380040119710000100380108380108380108380108380108
1020438010828470019998010181380092070271529272106975185751069791573654384104618631100004937702738010738010713043759953248979926433191009384431910018768863801073801071000111020110099100100001001000010045886604588660458866012000445886610000010000010172640113011072130130380040119710000100380108380108380108380108380108
102043801072847001999801018138009707027152927210697518575106979157365518910461863110000493770273801073801071304375994724898592643319100938443191001876886380109380108100011102011009910010000100100001004588660458866045886600200154588661000001000001017263911301107542131380045119710000100380111380111380111380113380111

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0019

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10025380019284711311710199981400023800060702675197311067518665106791580754241047321611000049376939038001938001913835559939249335916874191095387719101907752380019380019100011100211091010000100100001045897315458957004589710220012458958100001410000131696827511079077703799541171000010380020380067380064380020380020
100253800222846115101410199981400003800040707025198461067518667106791606354301047063011000049377631038068938001913836059945249345916924191095388119101907762380019380019100011100211091010000100100001045897414458958024589720120012458958100001410000111696668411074077773799521171000010380020380020380020380020380020
10024380019284619111310199981400003800040702695197311067518663106791580754241047064811000098376950038001938001913835559945249351916873191095388219101907752380019380019100011100211091010000100100001045897314458958024589710120012458958100001410000111696787611080069743799521171000010380022380021380020380020380020
100243800232847113111400200161400003800040702685197301067518709106791580754241047063611000049376939038001938001913835559940249341916874191095393019101907754380019380019100011100211091010000100100001045897214458958014589720020012458958100001410000121696787611067076773799521171000010380020380020380021380020380020
10024380019284711401910200011410003800040702675197301067518664106791580754241047054311000049376939038001938001913835559946249341916874191095388219101907752380021380019100011100211091010000100100001045897414458957014589720220222458958100001410000101696667711080077773799521171000010380022380020380020380020380020
100243800192846114001210199981400003800040702675197301067518663106791580754241047054311000049376941038001938002113835559955249335916873191095387619101907752380019380019100011100211091010000100100001045897215458957024589770220012458958100001410000111696787711080075773799521171000010380020380020380020380020380020
100243800202846114011410199981500003800044702675197301067518667106791580754241047063011000049373892038001938001913837359939249339916874191095387619101907752380019380019100011100211091010000100100001045897116458957004589720020012458957100001410000111696667811072077723799521171000010380022380020380020380020380022
100243800192846114101410199981400003800044702675197311067518667106791580754241047063511000049376939038001938002113835559939249338916874191095387819101907752380037380019100011100211091010000100100001045897215458958014589720120012458957100001410000101696807011080076773799521171000010380020380020380020380020380020
100243800212847112011300199981400003800053702675197291067518665106791580654241047054611000049376939038001938001913835559940249339916874191095388119101907752380019380019100011100211091010000100100001045897315458958014589720120012458958100001410000111696787611080075763799521171000010380022380020380020380020380020
10025380020284718111410199981400003800040702675197301067518663106791580754241047063311000049376939338001938001913835659939249335916874191095387619101907752380019380019100011100211091010000100100001045897114458958024589740220012458958100001410000121696657711074069763799521171000010380031380021380020380020380020