Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLXR (32-bit)

Test 1: uops

Code:

  stlxr w0, w1, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 50.674

Integer unit issues: 0.000

Load/store unit issues: 50.674

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 22 | 23 | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100537993285019980037978726750674506748781410032341100049349133799337993131405937243948781491439182878379933799310011100110001000449654496544965199844965100010001659122062223790910003801837994379943799437994
100437993285019981037978726750674506748781410032341100049349133799337993131405947243978781491439182878379933799310011100110001000449654496544965199844965100010001659122063223790910003799637994379943799437994
100437993285019980037978726750674506748781410032341100049349133799337993131405949244068781491439182878379933799310011100110001000449654496544965199844965100010001659122057223790910003799437994379943799437994
100437993285019980037978726750674506748781410032341100049349133799337993131405947243978781491533182878379933799310011100110001000449654496544965199844965100010001659122055223790910003799437994379943799437994
100437993284019980037978726750674506748781410032341100049349133799337993131405949244068781491439182878379933799310011100110001000449654496544965199844965100010001659122060223790910003799437994379943799437994
100437993285019980037978726750674506748781410032341100049349133799337993131405937243948781491439182878380313799310011100110001000449654496544965199844965100010001659122057223790910003799437994379943799437994
100437993285019980037978726750674506748781410032341100049349133799337993131405943243958781491439182878379933799310011100110001000449654496544965199844965100010001659122063223790910003799437994379943799437994
100437993284020040037978726750674506748781410032341100049349133799337993131405949244028781491439182878379933799310011100110001000449654496544965199844965100010001659122057223790910003799437994379943799437994
100437993285019980037978726750674506748781410032341100049349133799337993131405945244008781491439182878379933799310011100110001000449534496544965199844965100010001659122055223790910003799437994379943799437994
100437993285019981037978726750674506748781410032341100049349133799337993131405947244008781491439182878379933799310011100110001000449654496544965199844965100010001659122063223790910003799437994379943799437994

Test 2: throughput

Code:

  stlxr w0, w1, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9167

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | d8 | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202093892362916565075003209100849100536167252389183584069476558500298924259576788798697483127445911171774401000004938606538912638925083530590811811721486258794497778668794496155720838925038925310001120201100991001000010100100001002305319366332980659229834101834721999823055110000152800209481651097500617438919429142601000010100389241389251389129389129389253
202043891282915170071000201150791200816162160389236626069280558504298927259576788800697489127446671171773301000004938604938912738924983467592101813041486282794472778664794473155732838925238912810001120201100991001000010100100001002305921832787285072422985660985942002823056910000171200209482691097000417038906929142601000010100389253389130389251389128389252
20204389252291521000100000201600842100544183148389114608069402558500298924259576788771697486127446311171770201000014493860473891293891278352859203181171148626079449777866579449815573303891273891291000112020110099100100001010010000100230535232434301071822985990952862001023057410000140820209481691097500577838919029142301000010100389251389253389251389131389129
202043892502915210001000002024308181007761752323892355740692805585162989272595777887726974871274457811716922010000284938617338925338925383531592061811741486257794504778665794471155732638912838925010001120201100991001000010100100001002305870399452150721229839220897882002823055910000138440209480701097000657838906729142501000010100389130389252389129389305389253
20204389129291621020100000201620856200608155272389234615069430558524298926259576788778697488127447061171768911000004938604738912738912783464591771812971486261794497778669794470155721238925238912810001120201100991001000010100100001002305321235700196067922986130864902000723060810000155220209479661097200657838919029143801000010100389128389129389253389128389253
202043892492978262060000200930844100744157212389113563069404558502298925259577788771697487127447401171696501000004938604738925038913083464592031811731486284794496778665794497155733038926038912810001120201100991001000010100100001002305521233346309067322985852849722001623059510000143220209479701097518964607238937229145301000010100389137389251389128389251389130
202043892063135372070100202530819020720189116389235569069402558516298926259576788770697486127447031171697301000004938604738925238925483467592061812921486391794474778604794500155720838912938925410001120201100991001000010100100001002305876320963710685229863110928522003123062410000134420209479701097800657838907929142301000010100389128389129389129389129389253
202043892512915250210200020124085700257619639638911352306940255851629893525957678880369748612744601117169170100000493861703892503892528352759255181295148625679449777860479447315573303891283891311000112020110099100100001010010000100230544831681241066022985350910622001623052910000128000209479661097500657838906729142801000010100389253389255389254389129389254
202043893162915292212000020154084300251217415238911260306928055851429892425957778880169748612744605117168710100000493860483891273892518353059080181168148628279447877866479447415572003892493892501000112020110099100100001010010000100230652436087291069422986780929422004623051410000128000209483661097200657838919029142301000010100389252389215389303389253389128
20204389128291521000900002021707840205841732963892366180694045585022989252595777887716974871274471111717796010000049386173389253389252835285920218129114862847944777786677944731557208389250389127100011202011009910010000101001000010023047923553232606482298486289092200162306071000013928102095781051107100657838906829142401000010100389128389254389250389129389131

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9945

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20029389954292100000002033308091045611322838993453707009955020029054425965776024969756712689430117647721100000493868690389937389937708115988918885714578127608157591647608111518322389949389939100011200211091010000100101000010230707113885829407252299459088832200132306221000071288010200997491097645473898962898031000010010390253389958389948389950389957
20024389957292001100002021578111063212825238993759907009955020229054425966676024869757412689803117652390100000493868660389945389946708205988918885114578247608167591597608161518354389939389949100011200211091010000100101000010230642732069308067522993611089082200132306131000001488000201001481097646433898902897941000010010389958389949389953389951389949
200243899482921000000020289083610496122124389921647470100550216290552259664760261697574126899151176518411000004938687633899553899557081559889188864145783776082275917176082015183543899454410001120021109101000010010100001023074773556630607262299735096646200172306981000071472112200999511097846483898762897961000010010389959389977389948389940389938
200243899372921000000020246085310632126112389931533370090550218290559259665760244697566126894381176478501000004938685703899363899507082859895188856145782676082075917676082315183563899554410001120021109101000010010100001023073283960329406852299254189690200202306011000001328000200996481097847483898852898021000010010389971389947389945389958389989
200243899442921010010020166779410704112308389941704370088550216290554259665760252697575126898591176508711000014493868690389937389937708165990618885714578087608137592817608141518328389940389941100011200211091010000100101000010230699163411632207452299629189742200232306731000071360110200999481097646493898852897931000010010389958389945389937389948389948
200243899472921000000020227084120728117224389978715070099550204290560259657760244697567126894361176471201000004938686503899563899487081659898188846145782376081875916476082315183443899373899501000112002110910100001001010000102307291032797272064322996262886114200382306251000071432100201000481097447483899372897961000010010390234389963389949389946389957
2002439003029210110100201887808106321122203899345460701005502022905452596577602426975671268940311764700010000049386857038994938994870809598981888581457936760818759177760812151834238995038994910001120021109101000010010100001023077582949928607012299229087284200072306251000001368000200997481097846483898782897961000010010389982389957389942389937389997
2002438995129210010000201877811207281101363899336414700995502182905662596657602486975741268975411765196010000049386864038994538994670821598881888561457824760828759174760823151835238995844100021200211091010000100101000010230665740148340070622996112092150200262306321000071568110200999481097647483898852898031000010010389962389957389957389945389947
200243899452920010000020222081410696961803899224600701345502282905652596577602446975671268950711764719010000049386857038993638994870814598891888591457818760813759160760818151834038993738993910001120021109101000010010100001023068583428426607102299397095266200102305601000001504010200997491097746433898762897931000010010389947389952389937389949389939
2002438993729200110100203210856107441052163899236100701005502022905452596647602526975671268945311764711010000049386869038994738995170828599061888591457825760820759170760819151832438995038993710001120021109101000010010100001023067873464526496702299507188842200172306431000071616130201000481097046493898952897971000010010389983389957389948389937389949

Test 3: throughput

Code:

  stlxr w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0115

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102053818002901102114115349021048610038392707027652927210697518575106979157155438410461754010000493770300380154380119130429599472489759264121910093841819100187683638014338012010001110201100991001000010010000100458882154588680045888201200124588681000014100001110172637113311082131131380051119710000100380119380119380119380119380119
102043801182848111001999814103338010427027652927010697518577106979157175438410462140010000493770380380118380118130436599512489829264141910093841819100187683638014738011810001110202100991001000010010000100458885144588900145888200201004588671000014100001010172651113711182131131380267119710000100380119380119380119380119380119
1020438012028471100019998140018238010307027652927110697518575106979157175438410462141110000493770390380118380118130437599512489989264131910093841819100187683638012538011810001110201100991001000010010000100458882144588670045888200200124588681000014100001010172651113711081130130380051119710000100380141380119380119380119380119
1020438011828481010019998141018338010307027652927410697518574106979157165438410462146010000493770380380118380120130436599612489909264141910093893319100187683638011938011810001110201100991001000010010000100458882144588680045888101200124588671000014100001210172637113111068130130380051119710000100380119380120380119380119380119
1020438011828471110019998140018238010307027752927110697518574106999157175438410463683010000493770380380118380118130436599442489949264141910093841819100187683638015038011810001110201100991001000010010000100458882154588670045888101200124588681000014100001010172636113011074132132380059119710000100380119380119380119380119380119
1020438011828471100020040141018538010337027652927110697518576106979157175438410462141110000493770380380118380228130409599512489989264741910093841819100187683638012038011810001110201100991001000010010000100458882154588680145888202200124588671000014100001210172651113011079131131380053119710000100380119380121380133380130380120
1020438011828471110020025141018538010307027852927010697518575106979157175438410462151010000493770380380118380118130436599542489849264141910093841819100187683638013238012610001110201100991001000010010000100458883154588670145888111200214588681000014100001010172639113111081130130380051119710000100380119380119380119380119380119
1020438012228471100019998140018338010307027652927010697518577106979157175438410462141010000493770390380118380118130429599642489909264141910093841819100187683638013338011810001110201100991001000010010000100458882154588780145888202200124588671000014100001010172639113111082131131380051119710000100380119380119380119380119380119
1020438011828481100019998141018338010307027652927210697518574106979156625438410462148010000493770380380118380118130437599512489919264341910093844319100187688638012638011710001110201100991001000010010000100458882154588670045888200200124588671000014100001110172638113011064130130380048119710000100380118380116380116380117380131
10204380115284710000199981400180380102137027152927410697518574106999157405438410462434010000493770350380115380115130444599532489899264351910093844319100187688638013538013010001110201100991001000010010000100458883154588680245888201200124588681000014100001010172638113011069130130380048119710000100380122380117380116380116380118

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0011

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10025380019284810119998010380004070267519731106751866710679158065424104707500100004937693903800193800541383555994724934591687419109538911910190778238001938004310001110021109101000010100001045897215458971024589530120022458957100001410000120169648481108051453799521171000010380020380020380020380020380282
1002438001928471111999814003799980702675197311067518665106791580554241047037901000049376933038001138004813834859939249327916872191095403419101907786380013380016100011100211091010000101000010458956045895600458956001999845895610000010000000169648501117651503799491171000010380012380012380012380012380012
100243800112846000199980003799960702675197321067518665106791580554241047036411000049376931038001138005013834759939249327916872191095389019101907782380011380011100011100211091010000101000010458956045895600458956001999845895510000010000000169650511107751433799921171000010380012380012380012380014380012
100243800112846000200110003799960702675197321067518665106791580554241047036401000049376931038005038005013835159947249329916872191095389119101907780380011380011100021100211091010000101000010458956045895600458956001999845895610000010000002169650511108044513799671171000010380012380012380012380013380012
100243800122847000199980103800044702675197301067518667106791580754241047069211000049376931038001138001913834859941249333916872191095389119101907786380011380011100011100211091010000101000010458956045895800458956001999845895610000010000000169650451107650513799441171000010380014380012380012380012380012
100243800112846000199980103799960702675197321067518665106791580554241047038401000049376931038001138004413834659939249327916872191095389119101907782380011380013100011100211091010000101000010458966045895600458956002020645895610000010000000169650511108043433799451171000010380012380012380012380012380012
100243800112847000199980003799970702685197321067518665106791580554241047050201000049376931038001238001913834859942249331916872191095389019101907780380012380013100011100211091010000101000010458956045895600458956001999845895610000010000000169654441107651513799441171000010380012380012380012380012380012
100243800112847000199980103799960702675197321067518664106791580454241047036401000049376931038001638002813834859947249335916872191095389019101907780380011380011100011100211091010000101000010458956045895802458956001999845895610000010000000169650441107051433799451171000010380012380012380012380012380012
100243800112846000199980103800250702695197321067518665106791580554241047036401000049376931038001138005713834859939249340916871191095401119101907796380013380013100011100211091010000101000010458956045895600458956001999845895610000010000000169649511108151433799441171000010380012380013380134380013380013
100243800112846000199980003800230702675197321067518665106791580554241047037901000049376931038001138002113834859952249327916876191095389019101907780380012380011100011100211091010000101000010458956045895600458956001999845895610000010000000169648501107844503799441171000010380012380012380012380012380012