Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLXRB

Test 1: uops

Code:

  stlxrb w0, w1, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 50.674

Integer unit issues: 0.000

Load/store unit issues: 50.674

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100537993285000202013003797807267506745067487814100318911000493491337993379931314059452439487814914361829203799537993100111001100010004496504496504496520199844965100001000001659537206325193790910003799437994379943799437994
100437993284000001998013797807267506745067487814100320401000493491337993379931314059372439887814914371829863799337993100111001100010004496504496504496500199844965100001000001659522205924193790910003799437994379943799437994
100437993284000001998003798657274506875067687816100359001000493492138001380011314759402440287816914371828743799337994100111001100010004500204496704496500199844965100001000001659522206329283790910003799437994379943799437994
100437996285000001998003798607267506725067387816100359301000493492138001380011314759372440487816914361828723800437994100111001100010004496504496504496500199844965100001000001659523206322273791710003800238002380023800238002
1004380012851003119981403797807267506745067487814100320401000493491337993379931314059372439587814914371828743800937994100111001100010004496504496504496500199844965100001000001659527206324263791710003800238002380023800238002
1004380012851010019981503797807267506745067487814100320401000493491337993379931314059452439487814914371828743799437993100111001100010004496504496504496500199844965100001000001659524205319283790910003799437994379943799437994
100437993284000001998003797807267506745067487814100320401000493491337993379931314059432439487814914371829383799537994100111001100010004496504496504496700200544965100001000001659526206329243790910003799437994379943799437994
100437993284000001998003797807267506745067487814100320401000493491337993379931314059372440087814914371828743799738002100111001100010004496504496504496500199844965100001000001659524206323273791710003800238002380023800238002
1004380012851100019981513797807267506745067487814100320401000493491337993379931314059492439487814914371828743799437993100111001100010004496504496504496500199844965100001000001659523206123243791710003800238002380023800238002
1004380012841000019981403797807267506745067487814100320401000493491337993379931314059372440287814914371828903799337993100111001100010004496504496504496500199844965100001000001659522206325233790910003799437994379943799437994

Test 2: throughput

Code:

  stlxrb w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9156

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020638935529161011002006607000008103891138069404558500298924259576788865697489127446671171770201000004938617003891893892718346659210181296148628279449777860479451215572103891283892521000112020110099100100001010010000100229648032855000229648100019998229648100001568002094521551097853353891902914231000010100389128389253389251389128389251
20204389250291600000020010014210001170389112590694025585002989242595767887966975281274541811716917010000144938617203891273892508346459206181296148628479465777866379449715575563891283892521000112020110099100100001010010000100229798044624901522296481015214620001229794100001985002094521551097053593891892914231000010100389127389128389253389250389128
2020438925029160000002000401401000830389113236469279558500298924259576788797697486127447861171692101000004938604803892503891278346659202181178148628679447477866579449915573263892503892501000112020110099100100001010010000100229731025130108422968062756620010229705100001424002094521551097853593890672914231000010100389129389380389897389252389128
20204389250291500000019998028000054038917731069279558500298924259576788799697486127446501171692101000004938617303892523892528352959088181180148628379449777866579451715572083891263892501000112020110099100100001010010000100229776033959901252296481012511820001229773100001616002094521551097653593891922914231000010100389128389127389253389129389129
202043891282915000000200040150100070038911314069404558500298924259576788798697526127447031171692111000004938617203891273892508352759080181298148625979447277866479448515573263892503891271000112020110099100100001010010000100229783039103801362296481012612019998229776100001744002094521551097853593890672914231000010100389251389253389250389128389129
2020438912829140000001999801401000114038911345069429558520298924259576788773697486127446051171766501000004938616903891273891278346659202181170148625979447477873579449915572083892503891271000112020110099100100001010010000100229790039839901422296482014213620001229790100001712002095351551097853593891922914231000010100389253389128389253389128389128
2020438912629140000002000401381000114038927447069279558500298924259576788796697486127445781171770201000004938618203891273891288346859210181173148625979449977866579447015572003891273891271000112020110099100100001010010000100229752030644110108229665301089820001229736100001600002094521551097853593891922914251000010100389128389250389128389253389128
2020438912729140000002000409120005003892370069404558500298924259576788796697486127446501171693301000009838617203891273892508353059218181301148628679447177860679451415573343892493892521000112020110099100100001010010000100229648032118000229648000019998229648100001872002094521551097853593891922914231000010100389129389253389253389129389129
202043892502915000000199980482000510389235406940255850029892425957678877469748612744644117176150100000493860470389127389250835295912518129814862607944967786657944981557200389127389126100011202011009910010000101001000010022964803653000022964812417020004229670100001184102094521551097853593891922914251000010100389251389128389251389253389128
2020438924929150000042060635298000050038911200692795585002989242595767887966974861274463011716921010000049386047038912838924983529592041812951486257794472778605794517155733038912738912710001120201100991001000010100100001002296880402130099229648101039019998229756100001248002094521541097853593891922914231000010100389253389128389250389251389128

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9948

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200263899702921102081401999871420056643899433947009955021829055225966476028269757412689805117650610100000493871340389956389957708175989018887014578207608187594257608231518338390169441000112002110910100001001010000102298551032138801112297661110613020118229872100007960122200996501097451533901052898011000010010390218389945389958389957389957
200243899562923161001002001535995100003899293537010055021629055225971676029269757412689770117674480100000493868640390188389944708175989218887314578277608207591707608221518338389955441000112002110910100001001010000102297439328720002297434100221482297361000071408110200998561097646533898962898261000010010389945389947390174389956389956
200243899572920101001301999895000044038992928470100550369290552259664760248697574126897961176802301000004938687603899443899557082259900188863145821976081875916976082215183383899554410001120021109101000010010100001022981524277220002297430100200052297361000071376120200998531097851533898842898011000010010389956389956389945389956389945
200243902182921111101001999878120020038992930270099550216290552259664760252697574126897621176514901000014493868640390249389955708725992418886414578267608187591697610041518348389956441000112002110910100001001010000102297447343460072297430100200262297361000072080110200995531097843533898842898011000010010389982389946389956389956389957
200243899442920100001402000479120036038992926370100550216290566259664760248697574126899081176503501000004938687603899553899557081659888188856145782176103175918076082215183423899464410001120021109101000010010100001022986673802411012122974331123106200112298341000071152110200998531097045543898952898061000010010389945389945389956389945389945
200243899452921101101001999877900040038994020270088550216290552259664760247697574126897681176517211000004938687503899443899447082259901188853145782876081975916976081815183483899444410001120021109101000010010100001022983583287910098229743018874200082298361000071536110200998531097850563898842898011000010010389956389956389946389945389956
20024389944292110100100199987000012038992903702875502162905522596647602526975741268976211765087010000049386876038995639027270822599001888561457826760817759272760819151834238995644100011200211091010000100101000010229771925514307229743006560200112298351000071312122200998531097851483898952898011000010010389945389956389945389956389945
20024389944292110101100200047721004103899311227008855021629055225966476025269757412689777117651750100000493868640389944389956708175988818885314578267608257591687608221518338389945441000112002110910100001001010000102297538284580022297430200200052298311000071152110200998481097852473898952898011000010010389958390201389946389945389945
200243899562921100000001999872800093638994082700885502162905662596647602466975741268976211765135010000049386875038995538995570822599001888681457826760818759168760822151833438995644100011200211091010000100101000010229783826986603722975311280200052297841000071056110200999541097051453898962898011000010010389945389945389945389957389957
200243899562920100001001999871081004803899294427008855021629055225966976024769757412689806117651480100000493868660389957389956708155990018885614578287608177591717608221518344389955441000112002110910100001001010000102297767306663012022974300117102200082298521000081088100200998531097451533898862898011000010010389984389957389956389957389945

Test 3: throughput

Code:

  stlxrb w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0107

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c9 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102053801182848000000199980100380092070271529272106975185751069791573654384104618631100004937702703801073801071304375995124898492643919100938443191001876886380109380107100011102011009910010000100100001004588660458866045886600199984588661000010000010172638013411072131131380040119810000100380108380108380108380108380109
10204380107284700000019998010181380092070271529272106975185751069791573654384104620011100004937399603801073801071304375995524898792643519100938443191001876886380107380107100011102011009910010000100100001004588660458866045886600199984588661000010000010172638113211064132132380040119710000100380110380108380108380109380123
10204380109284700000019998010180380092070271529272106975185751069791573654384104618631100004937397503801103801071304375995024897992643319100938443191001877438380107380107100011102011009910010000100100001004588660458870045886600199984588661000010000010172636113111064132132380042119710000100380108380108380108380108380108
10204380107284700000019998010180380092070271529272106975185751069791573654384104618680100004937702703801073801071304375994724897992658919100938443191001876886380107380107100011102011009910010000100100001004588660458866045886600199984588661000010000010172636113111069132132380040119710000100380108380108380145380108380108
10204380107284700000020008010181380092070271529272106975185751069791573654384104618631100004937702703801073801081304375994724897992643319100938443191001876886380107380107100011102011009910010000100100001004588660458866045890600199984588661000010000010172636113211072132132380040119710000100380137380110380145380139380171
10204380109284700000020789001180380092070271529272106975185751069791573654384104618630100004937702703801083801071304375995124898292643319100938443191001876886380107380127100011102011009910010000100100001004588660458866045886600199984588661000010000010172636113211062131131380043119710000100380108380108380108380108380108
10204380108284700000019998001181380092070271529272106975185751069791573654384104618631100004937702703801073801091304375996324898992643319100938443191001876886380107380107100011102011009910010000100100001004588660458866045886600199984588661000010000010172636113211061132132380041119710000100380108380111380108380108380108
10204380107284710000019998001181380094070271529272106975185751069791580854384104618631100004937702703801073801071304375996024899292643319100938443191001876886380107380107100011102011009910010000100100001004588660458866045886600199984588661000010000010172636113211071132132380041119710000100380110380108380108380108380160
10204380107284700000020025001180380092070269529281106975185751069791573654384104618681100004937702703801073801071304375994724897992643319100938443191001876886380107380107100011102011009910010000100100001004588720458866045886600199984588661000010000010172638113211072132132380064119710000100380122380126380108380108380110
10204380109284700000019998000181380092070271529272106975185751069791573654384104618631100004937702703801073801081304375994824897992643319100938443191001876886380107380107100011102011009910010000100100001004588660458866045886600199984588661000010000010172638113211070132132380071119710000100380108380111380108380108380108

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0019

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002538001928471010001999814100380004470267519766106751866410679158215424104707710100000493769390380022380019138378599392493359168731910953891191019077823800543800331000111002110910100001010000104589731445895800458972002001345895710000141000011169648421108141413799521171000010380020380020380020380020380020
10024380019284610110019998141003800040702675197311067518667106791580654241047076501000004937693903800193800191383555994024934391687419109538911910190778238002138001910001110021109101000010100001045897315458957004589726412003345896010000141000010169648421107941413799531171000010380021380022380020380020380021
1002438001928471111001999814100380004070267519731106751866410679158795424104707680100000493769390380019380019138355599492493359168741910954274191019091363804783800191000111002110910100001010000104589731545896002458971012001245895810000141000012169647441107041413799521171000010380020380020380020380020380020
1002438001928461100002002214100380004070267519731106751866510679158065424104707711100000493769390380019380019138355599552493359169481910953891191019078943800193800191000111002110910100001010000104589711445895800458972002001245895810000141000010169648421108041413799521171000010380021380020380020380020380020
1002438001928461000001999814000380004070268519731106751866210679158065424104707680100000493769390380019380019138355599552493359168741910953891191019078843800193800191000111002110910100001010000104589721645895700458972022002245895810000141000012169648421107444443799521171000010380020380020380020380020380022
1002438001928471111002010314100380004470267519730106751866410679158075424104707680100000983769390380019380020138358599392493379168741910953891191019078143800193800191000111002110910100001010000104589721545895601458971012001245895810000141000011169648421106942433799521171000010380021380020380020380020380020
100243800192847111100199981400038000407026751973110675186671067915807542410470765010000049376940038002138001913835559939249335916874191095389119101907878380019380020100011100211091010000101000010458972144589570045895600199984589561000001000000169650411108041413799451171000010380012380012380012380012380012
100243800132847000000199980100380004470267519730106751866610679158075424104722600100000493769390380019380019138355599392493459168741910953891191019077883800193800191000111002110910100001010000104589731645895802458972022001245895710000141000011169648421108043433799521171000010380022380020380020380020380020
1002438001928471011001999814100380004070267519733185951866710679158075424104707070100000493769390380019380019138354599392493359168741910953894191019079103800193800191000111002210910100001010000104589731445895701458972012001245895810000141000011169648411108042433799521171000010380020380021380022380050380020
1002438001928471101001999815100380004070267519743106751866710679158065424104707680100000493769390380031380019138355599552493359168741910953894191019078623800193800191000111002110910100001010000104589731545895800458971912020645895810000141000011169652421108142413799531171000010380020380022380020380020380020