Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLXR (64-bit)

Test 1: uops

Code:

  stlxr w0, x1, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 50.674

Integer unit issues: 0.000

Load/store unit issues: 50.674

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 1e | 22 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053799328576199810379780726750706506808781410031291100049349133799337993131405937243948781491431182864379933799310011100110001000449654496504499750200244965100010001660727206230293790910003799437994379943799438000
100437995284771998003797807267507105067687814100311411000493491337993379931314059412439687814914311828623799337993100111001100010004496544965044965460200144965100010001660731206031313790910003799437994379943799438001
10043799328477199800379780726750674506838784410031140100049349133799337993131405945244068781491432182864379933799310011100110001000449654496504498170200744965100010001660731206429303790910003799437994379943799438015
100437993284561998103797807267506745068087814100312911000493491337993379931314059372439487814914311828623799337993100111001100010004496544965044965510387844965100010001661728205728313790910003799437994379943799437994
100438232285771998003797807267506745067487814100311401000493491337993379931314059372439487814914311828643799337996100111001100010004496544965044965430200444965100010001660730205927293790910003799437994379943799438008
100437993284561998103797807267506745067487814100312901000493491337993379931314059372439487814914321828643799337993100111001100010004496544965044965530200144965100010001660729206329313790910003799437994379943799438002
100437997284771998003797807267506745067487814100311411000493491337993379931314059372439487814914311828643799337993100111001100010004496544965044965630219044965100010001660729206329313790910003799437994379943799438007
10043799328467199800379780726750697506988781410031141100049349133799337993131405937244068781491432182864379933799310011100110001000449654496504496500210544965100010001660730206328313790910003799438029379953799437995
100437993284771998003797807267506745067487816100314411000493491337993379931314059492440287814914321828623799337993100111001100010004496544965044965420389644965100010001660731206328303790910003799437994379943799437994
100437998285771998003797807267506745067487814100311411000493491337993379931314059432439687814914311828623799337993100111001100010004496544965044965400199844965100010001660530205729293790910003799437994379943799438005

Test 2: throughput

Code:

  stlxr w0, x1, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9197

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2021438917129152300021452722173231211312838926021523692795585382989432595887888136975021274565811719193010000049386827389142389147835435921218118814863447944887786227944851557264389267441000112020110099100100001010010000100232193233562615510189223029076025441042019623213010000716641002094561371097856643890852914421000010100389264389264389266389230389145
2020438923329154000021953723551227213412438924521833692805585312989392595887888106975181274704211718313010000144938619338926738927883478590781813021486321794509778616794649155721438926444100011202011009910010000101001000010023219239346401565019072302997202501542025623209910000713081002094541561097556643892042914341000010100389150389264389267389266389262
202043892682916444002200572240122881171243891282114469401558538298938259600788812697506127454031171829701000004938619538927638926483480590791811851486300794520778687794505155734438928044100011202011009910010000101001000010023220731350991558018992303087602518542024323216710000717121002094551571097256643892052915561000010100389139389139389270389143389169
202043891442916400002181072239123041121243891341987369402558540298957259612788816697506127463811171788201000004938619538926838934083549590811813231486319794507778684794529155736838914144100011202011009910010000101001000010023218431415051582019112303117642535542071023217010000716081402094541581097254653892172914491000010100389283389154389262389139389142
20204389147291544000218983592217123441251243892442148469404558530298977259594788796697498127454121171739101000004938606338914638941883543592061814341486331794665778708794521155772638914944100011202011009910010000101001000010023219431349711564018942302987802514802023723213910000713481002094531581097256663892252914441000010100389146389146389150389146389142
202043891512914404002148572215122401371243892502081369404558555299069259590788793697523127463871171860501000004938606438926838914783483590831811951486318794508778612794508155736038927644100011202011009910010000101001000010023221031350971584018712302957882519502026823217410000716001802094581561097836653890842914431000010100389293389268389145389266389451
202043891392916440402205772189123361221243892562149369280558544298935259592788987697495127463061171828401000004938608038914838914583475592061813091486309794521778624794488155735838915344100011202011009910010000101001000010023220627378331611018872302927742524542227423215810000717401002094551571108655663890902914371000010100389265389417389276389266389270
2020438913729174000421563183222612368119124389251214645694045585242989532595707887896975211274577311719819010000144938618238926638914783555592021813111486309794517778620794527155739638926944100011202011009910010000101001000010023219031379991599018932303147482497542217923212110000718361602094541571097056663892042914401000010100389267389542389146389263389264
2020438914129154400021693722381226411712438925021284769402558530298938259588788820697513127453241171879001000004938620738927138915083544590791813061486337794522778616794507155722438942744100011202011009910010000101001000010023219935371081596018812303028802484502023223222110000715601002094531571097856643890802914321000010100389157389140389153389448389139
202043894572915404002144272234123361521163892502128469279558563298949259592788991697499127450641171900601000004938618938914838926883538592011811941486331794497778638794485155726438914944100011202011009910010000101001000010023217131360931615018912303008742607522023623215110000717841802094681571097654653892042914421000010100389267389162389399389269389142

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9956

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200343899682921300002201472274220886824438995521223700885502302905702596707602656975791269032011765299110000049386869038995138997670837599001888551457967760832759188760840151838438995744100011200211091010000100101000010232280213782515510191923034573025341082023023225910000717601002010000331097831343898972898061000010010389961389963389966389950389997
2002438995029213401321629722662188885244389936215637008855022829055725967176027269757912690039117652640100000493868750389952389962708295989018887314579647608557591937608511518380389962265071100011200211091010000100101000010232264234010815960190623034776025381122022123230710000715681002009980331097431333899052898041000010010389961389949389959389962389949
200243899512921300002206172277218409024438993521744700885502422905532596727602846975821269014711765625110000098386871038996138995170827598881888671457848760832759192760834151838438994744100011200211091010000100101000010232282193706215990194423035875125562422024323226010000717361302010000331097431333899052898141000010010389962389959390024389954389960
200243899462921222002227172304221527724838993921874700885502322905552596677602826975791269016911765938110000049386873038996138995670838598951888531457839760830759197760842151840238994944100011200211091010000100101000010232265153768116080191723034874025642802022923226010000715201002009970331097437393898932898091000010010389947389949389959389949389949
200243899492921300002180072282220809024838994621604700885502322905582596717602686975811268994511765370010000049386868038996738996170826598891888521457852760824759173760836151839038994644100011200211091010000100101000010232292233395515740194523034775325251082026123226510000716601002010010331097231333898882898061000010010389964389951389965389951389953
200243899602921330002153172296220247125238994420934700895502222905592596687602576975901269006011765369110000049386870038996238995170826598901888531457844760828759185760848151841838996144100011200221091010000100101000010232264153597916350190623030574025102562023023226410000715401002009990391097831323898942898071000010010389947389964389960389951389960
20024389961292122000217721152258221367424038994620634700995502382905562596667602706975831268999311765417010000049386871038996138996470824599001888641457848760852759272760840151839838995244100011200211091010000100101000010232278233671915810192323034769025451222020423227410000716281042009990391097637393899032898221000010010389964389953389963389949389951
200243899632921200002200872391218728224438995021424700995502242905602596687602656975741269077611765202010000049386869038996139000170842599081888681457838760855759178760834151837838994844100011200211091010000100101000010232273153158116130189223037162025381082022923225210000716681002009990391097731333899652898101000010010389982389963389948389957389951
20024389950292033000220527223322136822483899462144370099550234290557259689760270697646126902171176537001000004938688003899623899497082859899188871145783076084075919376084915183623899584410001120021109101000010010100001023227323356111580019612303265902510962023323231310000717361302010000401097431333898862898141000010010389951389961389951389950389959
200243899632921220002193972235221367224838994221303700995502242905562596697602666975771269008711765317010000049386870038995938995170843599001889261457841760840759187760852151840638994544100011200211091010000100101000010232285153825616390190523035076025041942021623226910000716881002009980331097431333899102898081000010010389968389954389950389949389948

Test 3: throughput

Code:

  stlxr w0, x1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0110

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102053801012847000001999800031380092070271529272106975185751069791573654384104618630100004937702703801073801071304375995324897992643319100938443191001876886380107380107100011102011009901001000010010000100458866045886604588660020013458866100000100000210172682113111071131131380168119710000100380109380108380108380108380108
1020438010728470000019998000180380092070271529276106975185751069791573654384104618631100004937702703801073801071304375996324898592643319100938443191001876886380109380107100011102011009901001000010010000100458866045886604588660019998458868100000100000010172642113011059131131380040119710000100380108380108380108380108380108
1020438010728470000019998000181380092070272529302106975185751069791573654384104619090100004937397503801073801071304375994324898692643319100938443191001876886380107380108100011102011009901001000010010000100458872045886604588660019998458866100000100000010172640113011072131131380040119710000100380109380108380108380110380108
1020438012928480000019998000181380092070272529272106975185791069791573654384104618630100004937702703801243801071304375994924898992643319100938443191001876886380107380107100011102011009901001000010010000100458866045886604588660019998458866100000100000010172640113011064131131380040119710000100380108380110380108380108380108
1020438010728470000020061010181380092070271529272106975186191069791573654384104618630100004937702703801073801071304565995324898592643319100938443191001877128380107380107100011102011009901001000010010000100458866045886604588660019998458866100000100000010172638113111068131131380040119710000100380108380108380108380108380108
1020438010728470000019998000181380092070271529272106975186181069791573654384104618630100004937702703801073801071304375994724898792643319100938443191001876886380107380107100011102011009901001000010010000100458866045886604588660019998458866100000100000010172640113111070131131380041119710000100380108380109380108380108380108
102043801072847000001999800018138009210270398529328106975186061069791578454395104618630100004937702903801073801071304375996024898392643319100938443191001876886380107380107100011102011009901001000010010000100458872045886604588661020941458904100000100000010172640113111071131131380040119710000100380110380108380108380108380108
1020438010728470000019998010181380092070272529272106975185811069791573671302104620010100004937702703801073801071304735994424900192643319100938443191001876886380108380107100011102011009901001000010010000100458866045886604588660020052458866100000100000010172640113111072131131380040119710000100380108380108380108380108380133
1020438010728470000019998000181380092070271529272106975185751069791573654384104618630100004937702703801073801101304375994724897992643319100938443191001876886380107380107100011102011009901001000010010000100458866045886604588660020005458866100000100000010172640113111072130130380070119710000100380125380108380108380108380108
1020438010928470000019998000181380092070271529272106975185751069791573654390104624610100004937702703801073801071304375996324898992643319100938443191001876902380107380108100011102011009901001000010010000100458866045886604588664019998458866100000100000010172639113111071131131380040119710000100380108380108380108380108380108

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0019

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002538001928461110199981403800040702675197311067518664106791580754241047078611000049376941038001938001913835659949249335916874191095389219101908018380019380019100011100211090101000010100001045897116458957024589710120012458958100001410000100169646401107840403799521171000010380021380020380020380022380020
1002438001928461100199981403800040702675197301067518664106791580754241047078301000049376939038002138001913835559942249345916956191095389219101907784380019380019100011100211090101000010100001045897115458958004589720220012458958100001410000100169646421108040403799521171000010380020380020380021380020380020
1002438001928471100201061403800040702675197311067518663106791580754241047078301000049376939038001938001913836759942249339916874191095389419101907784380019380042100011100211090101000010100001045897414458958004589710020012458957100001410000120169646421108041423799641171000010380022380020380022380020380020
1002438002128461111199981403800042702675197301067518667106791580754241047078001000098376942038001938001913835559939249351916874191095389519101907790380019380019100011100211090101000010100001045897215458956014589711020012458957100001410000100169646411107243433799611171000010380020380020380020380022380020
1002438001928471101199981403800040702675197301067518666106791580754241047078001000049373903338001938002113835559939249337916874191095389219101907784380019380019100011100211090101000010100001045897115458958014589720120012458957100001410000110169646411107940413799521171000010380022380020380020380020380020
1002438018728541222199981403800040702675197311067518664106791591154241047196801000049376939038001938001913835559939249335916874191095389219101907784380020380019100011100211090101000010100001045897314458958014589710020012458957100001410000112169647401107040423799521171000010380021380020380020380020380020
1002438001928471101199981403800040702675197641067518664106791580754241047078601000049376939038002038001913835559939249348916874191095389219101907784380019380019100011100211090101000010100001045897314458958024589720120012458958100001410000110169646421108041443799581171000010380020380020380044380020380020
1002438001928471011199981403800040702675197311067518599106791580754241047078001000049376939038001938001913835559939249335916874191095389219101907784380021380019100011100211090101000010100001045897314458957014589710020012458957100001410000110169646421108040443799581171000010380043380032380020380022380020
1002438001928471001199981403800040702675197301067518667106791580754241047078501000049376939038001938001913835559943249335916874191095389219101907784380019380019100011100211090101000010100001045897414458957904589720220012458958100001410000120169646421107942423799521171000010380024380020380020380020380020
10025380019284711011999814038000407026751973010675186661067915806542410470786010000493769393380028380022138355599552493489168741910953895191019077843800193800191000111002110901010000101000010458973144589580045897265120052458956100001410000100169646421108042423799611171000010380020380020380020380020380020