Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWP (32-bit)

Test 1: uops

Code:

  swp w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
7200533224249312701100310005810330471128241200020002000100003049301873312833269313200020003000331355211117100110001000020003210011000102200112201608311931081723889118223452396244461459613260117186157161635320003305933202335413336333118
7200433266248262900100201005977329261128037200020002000100005049301383314433183310200020003000330475239117100110001000020002310001000102200112201602811722082453971125723419387944411655593256316650146301627320003311333073333383315433276
7200433221248252900100201005856329250028149200020002000100003049299543301533145310200020003000330725268117100110001000020003210011000102200112201588711757083583859115923360384844461163603268417222151361615620003305133171333593336633196
720043307624727280010020100595933120112839620002000200010000304930107329913309331020002000300032877519511710011000100002000231001100000220011220160671183208349397410542332039374444858583260017821148021619620003312233076336273314033175
7200433211249332400100001005990330021128141200020002000100004049299723302533094310200020003000329815219117100110001000020002010011000105200112201532311483183243952106223398400044381258593256216551147161674920003324332982332693337233285
7200433223248272700100200006063330891128103200020002000100004049301743306833474310200020003000330205206117100110001000020003310071000102200012201621211781083263936136823204386944431456623257216944148091649720003306133266332963341033266
720043334424829260010250100600433204112809720002000200010000104929988328543323231020002000300032925519511710011000100002000221001100010220001220160031181908334393496123184380244432157573266317335148971630020003314833210334283335433101
7200433202248302401100201005880328631128336200020002000100004049300613311033085310200020003000330085206117100110001000020002210011000002200102201615611971083653970124723230394544441462543261117308148981599120003310333100331273353033260
7200433039249262900100201005998330061128170200020002000100015049299723289533145310200020003000328945239117100110001000020002210011000002200112201619211921083583988156023229396744441456563255516732146591627820003318433195332323321533128
7200433184249243000100201005904330520127942200020002000100001049300223300633073310200020003000331895195117100110001000020003210011000002200110201605212142083273994155823453394344431362553259516839151871656320003300133180334963319332990

Test 2: throughput

Code:

  swp w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0067

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3020730067225202003740112221005612030052233296213113511253010010101200001010020000604861411397014926987030067300672602732627530100102002000010200300003006778112020110099100100001010010000010020291155413370103531006322421222251204645158919956913113101161130064100009920000101003006930068300683006830068
3020430067226100003679719410850100300522532472125135132530100101002000010100200006047314114231049269870300673006726028326275301001020020000102003000030067771120201100991001000010100100000100202271666625418103531006422422018214204323136817768114113101161130064100009920000101003006830068300683006830068
30204300672251000036237225100621923005219424698113472253010010100200001010020000604581411414134926987030067300672602932627530100102002000010200300003006777112020110099100100001010010000010020260154273120103011006621326088237202717137922260014013101161130064100009920000101003006830068300683006830068
302043006722510000380891841085222430052217265710913568253010010100200001010020000604881411354004926987030064300672603132627530100102002000010200300003006777112020110099100100001010010000010020332145202340103651006823619898245202225131729071214213101161130064100009920000101003006830068301193006830068
30204300672251000036757225100651163005221523639313550253010010100200001010020000604931411361104926987030067300672602832627530100102002000010200300003006776112020110099100100001010010000010020251145932340102981004317321124204202634130823063213013101161130064100009920000101003006830068300683006830068
302043006722511100381772401085518030052236244911213534253010010100200001010020000604811411382014926987030067300672602832627530100102002000010200300003006778112020110099100100001010010000010020270145602720103131004021420070230203007130819854014113101161130064100009920000101003006830068300683006830068
30204300672251000037288205108651163005221120719913549253010010100200001010020000604521411433104926987030067300672603032627530100102002000010200300003006776112020110099100100001010010000010020302155332780103501029727125524218203485146921450314213101161130064100009920000101003006830068300683006830068
30204300672251010037589218100567230052210176612413555253010010100200001010020000604911411409104926987030064300672602832627530100102002000010200300003006777112020110099100100001010010000010020274145892610103861006722122956221204073135719945714013101161130064100009920000101003006830068300693006830068
302043006722511100364982191006019230052220358110113567253010010100200001010020000605071411429004926987030067300672602732627530100102002000010200300003006777112020110099100100001010010000010020251145973000103151003917121228219203183131818950513113101161130064100009920000101003006830068300683006830068
30204300672251000036538235108694830052240274212113532253010010102200001010020000604821411412114926987030067300672604732627530100102002000010200300003006778112020110099100100001010010000110020237145532340103561006414420042211203377131721645714113101161130064100009920000101003007830068300683006830068

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0070

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
300273007622520200000003757617610144591723005519010815513492253001010010200001001020000600491411139014926990300623007026054326300300101002020000100203000030070781120021109010100001001010000010202251652527201029410060191252282222033861401127753416201270415223006810000101020000100103007130071300713007130071
30024300702261010010177461862223310566412430517225175786135911833130310577207301057221123742441422619004927451305203052826115412650631640105982115310603317353051977912002110917101000010010100000102098036463511461062210467232222301143121200712542295523201437108310103043610562131020000100103056030519306023059030572
300243052522922201000674781627177101605532300551606676713527253001010010200001001020000600521411492004926990301333007026053326300300101002020000100203074530525758120021109151010000100101000001020466356325312810574105291822145813058210494924210584300143488216123045410482101020000100103059030520305823051730190
300243051422910000100774755620193101044516030581132125989136252013130410576208371065321216742641416186004927502305843032726119402649331406106842115710677319713055678912002110971010000100101000001021163204812491661068910490194186341463321280310910217592170014851491383057410724101020000100103067930704307093071130650
300243070822714002000910482179720410855212304411788636613523253001010010200001001020000600521411482004926982300703007026047326301300101002020165100203000030070781120021109171010000100101000001020908175272781301052710463150130401786420902812211180455173014232373833007310000101120000100103007130071300713007130071
30024300702331010200000379062011013667883005521511716813545253001010010200001001020000601471411585114926990300673007026054326300300101002020000100203000030070791120021109010100001001010000010201824509252010307100681632286419020305511341615843001270415333006710000101020000100103007730077300773007730077
30024300762332020020000375412189100519230055186116574135022530010100102073210496215207242814268861049273053070630784261375126522328071076221982110123246430781771120021109010100001001010000010202043465269146105891034416217726146022032351391122843817201270316433006710000101020000100103007230077300773007730077
3002430076225202000000037091122801052168300551758606213523253001010010200001001020000600451411581104926990300673007026053326300300101002020000100203000030070781120021109010100001001010000110202531749526801028510076243220341892036071351222258217001270216223006810000101020000100103007130071300713007130071
3002430062226101000000036355203010521923004717416726513490253001010010200001001020000600531411576004926990300673007026038326301300101002020000100203000030070751120021109010100001001010000010202531852925501029010070182214342522035351161020650216201270216223007310000101020000100103006330071300633007130063
3002430062225101000000036664225011674963005515814517213538253001010010200001001020000601471411464004926990300703007026053326300300101002020000100203000030070781120021109010100001001010000010202411656535501032010053202185642282034961431017862417001270315223007310000101020000100103007130071300633007130071

Test 3: throughput

Code:

  swp w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0248

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20205302482251011033782083028601601372625201001002000010020000500142120914926972303293005227549627506201002002000820030012301922801110201100991271001000010010000110020000380111100461000011620048340343801117172601600300493066200001003005330193300833032330053
20204303282270100037152114302330160137022520100100202101002000050014236180492697230201303422727013276042010020020008200300123010247611102011009912710010000100100000100200003829010007100000362004811026001117163001600303293000200001003005330053301533033130253
202043034222700000334620283003720013828252010010020000100200005001423588149269723022230339275316274972010020020000200300003005247611102011009901001000010010000010020000029215100411000205220011110343801117223022522303290066200001003009930323300533024930320
2020430292225000003342212930037015161353525201001002000010020000500141014514927242302423032227524627777201002002000020030000303324761110201100998410010000100100000100200000292141004310002050200481123400111722022522301392006200001003005330183300733033030193
20204303322260000033820125300370015138062520100100200001002000050014236351492725230052300522747162769720100200200002003000030052282111020110099131100100001001000001002000038280100311000001820007432343801117223022522303151060200001003005330249300533024930320
2020430052225000003400202930037214151386325201001002000010020000500142214714927152303293031127529627765201002002000020030000300525611110201100990100100001001000001002000002814610046100020582004847234001117223022522302444006200001003034330053300533005330263
20204303322270000033832013031421501353525201001002000010020000500142363514927242300823032227409627637201002002000020030000303424261110201100991141001000010010000010020000382821210012100020442004933200011172218225223004910106200001003034030053302233005330053
2020430052227000003348001930259215171379925201001002000010020000500141960714927242300823005227428727636201002002000820030012303322801110201100991331001000010010000010020000382920610046100020492003512003801117220225003033900106200001003013330331300533033330333
202043024822500000338200253023321401378825201001002000010020000500141008404927237301043023827455627506201002002000820030012302064761110202100998810010000100100000100200003800100451000205220050723400111716001600303272006200001003005330143303333031930153
202043033222500110337421273030311515137662520100100200001002000050014100840492716830327300522727062750520100200200082003001230332375111020110099781001000010010000010020000382918310046100000532004533203001117160016003004930100200001003024930249300533013830053

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0157

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200253014722611111103390161193015321414138252520010102000010200005014233490492724130052303192748232765620010002020000203000030152286111002110901010000101000001020027273442641006410018008120056621600260640414162230056099020000103008830148301783014830060
200243015722610100103408161130044018181357325200101020000102000050141508004926979302013014727380327539200100020200002030000301943881110021109010100001010000010200252504258100661001601902005255142834260640422162230193096020000103008330323302463005330248
200243026222500000003381010303131161413720252001010200001020000501416955049271923032630329273213275392001021202000020300003021847911100211090101000010100000102000003829206100311000200482004731203800640452162230154099020000103006030148300603006030060
2002430159225111000033830129300372151513855252001010200001020000501410084049272373019230319275353276272001030202000020300003032956311100211097310100001010000010200000382915910043100020036200483423338006403221622300493106020000103033330053302493005330053
200243022822500000003379211930233016141385325200101020000102000050142316704927082303393005227287327639200100020200002030000303282861110021109127101000010100000102000003038010046100000048200453123538006402721622300491100020000103005330249303293005330331
2002430052225000000033822110300449181813566252001010200001020000501415028149270673005930059273813276372001002202000020300003005937811100211092710100001010000010200262733439910066100170181200566116034261640452162230154099020000103006030060300603014730060
2002430059226111100334111610301320181813568252001010200001020000501414975049270663005930176272943276692001000202000020300003008738811100211092710100001010000010200262734425710056100180094200546616034261640452162230056000120000103014830158301683015830148
2002430145226111100034121611430142018181350725200101020000102000050141553304927067301473014727390327539200100020200002030000300593781110021109301010000101000001020026283442010062100160090200296714270261640462162230154099020000103014830148301483014830149
20024301762261011000341114093013280181355425200101020000102000050141739004926979301533015727293327627200100020200002030000300593881110021109010100001010000010200000008710046100020048200484602638006403221622303303100020000103034030249300533011430053
200243033022500001003377202130302215151367925200101020000102000050141693204927172300523033327326327637200100220200002030000303224191110021109127101000010100000102000003828610046100020052200503320000640302162230319206020000103033130249303123021030053