Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPAB

Test 1: uops

Code:

  swpab w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
7200533216245116012211000100201005460331171127711200020002000100002164930106330153290631020002000300032795524711710011000100012000022100210000042001102200163311149108215400485723461398544422045503243817753148381702820003305632914329133306032693
7200433261248016002300000100701006082329180027905200020002000100005004930070330813309931020002000300032960528611710011000100002003322100210000042001202200163111196918396375795222804395944391245503228617183152621591020003295032839329133276333148
72004328552460200022000001005000056793287100281192000200020001000100049296753262332573310200020003000332745297117100110001000020000221002100000420021022001611211924184223923104422814370344451041433260017431155521615020003273733111331353271232718
7200432689250116111910000100410005683331080028257200020002000100009004929685327313281131020002000300032663529611710011000100002000022100210000042002302200156221115618512399294522816375844441251413253217823145631608320003297532699328503266033110
7200432631244116111910000100610005769326900027627200020002000100010004929534329443303931020002000300032881525721710011000100002000022100310000052003302200159731177418248381974723078388744411039423251116755147801662620003275032840327123266932813
72004332142450200015000001004000060873293400278852000200020001000020649299493264333077310200020003000326295180117100110001000120000221002100000520021022001582011887084633951144922995392044371045523237716485151871561720003310232614326623271633148
72004331352490160016011001005010058993273300279362000200020001000030049301543300832741310200020003000331565306117100110001000020000221003100000520033022001569011409181983818124422941377544451348443252516722153421602720003286333122329983276432860
7200432867247117101600000100410005782331020027689200020002000100002064930101326243313331020002000300033017528211710011000100002003322100610010152002410221164491155418427399273923163396344431444413241717711150921666420003275833067326373267632826
7200432634244015001500000100500005778330550027457200020002000100002004929956328063274431020002000300033077526011710011000100002000022100310000042002202200157541217818419386494322760396344411250443254317813148081606320003265433091331773309232747
720043308724801900160000010050000604733068002797320002000200010000000493001632691331163102000200030003270452701171001100010001200322210041001029200231222215718120121829441158462325139904442842503245317264146081698120003283032908326863275332632

Test 2: throughput

Code:

  swpab w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0195

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 20 | 22 | 23 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
30206302332270000000374305210270301793527614193891301001010020000101002000059212121734492712030228301991326400301001020020000102003000030173261112020110099100100001010010000010020048013113210386100102539364112037173412754700013101241130160100002421520000101003018130225301873029830188
3020430177226010000037661391018030199462781516390630100101002000010100200005921212174749271043018430178132641730100102002002810200300003018726111202011009910010000101001000001002005805713010363100073561363842035652301639800113231161130146100001315920000101003018130190302223017830228
302043018522600000003781163102203019824292171639083010010100200001011320000593911217354927134301983016513263753010010215200001020030000301642611120201100991001000010100100001100200601271991037810004154003722033543073687000013101161130170100002515920000101003018330182301933020130224
3020430167226000000037461301016030174252841463870301001010020000101002000059212121726492713630166301781326405301001020020000102003000030164261112020110099100100001010010000010020056012310310380100061539040920364332215963000131011611302151000022131020000101003018030167301813019430205
30204301802260000000375303610150302032428310133878301001010020000101002000059212121749492714530195302191326405301001020020000102003000030236261112020110099100100001010010000110020067011698103721000719453638120346431627879001131011611302301000022131520000101003018130200301823016030213
30204301712260000000378423210220301843327919133880301001010020000101002000059212121734492713030225302071326411301001020020000102003000030179261112020110099100100001010010000010020051011883103621000014271436820354431026279000131011611301641000031161720000101003022530190301993019530216
302043022022611110003835961102112301833728616123907301001010020000101002000059212121751492713830184302071326397301001020020000102003000030193261112020110099100100001010010000010020063141051141036710013165130432204076346895731313131011611301881000020131420000101003017530169301703018230175
3020430152226000000038064440020030170252871918390730100101002000010100200005921212173849270973017130177132638830100102002000010200300003017326111202011009910010000101001000001002006014109119103601001326352639720351631881261071310131011611301591000019121420000101003020030166301803017430159
302043017522600000003778336101940302282428525223878301001010020000101002000059212121758492717330244302411326429301001020020000102003000030204276112020110099100100001010010000010020037091921037310004254360397203675324211871001131011611301781000026102220000101003022430234302233023230207
302043026422610100003866122400244030159342812418390530100101002000010100200005921212173649271693023330249132643830100102002000010200300003017526111202011009910010000101001000001002008007898103781001520320416204104357513379000131011611301941000020171620000101003023830224302313024630248

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0177

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3002630212226000000368503210015030146372141322388630010100102000010010200005883012180514927081301643015113264033001010020200001002030000301882741120021109101000010010100000102004506868102721000407243631120304224356782610127051642302121000078820000100103015230157301643014430165
3002430167225100100365543510014030147301961721387630010100102000010010200005883012178414927081301663016013264283001010020200001002030000301512651120021109101000010010100000102006567610010309100072924323172029042446956362012704164530149100001251320000100103017830158301783024530220
300243019422611100036805361003203016045197141938673001010010200001001020000588301217931492708230196301981326396300101002020000100203000030183265112002110910100001001010000010200456601041028410006213602772028922294746861112702165430235100001411920000100103018730194301603015430165
300243015522510100036785320002232301674222732443871300101001020000100102000058830121779149270993016630218132643330010100202000010020300003016426711200211091010000100101000001020038757103102951002438443234020299426871019761012704164430154100001391220000100103020030165301773019530187
300253017722610100036766541003503014948198141838703001010010200001001020000588301217821492713630164301801326408300101002020000100203000030164265112002110910100001001010000110200367559010280100135144030820300325856976620127041644302311000018111420000100103017330164301693018030174
30024301702261010103696540100332030192552212752391830010100102000010010200005883012177704927091301763019313264253001010020200001002030000301942741120021109101000010010100000102004319911051029110027311361031920313224778380610127021642302381000012131220000100103017130190301693017830174
300243018022611100037375391001636301492422014573930300101001020000100102000058830121777149270733014830149132639130010100202000010020300003015626711200211091010000100101000001020042208917610284100182112423852031552255901076101270416443016910000118920000100103016030159301523015430158
3002430167226100000366245100019443015237195321638933001010010200001001020000588301217901492708730184301981326418300101002020000100203000030205274112002110910100001001010000010200446808910256100193136030020305225654965600127041644301661000017131920000100103017030172301923018230150
30024302022261010003690540000202830149421901938391230010100102000010010200005883012179014927084301683016313263973001010020200001002030000301752671120021109101000010010100000102004566081102791000619304429520265326869169600127041644301701000019131120000100103019430185302003018930180
300243018322610011036874560002136301702017019293864300101001020000100102000058830121775149270793016230152132638630010100202000010020300003015226711200211091010000100101000001020043191151381026910010223663292026822625611006001270416423016710000109920000100103016930173302253015430187

Test 3: throughput

Code:

  swpab w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | map dispatch bubble (d6) | dd | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20205130057974000000014846001251300360111100482010010020000100200005001065980491269711300411300422612750520100327320020000200300001300481975111020110099100100001001000001002000001212243921000001431234299143961212111716793116012993766920000100130052130052130052130052130052
20204130042974000000014473001221300270001102622010010020000100200005001071360491269711300511300512111275192010032522002000020030000130042197511102011009910010000100100000100200000120243451000001429934278141901212111716763816112993796920000100130052130052130052130043130052
20204130048974000000014403002471300370001100172010010020000102200005001070900491269711300501300512612749620100348120020000200300001300481975111020110099100100001001000001002000000024228100000143063429914324012111716741316012994606920000100130052130064130049130052130049
20204130048975000000014162001221300360201102782010010020000100200005001065030491269711300511300432612750620100337620020000200300001300521975111020110099100100001001000001002000001213243291000001417834389144251212111716765016012994709920000100130052130052130043130043130052
202041300429740000000144470010313003600110989420100100200001002000050010697004912696213005013005226127505201003319200200002003000013004219751110201100991001000010010000010020000000243581000001436734318142471212111716772916012994396920000100130054130049130082130043130044
2020413041197400000001452000102130036002109898201001002000010020000500106520149126962130052130042261275062010035202002000020030000130042196911102011009910010000100100000100200000012243701000001438334235142651212111716780416012998599920000100130052130052130043130052130043
20204130052974000000014399001221300330201099492010010020000100200005001072420491269711300511300512612750220100325520020000200300001300421969111020110099100100001001000001002000001212241811000001441534284143151212111716761116012994690620000100130052130043130052130043130043
2020413005197400000001425510248130036000110024201001002002410020000500106828049126968130050130042261275052010033002002000020030000130042196911102011009910010000100100000100200000012244371000001428934074143891212111716747216012993766920000100130053130053130053130049130053
202041300429740001100144511012213003610011015320100100200001002000050010676604912697113004113005126127496201003477200200002003000013004819751110202100991001000010010000010020000001224243100000142233416814304120111716776616012994696920000100130043130052130052130052130052
20204130048974000000014411001221300270221099192010010020000100200005001063940491269711300501300712612750620100341020020000200300001300513945111020110099100100001001000011002000001214241661000001408434184142411212111716774516012994696920000100130043130049130043130052130043

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002513005797410100001671012109313004002211486420010102000010200005010000304912697213005513040623127537200100202000020300001300571988111002110910100001010000110200242412192669510012012149636644166971212122300064010059216221299439992000010130058130058130058130059130049
2002413004897410110001674712001231300420081148682001010200721020000501000031491269681300471300502312753720010020200962030000130057199711100211091010000101000001020024241202665610012021671536654166661212122310064010022216221299439992000010130058130058130058130058130242
20024130048974101000016703120012013003328011486820010102000010200005010000304912697713005713006323127537200100202000020300001300571997111002110910100001010000010200252612202666810012101671536651166661212122310064010029216221299439092000010130058130058130049130058130058
20024130057974101000016668120012413004227811486120010102000010200005010000414912696913041113005923127537201060202000020300001300571997111002110910100001010000010200262401926687100120016677366561666212121223200640100222162212992810062000010130043130044130053130053130043
2002413005297400000001668601012413003327811485920010102000010200005010000314912699313004713005723127537200100202000020300001300521988111002110910100001010000010200000121526645100000016680366351664401212000064010068216221299439092000010130049130061130058130058130058
200241300579741010000166791200126130033287114869200101020000102000050100005149126977130056130057231275372001002020000203000013005719971110021109101000010100000102002424019266941006431166813667516688120122300064010020216221299433992000010130058130058130058130059130058
2002413005797410100001671112101241300432881148702001010200001020000501000040491269771300561300572312753720010020200002030000130057198811100211091010000101000001020025241219267001001241166763668516691121202310064010026216421299439992000010130049130049130058130058130058
20024130057977100100016677120012213003300711486020010102000010200005010000514912697713005613005723127537200100202000020300001300571997111002110910100001010000010200242512202666810012011670936651166661212122310068610027216221301079992000010130049130058130049130050130058
20024130048974110000016701121012313004227011486120034102000010200005010000614912697713005813042023127540200100202000020300001300571997111002110910100001010000110200472412182668610012002356236656166661212122420064010023216211299359992000010130058130058130058130058130061
20024130057974101000016702120012313004300011487020010102000010200005010000404912696813004813005723127537200100202000020300001300481988111002110910100001010000110200002121526635100000016675366771667001212040064010004216221299449902000010130058130060130060130148130058