Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPA (64-bit)

Test 1: uops

Code:

  swpa x0, x1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
72005342402561170018100100811053183389100289892002200020001011824930899337303422231020002000300033989538211710011000100002003420101010031162004102000150561077217958355654924222360944391134383301919069168081853020003416634104341443409734034
72004341852550130014011100500053083409200290372000200020001000164930936337683410231020002000300033822538211710011000100002000002100310000062004203200150361076817955360984224202354844371139423311718962164631849420003404034102341863414434117
7200434033256012001300010150005328338871029036200020002000100019493098733796340613102000200030003382453701171001100010001200002010011000004200220220015082107871797035575392411235884445838343299218990164511863620003408634071340143413034040
72004340682550140016000100300053243395600294442000200020001000034931018338323414331020002000300033935538511710011000100002000020101110000062004202200149611057707978356043924319349644351038343299519095166181818420003410634109340773405634089
72004340772550100012000100301053383390500289462000200020001000034931009337443404931020002000300033963532411710011000100002000022100310000062003203200148791076617997356344224135354944391140343297419085169841820820003416534134340983414134012
7200434003254113011010110061005399338270028927200020002000100009493102433739341063102000200030003389953781171001100010000200003310031000004200220000015086106260796435487362425335164436534393298918783167961845620003399134194341233399634149
720043407125601600150001005000529533891002901720002000200010000104931075337283415431020002000300033967534511710011000100012000020100310001042002200000149811077217942360383924177357144511136373296719168170711833820003415434099340683404034082
72004340912561161118100100610053103390700289582000200020001000024930940338993411031020002000300033956536811710011000100002000020100210000062002200300149061064518024355553724226352944391136333301019001166521838520003409234109340593418234127
72004340992561131013000100410054683395400289702000200020001000044931110337363412531020002000300033518538211710011000100012002320100510010082004413020150131081807963357664024276353744491235353302119177169351828320003414134140341533397934062
72004341822551181119011100510053173391800289892000200020001000044930921336933414531020002000300033945535911710011000100002000032100310000062003300000149031062307951351763724146357844361039403305219139169581836120003408134095341243417434204

Test 2: throughput

Code:

  swpa x0, x1, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0541

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
302093056122911001000596145812171295112305387585721821540923010010100200001010020000592121222070492737730536305591326760301001020020000102003000030555277112020110099100100001010010000110020892149721094109611036725422848741359212633381418998711416131011611306021000022171920000101003058330591305453056330545
3020430530229101000005953428221704117136305398156423623541073010010100200001010020000592121222480492742530491305361326702301001020020000102003000030471276112020110099100100001010010000010020898139521050109471035422823889721496213313355428097411335131011611304941000034211820000101003052630509305363055030458
302043046222810010000587837794172011892304817775222520840973010010100200001010020000592121223671492745930533304691326763301001020020000102003000030515276112020110099100100001010010000010020902159621116109331033127123885461306213062409508578381328131011611305501000021242120000101003046530563305053046830567
30204304992291000010058224581716961261043048574348202245418630100101002000010100200005921212227914927403304873050213267583010010200200001020030000305432761120201100991001000010100100000100209191410111112109311034726626832881394212878383369058781489131011611305091000018131620000101003054430492306243061430547
302043051822811100000604346818168010192305277504722824740933010010100200001010020000592121221791492743030516305641326665301001020020000102003000030512277112020110099100100001010010000110020885149371144109791034328323850921303212978387328347131412131011611304901000038241520000101003055930543306273053430532
302043060622911100000588246793168811392304947865823724641813010010100200001010020000592121222360492739930531304951326688301001020020000102003000030560276112020110099100100001010010000110020913149691163109611034027421844421473212446395518357291408131011611305701000013301020000101003052830461305573049330509
3020430587228110100005841478221696109140305967726519920341193010010100200001010020000592121223331492739730484305251326773301001020020000102003000030473283112020110099100100001010010000010020873149831045109541035829230872321453213009373427848321465131011611304911000022181620000101003049430552305123048530521
302043060922810000000566044783175210013230509752472112124077301001010020000101002000059212122261149274663049030509132671430100102002000010200300003054227611202011009910010000101001000001002087216911116511027103432580866461715213716396478077141401131011611305501000021241720000101003054430510305213055730486
3020430519228101100005668438101728122923055278963200231417930100101002000010100200005921212228714927516305453057313267453010010200200331020030000304982671120201100991001000010100100001100208991410681229110071034025228830381459212445390458778701409131011611305891000022141220000101003051430535304773053230530
302043054922810100000579840802170498116305537875124121441773010010100200001010020000592121222590492744830516306121326707301001020020000102003000030532276112020110099100100001010010000110020884139551169109631035422628848321379213123380488458301483131011611305501000026171620000101003058330501305583048730545

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0473

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
300293050522800000010056482876910648921043043372940245217404830010100102000010010200005883012214004927316304063045713266643001010020200001002030000305352591120021109101000010010100000102089619915953109571037125708473813122117343593976569319051270316223054410000199420000100103045530417304323046730437
30024304172291000010005896348311052011212030441796342202294080300101001020000100102000058830122086049273353041730426132671830010100202000010020300003044725911200211091010000100101000001020874684190710939103462364184536116721226144429723736610127021622304521000016131320000100103048830506304223045830431
3002430404228100000000568138795106721149630444769442422444013300101001020000100102000058830122254149273353049530458132667330010100202000010020300003048325911200211091010000100101000001020852087610001095710367235148644213772128233583373871200312701163430467100002111720000100103047630405304113041430475
300243041822800000000056763178510720114128304017483923225840703001010010200001001020000588301221021492736930461303931326657300101002020000100203000030471268112002110910100001001010000010209091685592510899103632211796461158212413365366677181403127031622304351000021191320000100103041930400304743046030452
3002430477228101011000580143752107201101123044679228229246409830010100102000010010200005883012221314927376304373043013267453001010020200001002030000304592681120021109101000010010100000102085114845912108831033924670818381262212236346347307511406127021623304511000017171220000100103041530473304523043130437
300243047522810001000057434678010680117128304017452721222840623001010010200001001020000589851222831492732930428304261326717300101002020000100203000030404267112002110910100001001010000010208810804100610847103402558984436126421226234029752739001012704161130466100002016620000100103050530471304963050030404
3002430421227100001000564642803106081039230537768392372294056300101001020000100102000058830122292049273463050030533132672630010100202000010020300003050425911200211091010000100101000001020847791598910928103412641386432132521253338229683715663127021632304071000013182120000100103046330457304673048130432
3002430384228100000000577135762106961109630449792342482374040300101001020000100102000058830122205049273723049330437132665130010100202000010020300003041625911200211091010000100101000001020897777010091089010329234108404413342123923493974568069312703162230482100002311720000100103040030443304093045730398
300243045122810001000055643678210728109116305307853326726540973001010010200001001020000588301221851492737330443304711326734300101002020000100203000030501514112002110910100001001010000010208846836102110894103332004386030126321250238235710774617127021622304681000023121620000100103049130450304983045730467
3002430387227100010000571131809105041082003041876740223244408330010100102000010010200005883012221514927391304853044113267053001010020200001002030000305172591120021109101000010010100000102089268021028108731034824648543612292124633623683082361012703163230417100002913820000100103041430432304083049830473

Test 3: throughput

Code:

  swpa x0, x1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0060

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20205130056974010010014406010245130041128109906201001002000010020000500106458054912697613005513005626127510201003436200200002003000013005619711110201100991001000010010000010020000121424386100000014515341541444412120111716760701600129951060620000100130057130057130057130057130057
2020413005697400000001414101012213004188110324201001002000010020000500106307054912696413005813004426127510201003273200200002003000013004419831110201100991001000010010000010020000121424370100009014398342781408912120111716753001600129951066620000100130057130057130057130057130057
202041300569740000000143950001221300418810997920100100200001002000050010697800491269761300551300562612751020100370420020000200300001300561983111020110099100100001001000001002000001524394100000014376342381430412120111716765701600130025066620000100130096130057130045130045130057
2020413004497400000001421600011713004188110437201001002000010020000500106524004912697613005513005626127498201002938200200002003000013005619831110201100991001000010010000010020000121424289100000014244341931405512120111716762001600129939006620000100130057130057130057130057130057
2020413005697400000001429900012213004188110195201001002000010020000500107182004912697613005513005626127510201003327200200002003000013005619831110201100991001000010010000010020000121324445100000014344345531439112120111716757701600130093010101020000100130061130061130045130061130061
202041300609740000000144500001221300298811014420100100200001002000050010672000491269801300591300604612751420100357120020000200300001300601985111020110099100100001001000001002000012142419310000001418434183141211212011171675170160012996101010020000100130047130061130061130061130061
20204130060974000000014358000102130029881099062010010020000100200005001066021049126980130043130060261275142010033012002000020030000130060198511102011009910010000100100000100200001202418710000001435534376144541212211171677030160013014501001020000100130061130061130061130061130061
202041300609740000000145480001241300458810989520100100200001002000050010677810491269801300591300602612751420100350520020000200300001300441987111020110099100100001001000001002000012132415910000001418334064141611200111716749001600129947010101020000100130061130061130045130061130045
2020413006097400011001439400112213004588110578201001002000010020000500106300104912698013005913006026127514201003208200200002003000013006019711110201100991001000010010000010020000121424310100001014267343041407312120111716774001600129953010101020000100130061130061130061130045130045
202041300609740001100143150001221300458911013620100100200001002000050010654410491269801300591300602612751420100305220020000200300001300601971111020110099100100001001000001002000012142405910000001424334315142951212011171678520160012994700101020000100130045130061130061130045130045

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0052

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002613005297400101101665101023913004600411487320010102000010200005010000404912696831300641300612312754120010020200002030000130061200111100211091010000101000001020024251220266601001202167043666816646012120064010009316221299381010102000010130053130043130043130053130053
20024130052974000000016654000124130046066114873200101020000102000050100005149126981013006013006123127528200100202000020300001300612001111002110910100001010000010200252412192672710012001668036668166770121200640100152162212993801002000010130053130053130053130053130043
20024130052975000000016678000123130037022114868200101020000102000050100003149126972013005113004223127534200100202000020300001300421982111002110910100001010000010200000121626637100001016637366461664501212006401000521622129941101002000010130053130053130043130053130053
200241300529740000000166900001231300370021148552001010200001020000501000031491269720130051130052231275322003472020000203000013005319821110021109101000010100000102002423019266971001201167033668816636012120064010007216221299471313132000010130062130062130062130062130049
2002413006197410110001670812001241300460661148722001010200001020000501000032491269720130051130042231275322001002020000203000013005219921110021109101000010100000102000001216266361000000166363664516647012120064010012216221299381010102000010130053130053130043130053130043
200241300529740000000166900101261300270021148892001010200001020000501000171491269720130051130052231275322001072020000203000013005419821110021109101000010100000102002523122126656100120016668366421665412121224064010066216221299341313132000010130062130062130049130062130062
2002413006197410000001667412001271300400221148552001010200001020000501000031491269720130041130052231275322001002020000203000013004219921110021109101000010100000102000000162664610000001664536636166810012006401000421622129938010102000010130092130118130057130053130053
200241300529740000000166870101271300370221148652001010200001020000501000031491269720130041130042231275222001002020000203000013005219921110021109101000010100000102002801216266781000000166783666816671012120064010039216221299381010102000010130546130043130053130053130043
20024130052974000000016643000127130076022114865200101020000102000050100003149126972013005113005223127532200100202000020300001300533975111002110910100001010000010200000121626670100000016670366801667700120064010015216221299381010102000010130053130053130043130053130043
20024130042975000000016678010123130037022114858200101020000102000050100003149126972013005113005223127532200100202000020300001300521992111002110910100001010000010200000121626682100000016677366771666601212006401001421622129941010102000010130053130053130043130043130053