Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPLH

Test 1: uops

Code:

  swplh w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 46 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
72005332262487131211110051057343302802788720002000200010001114929857329153326531320002000300032972520911710011000100002004423100610011162002312222156741148528243383208823137377144442361683255317457154471661420003273333394331613328033379
7200433176247512110001003105787329970280622000200020001000054930094328903278431320002000300032955524411710011000100012002222100410010142002810022156731147518314400215523368376444491757623266016128154151602020003276932862332523310232929
7200433265248611111001012105985327580280112000200020001000234929894328843305331320002000300032814523311710011000100012003222100510010042002412222161731201018315400626623067382644532065583252017826149691676920003323132933331833281233260
7200432826245811110001005115906329690280692000200020001000024929993329243309631320002000300032809519931710011000100012002302100410010062003410221155221098028344388816323312363044431762643235117107156001634820003320333138331163313033014
7200433029249811010001008105537327600281892000200020001000084929939327413329931220002000300033053521511710011000100012002202100610010192003412222161481145528360394615923153385744421468663274016168154081647120003323033194331903303733308
7200432813249511111001008105994330250282772000200020001000094929906328503303531020002000300032850523511710011000100002003222100710010162006410021159151147828424388826222984379244482370683259916713154971691520003325332932328193298433014
72004330932498111100010061058203335102786220002000200010000104929864329313328431320002000300033112517711710011000100002004202100810020142004312221162331137618249387615823239388444452257643256317006153831675620003304633111328323265932977
7200432989247511111001008105718328500277702000200020001000074929832325383308531320002000300032787517611710011000100002004422100310010162002310020163111198508253399305823299374144401972663250717395151051703820003278633209330293293832915
72004330242485111100010131058283280002778520002000200010000449300483286332846313200020003000329105205117100110001000020043221009100101102002612222156811129328545387306522911382944401464573253316944154541578620003323833124328873281133162
72004331032487111110010051056723320902804020002000200010000249299933294433134313200020003000330435232117100110001000120033201004100100102003412221157851200918275387035823335374844452162603241716739157311617120003310633323331193280432914

Test 2: throughput

Code:

  swplh w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0055

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 1e | 1f | 20 | 22 | 23 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3020670055525000050521371014070035321131449585253010010100200001010020000603803319592496697870010700506569666625430100102022000610202300097005078112020110099010010000101001000011002004509435198681006510001213624392004501622910511113170160069826100009020000101007005170051700567005970051
3020470050525000050391601019070040162118495852530100101002000010100200006037833198294966970700187005865693666259301001020220006102023026270058781120201100990100100001010010000010020034016435198681006610001203728352005111802912611113170160069826100006620000101007005670056700597005670056
30204700555240000507113310170700403731512495882530100101002000010100200006038633198314966970700187005865681366266301001020020000102003000070058781120201100990100100001010010000010020031010638198661005810001103628292006512113412400013101161169818100006920000101007005670051700567006670056
30204700505250001505313610150700432931011495882530100101002000010100200006040033199714966975700187005565686366263301001020020000102003000070055781120201100991710010000101001000001002003901594519868100601000131376352005601913410600013101161169818100006020000101007005670059700597005670056
3020470055524000050501371015070035303135495852530100101002000010100200006040033195924966978700107005565686106677030100102002000010200300007005578112020110099010010000101001000001002002409139198681004010001012522272006611701910500013101161169821100006620000101007005670056700557005370059
3020470055524001150361411014070040222115495852530100101002000010100200006040033198314966975700157005565689366266301001020020000102003000070055781120201100990100100001010010000010020027010937198681005210001103028192003701612911100013101161169818100006620000101007005170051700517005670056
3020470058525000050451311013070040352141049588253010010100200001010020000604003319592496697570019700556568636626630100102002000010200300007005578112020110099010010000101001000011002002507243198681005010008313420282005401914012800013101161169821100006620000101007005670051700517005170056
3020470055524000050541440021070035313101149585253010010100200001018220000604003319830496697870019700556568636626330100102002000010200300007005578112020110099010010000101001000011002003001053219868100571000110302827200610151339300013101161169839100009620000101007005670059700517005670056
3020470055525000050411401018070040362141149582253010010100200001010020000604003319829496697570018700586568636626330100102002000010200300007005578112020110099010010000101001000001002003301454019868100561000421480322005111903516200013101161169818100009020000101007005670059700597005670059
30204700555240000503912810170700432921964958525301001010020000101002000060400331997149669757001570055656863662663010010200200001020030000700587811202011009901001000010100100000100200301923019866100481000051251021200471151309100013101161169818100000620000101007005970056700597005970056

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0055

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
300267005552400100050441421000907004012231112049587253001010010200001001020000599733319592049669757001570058657083667823001010020200001002030000700557811200211090101000010010100000102003207053198681006010001206246322006401823812612702151169813100006620000100107005170103700567005170056
30024700505240001005052100000160700438821170495852530010100102000010010200006001733198301496697070015700556570336628130010100202000010020300007005878112002110901010000100101000011020053014436198681005810001314512222006602014527112701151169818100006620000100107005670058700597005970056
300247005552401100050491201000143270043115222120495882530010100102000010010200005997333199731496697070015700506570336628930010100202000010020300007005578112002110917101000010010100000102003201512919868100541000121280352004511211810812701151169821100006620000100107005170056700567005670059
30024700555250000005047024100014070043671121204958525300101001020000100102000060017331997014966975700157005565703366281300101002020000100203000070058781120021109010100001001010000010200201215140198681003810009214728252004701813312512701151169818100006620000100107005670056700517005970056
300247005852500010050241371000130700438011214049582253001010010200001001020000599973319592049669757001870055657083662813001010020200001002030000700557811200211090101000010010100000102002901093319868100751000121383024200570221349012701151169818100006620000100107005670056700517005170056
30024700505250000005055170100017070043103012704958525300101001020000100102000059973331959204966975700187005565708366286300101002020000100203000070055781120021109010100001001010000010200281225350198701006610006204746242005411313912712701151169818100006620000100107005670056700567005970059
300247005552400000050520391000424700351072116049582253001010010200001001020000599733319592149669707001570055657083662893001010020200001002030000700507811200211090101000010010100000102003709032198681005410001214028262004902013620412702151169818100006620000100107005970059700567005670056
30024700585250200005050123100016407004060012120495852530010100102000010010200006001733198310496697570015700586571136628930010100202000010020300007005878212002110901010000100101000001020052018047198681007310001213528292005101612612612701151169819100006620000100107005170056700597005670056
300247005552500000050361281000150700359621110049589253001010010200001001020000599973319592149669757001970058657083662893001010020200001002030000700557811200211090101000010010100000102004802714019866100731000110360342005101714212712701151169821100006620000100107005170051700567005170103
300247005552400100050371191000307004396014120495852530010100102000010010200005999733198300496697870015700586570836628130010100202000010020300007005578112002110901010000100101000001020032014153198681006410000214128472004602313213512701151269813100006620000100107005670056700517005670056

Test 3: throughput

Code:

  swplh w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0052

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20205180094134901100321502100180037014161587682520100100200001002000050085658110491769721800521800521760496177507201002002000820030012180052144011102011009999540100100001001000011002000000129221321361000002214742154221602038011171615487316017893900141020000100180040180053180053180049180049
2020418005113490000032154200018002430161587532520100100200001002000050085659961491769721800521800391760456177493201002002000820030012180048144011102011009999537100100001001000001002000046381292213213810002022145421452213723446011171715502016017893900141420000100180049180040180053180053180049
2020418005213480110032160200018003720015876425201001002000010020000500856536104917697218003918003917604961774922010020020008200300121800481440111020110099995441001000010010000010020000001292203214510000022145421522216000001117161550901601789260014020000100180040180053180040180053180040
202041800391349000003213701011800330161615875425201001002000010020000500856536104917696818004818004817604961775022010020020008200300121800481431111020110099995181001000010010000110020000463912921232144100020221494213822136004601117161549101601789394001420000100180053180053180053180053180053
2020418003913490000032162000118003721313158765252010010020000100200005008565811049176968180052180039176049617749320100200200082003001218005214311110201100999953410010000100100000100200004639129212321431000242214742136221452394601117161549501601789261014020000100180053180049180049180049180053
20204180039134900000321682100180063214131587612520100100200001002000050085659961491769721800521800521760366177502201002002000820030012180048144011102011009999540100100001001000011002000046461292213214410000022149421432214100001117161547101601789260010020000100180077180087180053180053180040
20204180052134900000321492001180047001415875325201001002000010020000500856599714917697218005218003917604961775062010020020008200300121800391444111020110099995441001000010010000010020000038129220321411000002215042160221410390011171615515016017892810141020000100180053180053180049180049180040
202041800391349000003213300011800243131515875425201001002000010020153500856581014917697218005218005217603661775062010020020008200300121800391440111020110099995361001000010010000010020000460129212321321000002213942161221380393801117161549601601789360001020000100180053180053180049180053180053
20204180052134900000321480100180033014141587522520100100200001002000050085653611491769591800521800561760366177506201002002000820030261180056144011102011009999540100100001001000001002000000129221321411000292214742165221452046011171615498016017893910141420000100180040180040180053180049180053
20204180052134901000321600000180037300158784252010010020000100200005008566002149176968180039180052176045617749320100200200082003001218005214401110201100999954010010000100100001100200004638129221321361000202214742158221452394601117161549301601789271014020000100180049180053180049180044180053

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0048

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200251800571349110111321592101180042017171587632520010102000010200005085665480491769771800461800571760693177537200102020000203000018006414671110021109995361010000101000001020025264153129219321881001600222254217122197162942251064015540216221789331131302000010180058180058180058180095180049
20024180048134800001132212210118003331315158762252001010200001020000508565355049176972180048180048176051317753220010202000020300001800481458111002110999540101000010100000102000003840129212321631000000221644215122157203800064015516216221789244101402000010180053180049180053180049180049
200241800481349000000321502100180033314161587632520010102000010200005085658120491769721800391800481760603177528200102020000203000018008514581110021109995401010000101000001020000038381292193214310002002215042169221382350000640155002162217892810002000010180049180053180049180053180053
2002418005213480000003213821011800428180158764252001010200001020000508566545049173952180063180057176069317753720010202000020300001800731467111002110999540101000010100000102002726415312921932188100150222199421792218116294226006401555321622178933013002000010180058180058180058180058180058
200241800571349111100321871610018003080171587632520010102000010200005085665640491769801800451800571760693177525200102020000203000018011114671110021109995361010000101000011020000046381292203213910002002213342161221442353800064015527216221789281141402000010180053180053180040180040180053
20024180048134900000032146200118004270171587682520010102000010200005085665940491769771800941800671760693177537200102020000203000018005814671110021109995401010000101000001020000038381292163214410000002217642173221452344600064015537216221789341131312000010180058180058180058180058180058
20024180085134800000032161210018003300141587652520010102000010200005085659950491769731800521800521760643177519200102020000203000018011014581110021109995361010000101000001020000003812922132158100020022169421382215624000006401549321622178928014002000010180049180040180049180053180053
2002418005213480000003217420001800382001587642520010102000010200005085658120491769681800481800481760513177532200102020000203000018004914581110021109995461010000101000001020000046012921932138100620022147421672214503900006401551721622178924113002000010180058180058180058180058180058
200241800461348101100321911610218004260015875225200101020000102000050856652804917697718005718005717606931775372016320200002030000180121146711100211099954110100001010000010200252541012921832184100170022199421762218216403800064015523216221789241101002000010180053180040180053180049180049
20024180052134900001132154201018003330131587642520010102000010200005085658060491769611800521817331760623177519200102020000203000018005514582110021109101847101000010100000102000004601292213213610002002215442159222242343800164015506216221789921101402000010180053180053180049180053180040