Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPAL (32-bit)

Test 1: uops

Code:

  swpal w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
720053304724970101000000101000059013287200278832000200020001000044929912327723294831320002000300032914523511710011000100002000000100210000032005202200159961162818316399528323076376944421373683253917160150811629820003293333087329473308032968
720043308724840100100000100501057843274100279352000200020001000024929996327683297231220002000300032842526111710011000100002000020100210000032002202200159891173218420394316523069375544451669633251016822152431606620003296532835329313293933031
720043290924850100200000100600060543277501277912000200020001000024929665327203294731220002000300032844522511710011000100002000002100510000042003302000156611188018377389317923118378544401662663242816578147511607520003303833024330383309232989
720043290024850100100000100400058523279801279142000200020001000034929858328063296631520002000300032703521011710011000100002000022100210001022002202200160231179818368388715722997384844421670683238917034148281610620003296633021330243286133039
720043306424760100100000100600060873301900279832000200020001000024929909326963286031320002000300032892522611710011000100002000022100210000032002202000161571206718420405016523098387644421764623245816640147151573220003318233061328443294133027
720043302424870100101100100600059143288210279862000200020001000024930081327563285831320002000300032913523711710011000100002000022100210000042002202200163651159528325397526422876376144411866713250916917154601620120003296632834330063288632953
720043296024730100100000100401059543297300279122000200020001000244930054326913306631020002000300032875526911710011000100002000022100310000042003202200157501164108432392906323129384244371567653247717052149461609920003316932945329953303733126
720043346324780100100000100500060503294801280542000200020001000324929796326463280331220002000300032916524811710011000100002003322100410011142003312222161061186108406394126622949381844372262643241116701149661633620003299933142329213283733158
7200432946246611100100001027110595932938002840220242026200410112149299003345333152261320002000300632807520441710011000100002004322100610010215252002410020161831179818383393507323052383944461371643250316723151861611120003289932866330423297532999
720043303424741110100070141911059013286300278652000200020001000124929860329543286631020002000300032944522211710011000100012000022100210000052003202204160441179808328397406323108379044531772703249617314151011667020003314033457332833344433377

Test 2: throughput

Code:

  swpal w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0055

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
30207700665250000522731901016039687004120752536423513010010100200001010020000592121219880496697570044700551366263301001020020000102003000070055529112020110099100100001010010000100201978019610293100742162259212120262047211913003131021622700201000066120000101007005770056700587005670058
30204700575250000534311237101285317670041215737484235530100101002000010100200005921212212204966976700477005513662653010010200200001020030000700555291120201100991001000010100100001002031212915010216100492262234011620323047612111603131021622700201000066120000101007005670056700577005870056
3020470057524000052187237101205824700402096243542364301001010020000101002000059212122101049669767004370055136626430100102002000010200300007005552911202011009910010000101001000010020231110159102251005721522366110202600483113125311131021622700211000066120000101007005670056700567005670056
3020470055524000052245200100611847004219923529423583010010100200001010020000592121216700496697770046700561366267301001020020000102003000070055529112020110099100100001010010000100202081191951023310050219217181562027704561169305131021622700201000066120000101007005670056700567005670056
30204700575250000523532081016544870040240627194235730100101002000010100200005921212243704966975700447005513662633010010200200001020030000700555291120201100991001000010100100001002022310216710255100612562301041364203070454114104123131021622700211000066120000101007005770056700567005770056
302047005652400005258727110055212700402281224142367301001010020000101002000059212122183049669757004570055136626330100102002000010200300007005552911202011009910010000101001000010020252100173102181005620132086212020283060613412061131021622700201000066120000101007010470057700567005670056
30204700565250000534872451088471047004023354036423693010010100200001010020000592121220550496697570043700551366263301001020020000102003000070055529112020110099100100001010010000100202551371881025910050186260761542032415121258901131021622700211000066120000101007005770056700587005770056
302047005552500005275424210806017670041220438264235030100101002000010100200005921212198204966976700447005513662633010010200200001020030000700555291120201100991001000010100100001002022111919110260100322562111621182039304959496122131021622700201000066120000101007005670058700567005770056
30204700565250000520092181005517670041194430354235930100101002000010100200005921212226804966976700437005513662633010010200200001020030000700555291120201100991001000010100100001002025211816610264100542213236341082022806061409602131021622700201000066120000101007005670058700567005670066
3020470055525000052305257104853607004020263937423593010010100200001010020000592121221160496697670049700581366264301001020020000102003000070055529112020110099100100001010010000100202678915810213100351571992611920253053711113101131021622700201000066120000101007005670056700567005670058

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0059

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
30027700605251000530813246114428470044194133445424713001010010200001001020000588291214800496698270051700601366291300101002020000100203000070059533112002110910100001001010000010202450136243102751005417722202616720309170112081416100012702152270024100001010420000100107006070060700607006070060
30024700595240000528651981838132700442198393642471300101001020000100102000058829121722049669797004770059136629030010100202000010020300007005953311200211091010000100101000001020269011019110278100691748142241362025016581861226110012702152370024100001010420000100107006070060700607006070060
30024700595251110527792211112486870044178114743424673001010010200001001020000588291216000496697970051700591366290300101002020000100203000070059533112002110910100001001010000010202127134226102601005717227196301302028717472161666020112703152270024100001010420000100107006070060700607006070062
300247005952400005271822711683156700441647333842472300101001020000100102000058829121548049669797005170059136629030010100202000010020300007005953311200211091010000100101000001020305714422410294100602378212861602027529352321796810012702152270024100001010420000100107006070060700607006070060
300247005952511005307721718011807004421272446425123001010010200001001020000588291217320496697970051700591366290300101002020000100203000070059533112002110910100001001010000010202716162283102831007626144156542042033719112221506210012702152270024100001010420000100107006070060700607006070060
3002470059524110053249200196501007004416754539424713001010010200001001020000588291215120496697970051700591366290300101002020000100203000070059533112002110910100001001010000010201970137221102601004524591387216320267110122031766010012702152270024100001010420000100107006070060700607006070060
3002470059525000052888210183219270044187143740424693001010010200001001020000588291218350496697970062706701366290300101002020000100203000070059533112002110910100001001010000010202266140220102781008318471601816820310290720814861130012702152270024100001010420000100107006070060700607006070060
300247005952410005309819811684418870044182101928424633001010010200001002420000589421216780496697970051700591366711300101002020000100203004070219533312002110910100001001010000010202300149210102691005319700241732036619461961570010012702152270024100001010420000100107006070060700607006070060
3002470059525000053054232103910870044186163634424623001010010200001001020000588291223550496698070051700591366290300101002020000100203000070059533112002110910100001001010000010201896145270102581005115521302216020285110162101556120012702152270024100001010420000100107006070060700607006070060
30024700595241000524182091104431007004421792432424693001010010200001001020000588291214900496700470051700591366290300101002020000100203000070059533112002110910100001001010000010201937151233102511003915175148261632040218362271546110012702152270024100001010420000100107006070060700607006070060

Test 3: throughput

Code:

  swpal w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0047

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202051800441348100024624010118002703016151720148100200001002021851110701604917696101800391800472617749420100283620020000200300001800442712111020110099100100001001000001002000012122446210000014498344611416512120111716782401600179876096620000100180045180048180048180040180040
20204180047134900002437901011800320221598022010010020000100200005001061460491769670180047180047261774922010035282002000020030000180039271211102011009910010000100100000100200001213243831000001424034262144251200111722776922522179865090020000100180045180048180040180048180040
2020418003913480000244420000180032010160046201001002007210020000500106950149176967018004418004726177493201003165200200262003000018003927171110201100991001000010010000010020000002443910000014380343541445412120111716784801600179876099920000100180045180045180040180048180040
202041801241348000024435000018003201216009320100100200001002000050010640304917696701800491800472617749820100363420020000200300001800472717111020110099100100001001000011002000012122472210000214682343561458512120111716781001600179876090020000100180045180048180040180040180049
20204180044134900002437200001800240201597892010010020000100200005001065960491769590180047180048261774982010033292002000020030000180047271711102011009910010000100100001100200001202439010000014303344211428012120111716775301600179873096020000100180081180045180048180040180048
202041800471349000024423000118003202016026920100100200001002000050010675004917696701800471800472617749820100328020020000200300001800392712111020110099100100001001000011002002412132433210000014452345491447112120111716790901600179876096020000100180045180045180045180045180045
20204180047134900002445000001814870021597612017210120024100200325001071520491769590183558180831210180646201003401200200002003000018004727171110201100991001000010010000010020000012242951000001430834531144670120111716765501600179868099020000100180076180040180049180059180048
2020418004713491100243970101180032010159974201001002000010020024500106606049176959018003918003921017749220100323920020024200300001800472717111020110099100100001001000001002000012024493100001414523344411417912120111716769001600179876099920000100180040180048180048180048180040
20204180047134800002427901001800330001602862010010020000100200005001064800491769670180047180048261775012010035142002000020030000180044271711102011009910010000100100000100200001212243281000001421434300141780120111722789122522179857006920000100180048180084180045180050180045
202041800471349010024314000118003202016024220100100200001002000050010684304917696701800441800482617749320100372620020000200300001800472717111020110099100100001001000011002000012132431210000014200343961451012120111722757622522179857090920000100180049180049180045180048180049

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0048

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200251800441349000002671501011800830016479220010102000010200005010000314917780618031718009323177528200107202007220301081805982735311002110910100001010000110200000121526740100000016743366961674001200064010105216221798676662000010180325180315180713180455180045
2002418004413520000026702000118008942164783200101020000102000050100003049176968180044180039231775282001002020000203000018004827351110021109101000010100000102014621215267351000030167513669916701120000640101042162217986761002000010180049180045180040180045180040
20024180048134900000267740001180038021647832003410200001020000501000031491769591800441800842317751920010020200002030000180048273511100211091010000101000011020000012026739100000016739367791677112120006401008121622179858610102000010180049180049180040180049180040
200241800481349000002677800001806232216478320010102000010200005010000304917696818004818007523177519200100202000020300001800502735111002110910100001010000010200000121526703100000016771367451674612120006401010421622179867101062000010180049180049180040180049180049
2002418004813480000026755000118004122164792200101020000102000050100003149176968180048180039231775282001002020000203000018004827351110021109101000010100000102000001215267121000000167783673516743121200064010115216221798671010102000010180045180045180049180045180049
200241800481348000002671600101801012216479220010102000010200005010000304917696418004818004823177519200100202000020300001800442735111002110910100001010000010200000015267481000000167993674716713120000640101092162217986366102000010180049180049180049180040180045
200241800481348000002674600001801050216478820010102000010200005010000304917696418004818004843177519200100202000020300001800392735111002110910100001010000010200000121526703100000016738367141671112120006401010621622179867106102000010180049180140180049180045180055
2002418003913480000026743000018003320164792200101020000102000050100003149176968180048180044231775282001002020000203000018004827301110021109101000010100000102000001215267461000000167443677616779121200064010112216221798581010102000010180085180049180049180083180040
200241800481349001102673800101805782216479220010102000010200005010000304917695918004818004843177528200100202000020300001800482735111002110910100001010000110200000015267361000000167363671416711121200064010140216221798671010102000010180049180040180049180049180040
200241800481349001002674100111800462216479420010102000010200005010000314917696418003918004823177519200100202000020300001800482730111002110910100001010000010200000120267111001473016722367391674501200064010083216221798671010102000010180045180049180045180045180045