Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLXP (32-bit)

Test 1: uops

Code:

  stlxp w0, w1, w2, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 50.674

Integer unit issues: 0.000

Load/store unit issues: 50.674

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 22 | 23 | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053799328400199800379787267506745067487814100324911000493491303799337993131405949244068781491440274320380043799310011100110001000449654496544965019984496510001000016590820552237909010003799437994379943799437994
10043799328400199800379787267506745067487814100324911000493491303799337993131405937243948781491440274320380093799510011100110001000449654496544965019984496510001000016590220632237909010003799437994379943799437994
10043799328400199800379787267506745067487814100324911000493491303799337993131405937243948781491440274320379943799310011100110001000449654496544965019984496510001000016590220632237909010003799437994379943799437994
10043799328400199800379787267506745067487814100324911000493491303799337993131405937243948781491440274320379943800010011100110001000449654496544965019984496510001000016590220572237909010003799437994379943799437994
10043799328400200110379787267506745067487814100324911000493491303799337993131405943243978781491440274320380013799310011100110001000449654496544965019984496510001000016590220632237909010003799437994379943799437994
10043799328500199800379787267506745067487814100324911000493491303799337993131405949244028781491440274320380013799310011100110001000449654496544965020084496510001000216590220632237909010003799437994379943799437994
10043799328500199800379787267506745067487814100324911000493491303799337993131405937243948781491440274320380073799410011100110001000449654496544965019984496510001000016590220532237909010003799437994379943799437994
10043799328400199800379787267506745067487814100324911000493491303799337993131405940243948781491440274320379973799310011100110001000449654496544965019984496510001000016590220632237909010003799437994379943799437994
10043799328400199800379787267506745067487814100324901000493491303799337993131405937243948781491440274320380063799810011100110001000449654496544965019984496510001000016590220632237909010003799437994379943799437994
10043799328500199810379787267506745067487814100324911000493491303799337993131405941243978781491440274320380153799610011100110001000449654496544965019984496510001000016590220632237912010003799437994379943799437994

Test 2: throughput

Code:

  stlxp w0, w1, w2, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9249

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20209389225291626002202022818152006321131563892385710694015585002989252595917887736974881274466311718062010000049386049038925238912783468592021811701486285794496778606794504233601638913338925410001120201100991001000010100100001002306748357393090730229883100932462002523052010000134400002094601601104057643890682914251000010100389128389254389128389129389128
20204389250291512002102024108061007281813043892366147694025585182989392595767887736974871274467111717662010000049386175338925338925483527590831813021486262794474778665794473233599538912838912710001120201100991001000010100100001002305428362732380656229849518605220019230510100001552000020946114111007571083891142914231000010100389129389128389256389128389128
2020438925029161210210201710835200520177228389111569069278558502298925259577788773697487127446931171701111000004938605003892513891308353159206181296148646279450177860479447323358123891283892521000112020110099100100001010010000100230604426162310068222984724859782001623057210000140002002094601601097856643890682914231000010100389129389250389129389255389253
20204389252291512102002014708201005921762763891136350694225585272989872595787887726974901274474211716968110000049386172038912738912783528590831811681486260794473778675794473233599238912938912810001120201100991001000010100100001002305740340863380720229877270924462001923052410000151200002094571591158257643891892915751000010100389128389130389131389254389132
2020438925429151210200202720801100584159220389112700069404558500298925259577788777697488127446481171787401000004938605603892513891278346759097181293148628979449677861379449823359893892553892511000212020110099100100001010010000100230653033513329067222984760844422002223051210000148000002094601601097857643891942914241000010100389252389127389130389128389253
20204389253291512001002018817687910073616022438923657332694045585062990902595767888016976781274466211717803010000049386170038925238925283529592061813241486283794496778619794477233581538912738912710002120201100991001000010100100001002305481329653250719229853110875582001623052410000144001002094601601097057643890692914251000010100389251389131389286389129389253
2020438925229161100210202560788100744174112389112663069279558502298925259577788773697487127446481171768601000004938604703892523891288352759078181191148626379447377867979453223360073892523892531000112020110099100100001010010000100230570231297241068222986291906502001623062610000135200002094611621097860663893812914231000010100389254389251389129389253389198
2020438925129151210200201460851200568187108389111575069404558502298926259576788797697486127451731171768901000004938617203891273892058346459203181299148628479450577863379447823359953892523891271000112020210099100100001010010000100230555034974228066322984341880902001623057910000143201002094591601097557643891932914231000010100389130389253389184389254389130
20204389127291512102102032508101006081511203892355895692805585042989242595797887966975311274464411717665010000144938617103891273891288347159207181302148625979449777868179450223358303891303892531000112020110099100100001010010000100230598132977301075422987871873622001023056210000166401002094791591097657633892302914261000010100389130389130389128389131389129
2020438912929151200210202170800100400143112389236676069402558500298926259576788808697486127446011171771801000004938616903892053892508356859083181178148628379450077861679449923358303891263892521000112020110099100100001010010000100230546132956264066822984740898582000423053810000145600002094771601097857643891912914241000010100389253389129389251389128389253

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9946

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002938997329212001220230788515449912438994069447010055022229055325966676024969757512689806117651840100000493868770389944389944708255988818886814578387608217591767608202277525389957441000112002110910100001001010000102306361532973362066922994560916362002023067010000714801602009960431097838403899402898031000010010389993389966389946389946389958
2002438995529213131020278781516881104203899425863700895502162905522596647602526975761268985211765158010000049386876038995638995570828598991888631457822760820759190760820227752238995644100011200211091010000100101000010230639731862267072522994190868742003223064310000713601102010840431097740403898862898011000010010389945389957389956389945389957
20024389989292131000202227783174412028438992966227009955022029055425966576025069757612689799117651951100000493868750389956389944708155988818886714578267608267591787608232277507389944441000112002110910100001001010000102306287326193210679229953112900862002123068710000716561002010000391097839403898852898011000010010389946389946389947389956389958
2002438995729212101020209780916569914838992957327011155021629055725966776025269757412689859117652171100000493868570389938389937708135989918885914578187608107591707608102277513389936389947100011200211091010000100101000010230682036111283069222994364845422001623075610000013680202009960401097238383898822897931000010010389951389948389937389948389937
2002438994829212020020179782016569124038993158547008855021829055325967176026269757412689793117652291100001449386876038994838995570817598981888631457826760824759168760823227753138995644100011200211091010000100101000010230680737780282072522996754933482002023062310000714801302009950391097036383898962898021000010010389957389995389960389957389946
2002438994529213001020114783914169122438994154047008855022029055325966976025869757612689846117651371100000493868640389957389944708265990118885714578427608187591827608202277519389981441000112002110910100001001010000102306901536674309067622995680925822020823063110000716241202009960411097838393898962898021000010010389958389945389947389948389956
20024389944292131000202157801254410222438994170137010055021829055225967076025269757412689814117651961100000493868750389944389946708235990618885314578247608217591727608222277501389958441000112002110910100001001010000102306427342752400748229952110992402001123069210000710881002009970421093140413898972898011000010010389956389956389957389945389945
200243899572953404042027178004392941203899405964700885502202905532596657602466975741268977011765172110000049386864038994538995670822598881888671457822760828759177760828227753138995744100011200211091010000100101000010230670926556306064222994262908902001823061010000712641602009970401097837413898982898021000010010389949389958389957389947389958
2002438994529212100020180780624241089238994155447010055021629056625966776025869757612689823117651450100000493868643389955389959708205989218886714578227608237591697608322277534389956441000112002110910100001001010000102307241530606314068322995250878982001423068410000713681802009960431097441423898982898041000010010389946389960389957389947389949
2002438994529203040020280782717201093203899407043700995502162905532596647603266975741269018211765108110000049386864038994638996170815598881888641457829760817759175760824227751638995744100011200211091010000100101000010230633734990262069122994350920462001723068210000714721002009970431097836413898962898031000010010389949389946389957389947389960

Test 3: throughput

Code:

  stlxp w0, w1, w2, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0115

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020538010228470010019998000038009207027152928410697518575106979157365438310462783010000493770270380107380107130437599472489799264331910093844319100281532938010738010710001110201100991001000010010000100458865045886604588660019998458866100000100000010172639113111070131131380048119710000100380148380111380111380111380111
102043801102847000001999800018338011007027652927210697518575106979157155438410461754010000493770300380124380124130429599572489749264121910093841819100281527838011038011010001110201100991001000010010000100458911045886604588660019998458866100000100000010172637113111081130130380053119710000100380142380122380111380111380113
102043801112847000001999800018038009207027252927210697518575106969157365438410461863010000493770270380107380107130437599472489799265221910093844319100281532938012238010710001110201100991001000010010000100458953045886604588660019998458866100000100000010172640113111064130129380048119710000100380134380113380108380108380108
102043801082848000001999801018138009207027152927210697518575106979157365438410461863010000493770270380108380107130437599472489799264331910093844319100281532938013138010110001110201100991001000010010000100458906045886604588660020004458866100000100000010172638111611061129129380048119710000100380116380143380108380108380109
10204380107284700000199980001803800920702715292721069751857510697915736543841046186311000049377027038010738010713043759955248992926433191009384431910028153293801083801071000111020110099100100001001000010045893304588660458866002000145886610000010000003172636113011072130130380048119710000100380158380118380108380108380108
102043801072847000001999800018138009207027252927210697518601106979157365438410461863110000493770270380107380107130437599512489799264521910093844319100281532938010738010710001110201100991001000010010000100458866045886804588660019998458866100000100000010172561112911072130135380048119710000100380116380135380118380108380109
102043801072847000002000701018138010307027152927210697518575106979157365438410461863010000493770273380111380107130437599472489799264331910093844319100281532938010738010710001110201100991001000010010000100458915045886604588660019998458866100000100000010172636113011059130130380048119710000100380141380118380108380108380108
102043801072847000001999800018138009207027152927210697518575106989157365438410461863010000493770270380107380107130437599492489799265321910093844319100281532938010738010910001110201100991001000010010000100458919045886604588680020004458866100000100000010172638113011072131131380048119710000100380131380108380108380108380108
102043801072848000001999800018138009207027352927210697518575106979157365438410461863010000493770270380107380124130437599632489959264331910093844319100281532938010738010710001110201100991001000010010000100458865045888204588660019998459538100000100000010172638113011072130130380048119710000100380141380116380117380125380108
102043803872847000002001000018038009207027152927210697518575106979157365438410461863010000493770270380107380107130437599472489799264331910093844319100281532938010738010710001110201100991001000010010000100458970045886604588680020001458866100000100000010172636113011072130129380048119710000100380116380146380116380108380110

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0011

retire uop (01) | cycle (02) | 03 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10025380019284700001999800379997070268519732106751866510679158055424104703940100000493769330380013380011138348599522493279168721910953932191028616763800193800121000111002110910100001010000104589564589600458956019998458956100001000000169646431108142413801791171000010380012380012380012380012380012
10024380011284600002000700379996070267519732106751866510679158055424104703940100000493769310380011380011138348599432493359168721910953985191028616763800373800111000111002110910100001010000104589564589560458956219998458956100001000000169725441108049493799441171000010380012380012380012380012380013
10024380011284600002002800379996070267519732106751866510679158055424104703940100000493769310380011380011138348599492493649168721910953892191028616763800173800111000111002110910100001010000104589564589560458956119998458956100001000000169646491108042493799461171000010381334381138381161380487380013
10024380011284600001999800379996070267519732106751866510679158055424104703990100000493769310380011380035138348599402493279168721910953892191028616763800263800191000111002110910100001010000104589564589560458956019998458956100001000000169646411108043493799441171000010380012380012380012380012380012
10024380011284700001999800379996070269519732106751866410679158075424104703940100003604937693103800113800121383485994324933091700919109538921910286167638003438001910001110021109101000010100001045895645895604590028719998458956100001000000169646411108242423799441171000010380012380012380012380014380012
10024380011284600001999800380019070267519732106751866510679158055424104703940100000493769330380011380013138348599392493279168721910953892191028616853800193800111000111002110910100001010000104589564589560458956019998458955100001000000169646491106942493799491171000010380012380012380015380012380012
10024380011284600001999800379996070267519731106751866510679158295424104703941100000493769310380011380011138348599392493279168711910953892191028616763800193800191000111002110910100001010000104589564589560458956019998458956100001000000169646491108149493799441171000010380012380012380012380013380012
100243800112847013443277625332038460891272774520290107251890110679158045441104886880100000493769310380034380018138348599402493439168721910953892191028616853800193800191000111002110910100001010000104589564590180458956019998458956100001000000169646491107249493799521171000010380014380012380012380012380033
10024380012284600001999800379996070267519732106751866510679158055423104709000100000493769310380012380011138348599392493339168721910953892191028616763800453800121000111002110910100001010000104589564589560458956020001458956100001000000169646491107043433799441171000010380018380012380012380012380012
10024380011284700002011300379997070267519732106751866510679158055424104703990100000493769310380011380011138348599392493409168711910953892191028616763800423800111000111002110910100001010000104589564589550458956019998458956100001000000169646411107942423799441171000010380012380012380012380012380012