Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLLRB

Test 1: uops

Code:

  stllrb w0, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 24 | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005108480121100106924251000100010004356811032108477839421000100020001085104711100110001000100060010000001000073116111081191915100010481085108510481048
1004108490010001069025100010001000453440995108581539421000100020001084108411100110001000100001410000001000607311611108119015100010481086108510851085
1004108490000001032025100010001000453920103210847783905100010002000108410841110011000100010006014100000010006073116111081191915100010481048108510851085
1004108480011001069202510001000100045344110321049815394210001000200010851047111001100010001000014100000010006073116111081191915100010851085108510851048
100410848001100106920251000100010004534401032108481539421000100020001084108411100110001000100060010000001000607311611108102015100010851085108510851085
1004108480001001032202510001000100045344010321084815394210001000200010841047111001100010001000600100001010006073116111081191915100010851085108510861085
10041047800100010322025100010001000453441103310928153942100010002000108410841110011000100010006014100000010000731161110811900100010481085108510851085
10041085900100010692025100010001000435680103210848153942100010002000108410841110011000100010006001000000100007311611108119200100010851085108510481048
1004108480000001032202510001000100045392099510858153942100010002000108410471110011000100010006014100000010006073116111044191915100010851048108510481085
10041084906110010690251000100010004534411032108481539051000100020001084104711100110001000100060010000001000607311611104401915100010481085104810851085

Test 2: throughput

Code:

  stllrb w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0217

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 19 | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20206105397800004214461202010208373112520102101031000010104100002614034753041149714610362102217613676932010410208100081020820016102651231120201100991001000010100100001001007782966744710070324134271003834140211113170160010312100021841888110000101001020310263102711024110253
20204101537710004592111901013547572520103101051000010104100001639734755441149707010127102187600676712010410208100081020820016102431241120201100991001000010100100001001011603055137331000210318171002682647011113170160010176100021251162010000101001023910152101491017510190
202041021876100018864116161027835101252010310102100001010410000322606475352104971661026510296763367700201041020810008102082001610277123112020110099100100001010010000100100330244811731100033031241110050102427111113180160010199100021821315310000101001021710222101911022910266
202041019176111161035119121023439148252010410103100001010410000231513485912124971671021310227761367741201041020810008102082001610280123112020110099100100001010010000100100570232793401000600312631003992327211113170160010305100021441313510000101001021810251102441025510217
20204102437711102419360234010317318125201041010310000101001000023371547280810497169101661020175283765520100102001000010200200001022812311202011009910010000101001000010010049028351174810013106136810046112067000013101161110168100001651863410000101001040910360102731033310199
202041017077000063281140101483797252010310101100001010010000244387475352114971621013910205752637663201001020010000102002000010249123112020110099100100001010010000100100290177490391000310444261003610242710001310116111017110000921133610000101001020910147102491020110236
202041028276111072839120241018031972520102101011000010100100002027914714641149708710119102107541376932010010200100001020020000102871231120201100991001000010100100001001002412350023830100113032041001332140000013101161110195100001281172410000101001016810232101741017510170
20204101967600003461120361025333113252010210102100001010010000230453473768104971881024710265759737676201001020010000102002000010228123112020110099100100001010010000100100370274710645100031045281010035924271000131011611101681000018118010710000101001017310206101681022710168
20204101677810104883111701021329109252010410101100001010010000232797474680104970861016910171752637674201001020010000102002000010265124112020110099100100001010010000100100410370787331000310396310036102427100013101161110216100001281061210000101001023210197102251017010204
202041020577110062139100102970204252010410104100001010010000976334835601049711310127101707524376512010010200100001020020000103851231120201100991001000010100100001001002702096108331000320392481004010342710001310116111019710000991102510000101001017010225102161019910192

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0181

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200261055278111006203111401027125126252001010010100001001010000157921475496049715410179102117565377332001010020100001002020000101781241120021109101000010010100001010064181897993910017303326301002816170144127031611101751000090883410000100101020910202101571015410183
20024101837622200152135113010190291182520010100101000010010100001616124719440497076101001015676383771320010100201000010020200001032712411200211091010000100101000010100431617056727100162233222610023171801401270116111018110000111841910000100101017910186102311020910180
2002410180762020078174311101016924992520010100101000010010100002293774730001497145101501017175243772020010100201000010020200001019912411200211091010000100101000010100408231480331000930332410100291023271127011611101681000092881310000100101018010172101731017210222
200241018176100000193311601020335157252001010010100001001010000210603473720049709210143101537501376342001010020100001002020000101791241120021109101000010010100001010029716866046100091239182310031111877012701151210197100001041193410000100101017110175102191020110149
2002410170771110015163311501020527131252001010010100001001010000315859474872049704310125101987548376262001010020100001002020000101931241120021109101000010010100001010040814654037100092227161710032103187112701162110167100009388810000100101018010146101491017110209
200241019976100006010241180102091994252001010010100001001010000327006472664049707610130101557576376332001010020100001002020000102531241120021109101000010010100001010038263201783510016223382510018162121441270115111028210000901251310000100101022310181101571015910154
200241018176221003022211180101081761252001010010100001001010000314556472856049708810099101707566376992001010020100001002020000101681241120021109101000010010100001010052717837739100092135242010030917871127011511102181000018518012010000100101018910175101691020610193
200241021876101006931111281018525892520010100101000010010100003148614702640497089100731016975483765020010100201000010020200001014312411200211091010000100101000010100357190738371001021472416100231015070127011511101971000090852510000100101022110180101721020510176
2002410221761110061321116010164311012520010100101000010010100002874114725680497138101271017375263774820010100201000010020200001018912411200211091010000100101000010100497306673441001001374416100361154271127011611101661000087832810000100101019610201102171015410172
20024101457611100121721110010130226025200111001010000100101000031933947136804970731012710195754837675200101002010000100202000010146124112002110910100001001010000101003471772133291000930334615100341017870127011511101691000073671010000100101016910175101941020410172

Test 3: throughput

Code:

  stllrb w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0084

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10205100847691001006920251010010010000100100005004683441497004100321004786536878010100200100002002000010084795511102011009910010000100100001001000006000100000001000060111722224221004401915100001001008510085100851008510085
102041008475901001006920251010010010000100100005004683380496967999510051865368780101002001000020020000100847955111020110099100100001001000010010000060140100000001000060111722224221008119015100001001008510085100851008510085
10204100847630100100690251010010010000100100005004665620497004100321008486166878010100200100002002000010084795511102011009910010000100100001001000006014010000003100006031172222422100811900100001001004810048100851008510085
102041004775105000100690251010010010000100100005004683380497004100321008486531987431010020010000200200001008479551110201100991001000010010000100100000601401000000310000601117222240010081191915100001001008510085101111014510085
10204100847548100100692025101001001000010010000500466568049700499951008486726878810100200100082002001610084791811102011009910010000100100001001000006048010000000100000111717016001008101915100001001008510085100481008510048
1020410047755110010069202510100100100001001000050046834404969671003210047867268788101002001000820020016100847955111020110099100100001001000010010000060140100000001000060111717016001008119015100001001004810048100851004810048
1020410084753610010069202510100100100001001000050046834404970041003210084867268751101002001000820020016100847955111020110099100100001001000010010000001401000000310000601117170160010081191915100001001004810085100481004810085
10204100847548100100690251010010010000100100005004683440497004999510084867268788101002001000820020016100847955111020110099100100001001000010010000001401000000010000011171701600100810015100001001008510048100481008510085
1020410084750100100690251010010010000100100005004683440497004999510051867268788101002001000820020500100847955111020110099100100001001000010010000060170100000001000060111717016001004401915100001001008510048100851008510085
10204100847500101006920251010010010000100100005004665681497004999510047867268751101002001000820020016100847955111020110099100100001001000010010000060001000000010000601117170160010044191915100001001008510085100481008510085

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0088

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100251008875111130151010073525100101010000101000050468588049697510037100888658388191001020100002020000100881005511100211091010000101000010100151505010014001410000146014164041643100851615710000101005610056100561008910056
1002410055761000015101007312251001010100001010000504685880497011100031005586913878810010201000020200001008810089111002110910100001010000101001515601211001400141000014014164031643100861616010000101009010090100891008910090
10024100557610111815101007402510010101000010100005046858804970091003610089869238818100102010000202000010055100551110021109101000010100001010014146051100140015100001401416404165410052016710000101008910090100591005610056
100241005576111101500100742025100101010000101000050468589049700810037100558658388191001020100002020000100891008811100211091010000101000010100151660601001500141000014601406403163310086150710000101005610090100901009010089
10024100897510101215101004002510010101000010100005046854004970081003610089869238822100102010000202000010089100891110021109101000010100001010014150021001402141000014601426403164410086001210000101009010089100901005610090
10024100897510011816101007311251001010100001010000504669560497008100361008886583881810010201000020200001008910089111002110910100001010000101001415060100140114100001401416404164410085150610000101005610056100891008910090
100241005575111031410100745251001010100001010000504685890497009100031008986923878510010201000020200001008910089111002110910100001010000101001514606010014001410000156014164041643100861501110000101009010056100891009010089
100241008976111118150010074025100101010000101000050468540049700910037100888692388191001020100002020000100891008911100211091010000101000010100151560511001402141000014014064031643100521615010000101009010089100901009010056
1002410089761001181500100741125100101010000101000050466956049700810037100558692388191001020100002020000100881008911100211091010000101000010100151560601001401151000014601406404163410052160610000101009010090100891005610059
10024100587511013160010073112510010101000010100005046854004970091000310089869138819100102010000202000010089100891110021109101000010100001010015166010010014011410000146014164031634100861515710000101005610056100891009010090