Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLLRH

Test 1: uops

Code:

  stllrh w0, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100510808000110651625100010001000451521102810808113938100010002000108010801110011000100010006001000001000607321622107701511100010811081108110811081
10041080700111065162510001000100045152010281080778390510001000200010801080111001100010001000601010000010006073216221077151511100010811081108110811048
10041080800001065162510001000100045152010281080811390510001000200010801080111001100010001000601010000010006073216221077151511100010811081108110811048
1004108080010106516251000100010004515209951080811393810001000200010471080111001100010001000601010000010006073216221077151511100010811081108110811081
10041080800101065162510001000100045152010281080811393810001000200010801047111001100010001000001000201000073216221044000100010811081108110811081
10041080810111065162510001000100043568010281080811393810001000200010801080111001100010001000601010000010006073216221077151511100010811081108110811081
1004108081010106516251000100010004515201028108081139381000100020001080104711100110001000100060101000001000607321622107701511100010811081108110481048
10041080800101065162510001000100045152010281080811393810001000200010801080111001100010001000601010000010006073216221077151511100010811081108110811081
10041080800011065162510001000100045152010281080778393810001000200010801080111001100010001000601010000010006073216221077151511100010811048108110811081
1004104780010106502510001000100045152010281080811393810001000200010471080111001100010001000601010000010006073216221077151511100010811081108110811081

Test 2: throughput

Code:

  stllrh w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0164

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20206103627700662710812101673173252010210102100001010410000225440474008104970561011210155749777708201041020810008102082001610159123112020110099100100001010010000100100270223364311000123318710033219211113170160010269100029261010000101001014310178101671015810143
202041016476003033510110101512954252010310102100001010410000324639487485114971301010310164750367595201041020810008102082001610139123112020110099100100001010010000100100321229011482410011333249100311192111131701600101981000279721010000101001018210162102131020310191
2020410186760066331012010146291212520102101021000010104100003700214711761149710810130101177542675932010410208100081020820016101221231120201100991001000010100100001001002702046844710003224161210025323011113180160010156100027784010000101001017710331102021025910135
202041015777001222510120101492512325201001010510000101041000032591947180010497101102261037474717761520104102081000810208200161014112311202011009910010000101001000010010027019257633100041332412100372186111131801600101391000064715610000101001015610167101561014910158
202041015776006324005241014416702520103101041000010104100002500894722321049710810124101817588676412010410208100081020820016101861231120201100991001000010100100001001002202624100311000122981110048412811113170160010131100029475410000101001016710180101821015610145
20204101667600123191018010151234525201021010210000101041000032534247180011497140101091015774956761020104102081000810208200161013412311202011009910010000101001000010010021012749133100033411613100323283111131801600101891000262691710000101001026510145101791014310141
20204102287700123241012010129199825201041010810000101041000032396747319211497215102341023475036759420104102081000810208200161013912311202011009910010000101001000010010030025586633100022452416100192128111131701600101611000296711710000101001013510188101471016710160
2020410177760068451092810187121142520103101041000010104100003684374722321049709610118101607513776152010410208100081020820016101811231120201100991001000010100100001001002701344743010003333127100244167111131801600101851000292591610000101001021910248102191025710177
202041013476006327101301016325902520102101001000010104100002390054735761149710210092101367473775862010410208100081020820016101851231120201100991001000010100100001001004102293782410011039281100331230111131701600101411000210784010000101001015810165101571016710163
202041016276010225001201016125442520104101041000010104100002132604720401449707910108101837520776592010410208100081020820016101451231120201100991001000010100100001001003402626822910003243241610041319111113170160010131100006287010000101001014310156101651011210162

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0188

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002610503772222002120393190101853113125200111001010000100101000030784047324011049729510160102227518377212001010020100001002020000102241241120021109101000010010100001010043161974762710019323924311003414224140127011611101441000075102510000100101014410223101471019410334
200241029377232200121227114321029320812520010100101000010010100003867784755920104971661020610277756637651200101002010000100202000010167124112002110910100001001010000101004716139370271001630273429100281119270127011611102041000092872010000100101016510156101891015610294
200241023178111000181027115010149331232520010100101000010010100002434394742000104971121012710212761337740200101002010000100202000010190124112002110910100001001010000101004982894803310010115948231004191667112701151110154100009789910000100101018910179101691020210272
2002410164771111006183318321021920130252001010010100001001010000218904478952010497130101501018775653769220010100201000010020200001014412411200211091010000100101000010100527160576401001511513613100419291721270116111018010000111601110000100101020810167101811019110202
2002410157761111001819391180102133711825200101001010000100101000036436947640801049712110116101577531376782001010020100001002020000102021241120021109101000010010100001010032712848063100103145221910034820572127011611101751000011087210000100101018710177101871018310215
20024102517710010061944280101613188252001010010100001001010000289324474536010497110101201016675413766820010100201000010020200001016612411200211091010000100101000010100417288666391001021393619100331037871127011511101631000079861110000100101016010191102041018610186
2002410186761001006104512101017147124252001010010100001001010000318720474536010497232100901014375133772220010100201000010020200001024112411200211091010000100101000010100442028201123210018324401210021916471127011621101771000092877410000100101018910220101871018210189
2002410210761001001214331130101884985252001010010100001001010000840594770321004972351027910273762237727200101002010000100202000010167124112002110910100001001010000101003371967643110011133318211003081657212701151110153100001561153910000100101018710198102521016410189
200241016477000100612331100101422580252001010010100001001010000354488472088010497167101341017675433766620010100201000010020200001015712411200211091010000100101000010100297160247391000810341612100211016571127011611101521000077692010000100101017710165101351014310184
200241016176111100610301130101612510225200101001010000100101000038433247223201149713010061101377495377562001010020100001002020000102761241120021109101000010010100001010035816526433100204144081002991667112701161110223100001691475810000100101028910309103241028810355

Test 3: throughput

Code:

  stllrh w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0080

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 1f | 22 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020510080760000001101006516251010010010000100100005004681521497008100361005586766879210100200100082002001610088795911102011009910010000100100001001001616605210014011410000060001117170160010044151511100001001008110081100481008110081
102041004775000003100100651625101001001000010010000500468152149700810036100888676687921010020010008200200161009279591110201100991001000010010000100100151460511001401151000006000111717016001007715150100001001004810081100811004810081
102041008075000000000100650251010010010000100100005004681521497008100361008886436875910100200100082002001610090792611102011009910010000100100001001001414604010014001410000060001117170160010077151511100001001008110048100811008110081
1020410080760001062100100730251010010010000100100005004685401497000999510080866868784101002001000820020016100807951111020110099100100001001000010010000060100100000001000000001117170160010077151511100001001004810048100811008110081
102041008075000001510010032162510100100100001001000050046815214970001002810080866868784101002001000820020016100807951111020110099100100001001000010010000060100100000001000014601411117170160010052151512100001001008910089100891008910089
1020410055761100001001006502510100100100001001000050046815214970001002810047866868784101002001000820020016100807918111020110099100100001001000010010000060100100000001000014601401117170160010052151510100001001005610056100561005610089
1020410055751000000001006516251010010010000100100005004681521496977100361008886766879210100200100082002001610055795911102011009910010000100100001001001514600110014001710000060001117170160010077151511100001001004810048100811004810081
10204100807500000015001007332510100100100001001000050046854214970001002810080866868784101002001000820020016100807951111020110099100100001001000010010000060100100000001000015014111171701600100851500100001001005610089100891008910089
10204100557510100450001003216251010010010000100100005004681521497008100361008886766879210100200100082002001610088795911102011009910010000100100001001001414606010014001410001060001117170160010077151511100001001008110081100811008110081
102041004775000002410010065162510100100100001001000050046815214970081000310088867668792101002001000820020016100557959111020110099100100001001000010010015150401001410141000006000111717016001004415150100001001008110048100481008110048

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0084

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002510084750010010032025100101010000101000050468344049707010032100848687388141001020100002020000100471008411100211091010000101000010100006014100001001000060640316221008119191510000101008510085100851008510087
100241008476000101006920251001010100001010000504683441497004100321008486503881410010201000020200001004710047111002110910100001010000101000060141000000100000640216221008119191510000101008510085100851008510087
100241008475001001006920251001010100001010000504683441497004100321008486873881410010201000020200001008410084111002110910100001010000101000060141000063100006064021622100810191510000101008510085100851008510087
1002410084750010010069202510010101000010103245046656814969671003210084865038814100102010000202000010084100841110021109101000010100001010000601410000501000060640216221008119191510000101004810048100851008510085
10024100847600100100692025100101010000101000050468344149700410032100848687388141001020100002020000100471008411100211091010000101000010100006014100000010000064021622100811919010000101008510085100851008510085
1002410086750011010069025100101010000101000050468344049700410032100848687387771001020100002020000100841008411100211091010000101000010100006001000000100006064021622100811901510000101004810085100481004810048
100241008476001001006920251001010100001010000504665681497004100321004786873881410010201000020200001008410047111002110910100001010000101000060141000000100000640216221008119191510000101004810085100891008710085
10024100847506100100692225100101010000101000050468344049696710032100478687388141001020100002020000100841004711100211091010000101000010100000141000022110000606402162210081001510000101005010048100851008510085
100241008475001001006902510010101000010100005046656804970041003210084868738814100102010000202000010084100471110021109101000010100001010000601410000100100000640216221004419191510000101008510087100851008510048
1002410047760000010069202510010101000010100005046834404969671003210084868738814100102010000202000010084100471110021109101000010100001010000601410000001000006402162210044001510000101008510085100481004810087