Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STRH (post-index)

Test 1: uops

Code:

  strh w0, [x6], #8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1005104081111615160401025836825200010001000100010005073845824010401040824389820001000200010401241110011000100010357610181015121781510241256727311611103710001000100010411041104110411041
100410408110061517111210251934925200010001000100010005073045824010401040824389720001000200010401241110011000100010291350013101210001510051348717311611103710001000100010411041104110411041
1004104071111615131801025903625200010001000100010005073845824110401040824389820001000200010401241110011000100010217741211017112801710341748707311711103710001000100010411041104110411041
100410407111261523150102510337252000100010001000100050746458240104010408243897200010002000104012411100110001000102588530101210001510051240707311611103710001000100010411041104110411041
100410408122012191205410251844925200010001000100010005073045824010401040824389820001000200010401241110011000100010257615201012101001810211364717311711103710001000100010411041104110411041
1004104081001620001301025142472520001000100010001000507304582401040104082438982000100020001040124111001100010001007761016101312001510051264727311711103710001000100010411041104110411041
100410408101002000141025043825200010001000100010005072245824110401040824389820001000200010401241110011000100010299462131013102862010311756717311711103710001000100010411041104110411041
10041040811016152001010250006252000100010001000100050738458240104010408243898200010002000104012411100110001000102283862610132217141510271288717311611103710001000100010411041104110411041
1004104081010615361541025024825200010001000100010005072245824010401040824389820001000200010401241110011000100010257705151012101562410221248717311611103710001000100010411041104110411041
100410408100061481741025105156252000100010001000100050738458240104010408243898200010002000104012411100110001000101985331010220220121510201296717311711103710001000100010411041104110411041

Test 2: Latency 2->2

Code:

  strh w0, [x6], #8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102091004075216699845172880511610025777104954425201001010010000101001000052220346882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001090013483778660102472939303481910880151011710217221003710000010000101001004110041100411004110041
102041004075237693815176076311210025845104952425201001010010000101001000052189546882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001093013963538652102532938884091510905171203710217221003710000010000101001004110041100411004110041
102041004075227797834171271111521002575867654725201001010010000101001000052210146882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001092212603750669102572898964279110892131175710217221003710000210000101001004110041100411004110041
10204100407523079580917688459610025764117954325201001010010000101001000052215546882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001089414263550656102522819083485410909101312710217221003710000010000101001004110041100411004110041
102041004075222677863173686514810025816114532325201001010010019101001000052219546882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001092614583570674102672819223282410949211115710217221003710000010000101001004110041100411004110041
10204100407521729186217848610921002578074982625201001010010000101001000052220346882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001088812853756707102703029003891010924151236710217221003710000110000101001004110041100411004110041
1020410040752289918551752770108100258181291032825201001010010000101001000052215746882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001093813983600658102532909203482010931121149710217221003710000010000101001004110041100411004110041
102041004076210693815176076514810025792751052625201001010010000101001000052216346882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001091014103840684102602429004482110980121156710217221003710000010000101001004110041100411004110041
10204100407522958385117447101161002576071921725201001010010000101001000052214746882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001091213653650655102672788885086810984131168710217221003710000410000101001004110041100411004110041
102041004075222089820177685111521002578768553125201001010010000101001000052217146882449696010040100408674387472010020010000200200001004012211102011009910010000100100001001090012163330635102803009564486810973171188710217221003710000010000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002910040752445878521768560120100258051279326252001010010100001001010000521113468824049696010040100408696387702001020100002020000100401241110021109101000010100001010914134341506351026430993436850108821012956714163310037100001010000100101004110041100411004110041
1002410040752316828171752700152100257661108347252001010010100001001010000521121468824049696010040100408696387702001020100002020000100401241110021109101000010100001010896135037606541026229686842899109272113286403163310037100003010000100101004110041100411004110041
100241004075233174807171271012410025739738128252001010010100001001010000521137468824049696010040100408696387702001020100002020000100401241110021109101000010100001010950132940106731026130189038926109142212336402162210037100001010000100101004110041100411004110041
100241004075229279835172071012410025766949828252001010010100001001010000521137468824049696010040100408696387702001020100002020000100401241110021109101000010100001010858135134606591024931486636861109161511656403163210037100005010000100101004110041100411004110041
1002410040752304818101696760120100257751298027252001010010100001001010000521131468824049696010040100408696387702001020100002020000100401241110021109101000010100001010884134841406791028226090640916108901912266402162210037100002010000100101004110041100411004110041
100241004075244897789170456015210025795907525252001010010100001001010000521065468824049696010040100408696387702001020100002020000100401241110021109101000010100001010912130644406501024430392652919109201513416403163310077100002010000100101004110041100411004110041
100241004075231395803179268012410025792868622252001010010100001001010000521131468824149696010040100408696387702001020100002020000100401241110021109101000010100001010910129837006331024730489036878108891312406402162310037100000010000100101004110041100411004110041
1002410040752466898111728610104100257531109336252001010010100001001010000521137468824149696010040100408696387702001020100002020000100401241110021109101000010100001010886127538426611025929692236863108981813166402163310037100000010000100101004110041100411004110041
100241004075243996824171271010410025816917519252001010010100001001010000521099468824149696010040100408696387702001020100002020000100401241110021109101000010100001010854157441706431024628990250920108461311616403162210037100210010000100101004110041100411004110041
10024100407622051018361752650168100258031167327252001010010100001001010000521121468824149696010040100408696387702001020100002020000100401241110021109101000010100001010930133640806351025931091838899109213012136403163310037100001010000100101004110041100411004110041

Test 3: throughput

Count: 8

Code:

  strh w0, [x6], #8
  strh w0, [x7], #8
  strh w0, [x8], #8
  strh w0, [x9], #8
  strh w0, [x10], #8
  strh w0, [x11], #8
  strh w0, [x12], #8
  strh w0, [x13], #8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5097

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020940792306300001881771831169610113240846735169518311612516332480709800148010080000416486187401207074937750040854408073068133067216010020080000200160000407978711802011009910080000100800001008091942439156512843805092870885421651813825044155340511011711407658092280000801004072040854408184071240803
80204408013053000018037598021728115104408257761664181316725160563808308000080100800004026181878692010474937687040847407103058533087716010020080000200160000408308711802011009910080000100800001008092929394051088688052529308941221630814415514426270511011611407698025880000801004079540818407404076840732
8020440805305220001704775745169610613640846760174418301622516084180745800088010080000401943187071201281493771804074640871307543307041601002008000020016000040789871180201100991008000010080000100809012340454802902805562860852941782814265394831280511011711408078045880000801004073140851409314083240716
80204407603052000017408927611592989640760785177517001902516070182725800008010080000402685187756405194937648040782407953067733075216010020080000200160000406567611802011009910080000100800001008089013425752412887805262620848541640814155394126141511011711407108743480000801004073840798407414085940810
8020440687305100001953874796170413114440679754209019571772516320683618800008010080000411570187343203444937580040888406873073633069516010020080000200160000406737511802011009910080000100800001008089684636487682580542268091446160981429549422800511011611408408063680000801004077540784407554075540775
80204407123060000018426678141696108100407607901729164615025160736804518000080100800004024741871931027449376440408114079730634330699160100200800002001600004085575118020110099100800001008000010080873047975321388880507303087768168881383484430400511011711407468019380000801004078540884408184069040791
80204407883050000018278297431688113192408497541744165417325163120805448000080100800004017691875928010564937667040762407113077033065416010020080000200160000406777511802011009910080000100800001008087904318508382680491283086830155781391493477500513711711406908056380000801004082740801408314074440669
802044081230500000180989278917047996407897901884174516725164250814098000580100800004037971874248033664937680040785407673068333054216010020080000200160000406797611802011009910080000100800001008091704341504787180521251089678154881378471408500511011811406938052580000801004076840818406544069840687
8020440807305000001890734798173610112840826788187419011512516723681036800008010080000413977187508801064937675040792408003064033068016010020080000200160000408007511802011009910080000100800001008085804370476483580478283087330166181449531397000511011611406878038580000801004078240760408324069540709
802044069930400000189078876716721091324073676018771937194251604758028680059801008000040288618750160126549377630407894073830598330779160100200800002001600004074375118020110099100800001008000010080881045215141485880525293084842165481345535392500511011611407508057480000801004085340772408314080740761

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5101

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800294080430500155491775416961111084087977820252103151251605858430280083800108000040109718798740198493777404080740708306823308551600102080000201600004080385118002110910800001080000108087704086475586180561239083532160981356569413514050230217223408818022580000800104074940865407864078840764
80024407943060016387667841720861164079576818141686189251606538625980000800108000040118718753041325493757504078440801307023307251600102080000201600004081276118002110910800001080000108088604247471118358048626718492615358140957442960050230216223408008251080000800104092940856408454083840683
800244087330500159086979416721241004077675817021963195251681798723280000800108000041720518781120250649377340408464078430739330767160010208000020160000408337511800211091080000108000010808410415843148778050122208233413928136154441730050230216223408038055980000800104081840724406654096440856
80024408523060016328007721672123108407407771974173613625160568803448000080010800004023181874956021649376670408224075630754330832160010208000020160000407997511800211091080000108000010808780429843928528053724908713215488143151841930050230218223408468364680000800104081640789407934074640716
80024408303060017287487671672115100408057531554178314425160575805918000080010800004215851879154034749377580408274079330720330894160010208000020160000407387511800211091080000108000010808590417946778568045622008433014888141954442030050230216223408168040180000800104083540806408644079540723
80024407803060017107687741744116964081076714091660145251686508554380000800108000040249818767200280493769304079640774307103308061600102080000201600004078776118002110910800001080000108085804235494108538054422708673613438147156241190050230216223407568067780000800104077340819408184076240825
80024408133050018128357701728106132408337421667178314325161814804078000080010800004008561878256031249377570407704081430712330865160010208000020160000407077511800211091080000108000010808420399546368368047721509023213188138251640370050230217223407598073080000800104080340825408204078140962
80025407823050016387847551688122136407667671862186615225160554803588000080010800004018991872568032449378390410424091130843330773160010208000020160000408717511800211091080000108000010808470449851668548049623108692815698137349842870050230218223407928431380000800104082640736408364076840862
800244082930600159683080316889910840876757187320261452516057288375800008001080000404382187482402614937739040790407513066233076516001020800002016000040749751180021109108000010800001080867044334221082980554262087378155681383528408114150230217223406948191380000800104078940829407284081240761
80024408363060018218387961680115104407567511844175016125162212804088000080010800004017641874848063349377090407514084930752330773160010208000020160000408137611800211091080000108000010808370441645998978048124308518214908138352834250050230218223408868037580000800104071240772407974082840804