Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (single, H)

Test 1: uops

Code:

  st2 { v0.h, v1.h }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62006294002282200000000047082917511831520001000100010001000109038000122172402904729335310200010001000200020002924729307116100110001000100002010000001000201312593156932317841213533317380810596328759161611327214980100010002963530352303502978531144
6200430019238000000001004692292330183482000100010001000100010908800012173802899729265310200010011000200020002925129305116100110001000100002010001061000001307792886945307049208313187381513514928576160281337914856100010002941829311294112928429305
6200429287228000000000004682290280184052000100010001000100010902800022176802920229367310200010001000200020002928029260116100110001000100002010000001000201308695306943315953207753121381416574928591162311321314794100010002922829407295402935029302
620042925722700000000000467629119018355200010001000100010001091080003217690290852924831020001000100020002002292762927111610011000100010000001000100100000131079299699131925420753316438108544828636163071327514808100010002933929312293582935629216
62004293982260000000120004628291380182482000100010001000100010909800092176702901729398328200010001000200020002934229303116100110001000100000010000061000001305592746911315160207283220381013564828602161551325315030100010002934329323294192923729334
620042930422700000001320004681291830184752000100010001000100010909800042181802906629400310200010001000200020002935729201116100110001000100000010001001000201327992947011312046207023222381714585128492160531360015072100010002925229317293612937029452
620042945022701000000000465229109018298200010001000100010001091080006217330289672939231020001000100020002000292922920811610011000100010000201000100100020130349424698031725920591318638098556028601160841326715127100010002936729332294532942329235
620042944122701000000000473129023118336200010001000100010001091380000217730290262932431020001000100020002000292632932711610011000100010000201000000100020131939218692131285920644322538148565128601162181342315032100010002926529312293022928229311
6200429266227000000001004650291500183222000100010001000100010910800052177802904429243310200010001000200020002921429282116100110001000100002010000001000201313392576882313850207983097381212505628444162561332215073100010002937529247293722932029308
6200429380226010000000004645291740183382000100010001000100010908800062178402893129316310200010001000200020002934929227116100110001000100002010000001000201308593936985317152206173162381716495328542160831340814923100010002935929394292972932729259

Test 2: throughput

Count: 8

Code:

  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  st2 { v0.h, v1.h }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602064005229911000200311034004401602516275010082818800001008000080000500184024064960840036040050400471997132001616010020080000800002001600001600004006140058118020110099100100800008000010080014143501800160018800021636140051101160114004980000800001004006040059400594005940054
160204400583001001018012381408511603251615831008357780000100800008000050018402646464344003204005140057199713200161601002008000080000200160000160000400524006111802011009910010080000800001008001416001800160118800601636140051101160114005580000800001004005940059400594005940052
16020440060300100001702149140044161602516283610081569800001008000080000500183990464820340025040059400501996232000916010020080000800002001600001600004005840051118020110099100100800008000010080014163600800160017800021636140051101160114005580000800001004005040049400514005040051
1602044005132310110190127514004315012516295210082231800001008000080000500183990464413440033040057400591996532001716010020080000800002001600001600004006140058118020110099100100800008000010080014143600800160218800021436140051101162114005080000800001004005140059400594005940052
1602044004730011010190169714004301602516342710083090800001008000080000500184026464926540034040051400601997132001816010020080000800002001600001600004005040050118020110099100100800008000010080015143601800160118800021436141051101164114005880000800001004005340052400534005240051
1602044005829910110190284714004516163251612251008224280000100800008000050018402646465114002204005040047199713200161601002008000080000200160000160000400484005011802011009910010080000800001008001414000800160118800001436141051101162114006780000800001004005140054400534005240051
1602044005830010000180124114003816032516279710081670800001008000080000500184036065130740035040053400611997132000916010020080000800002001600001600004005840052118020110099100100800008000010080015143600800160024800021636140051101162114005880000800001004005340052400514005240051
16020440050300110001502824140046141642516383510081119800001008000080000500184028864872740026040050400501997432000916010020080000800002001600001600004005040050118020110099100100800008000010080014153501800160119800001436141051101162114005480000800001004005140059400594005140053
1602044005030011000200319314003516164251629111008115980000100800008000050018398326432604002704005840050199633200101601002008000080000200160000160000400584005111802011009910010080000800001008001515360180016012380002160140051101162114006580000800001004005140051400504005340060
16020440050300100101901273140044161552516217410082955800001008000080000500184031264648740032040050400571997232001616010020080000800002001600001600004005040058118020110099100100800008000010080014143600800160021800001636140051101162114006480000800001004006140049400494004940059

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002640043310003000110740027161602516004810808618000010800008000050183971264552804031404049940043199823200331600102080000800002016000016000040043400421180021109101080000800001080000420800023580002242502031633400408000080000104004340045400434004440043
1600244004330000300032114002801602516450410814568000010800008000050183971264012304002104004340042199823200231600102080000800002016000016000040043400431180021109101080000800001080000420800020080002242502031633400408000080000104004740044400444004440044
16002440043300000100179940027161602516540110822198000010800008000050183971264145004002104004240043199823200231600102080000800002016000016000040044400421180021109101080000800001080000420800020380002242502031633400408000080000104004340045402444024840043
1600244004331003330008444025116002516045610823548000010801308000050183971264899304002104004340043199823200231600102080000800002016000016000040043400431180021109101080000800001080000420800020280002242502031633400408000080000104004340053400434004440044
16002440043300003000347840609161645374162950108209180180108034880216501865884643982040568040532404772029017204891606822080240802402016072016048040294402533180021109101080000800001080000420800021280002242502031633400398000080000104004740043400444004340043
160024400433000040003594002816165251636111081338800001080000800005018397126425320400210400434004319982320023160010208000080000201600001600004004340042118002110910108000080000108000000800020280002242502031633400408000080000104004440053400444004340043
160024400423000031003671400271600251628791080597800001080000800005018397126534810400210400434004219982320023160010208000080000201600001600004004240043118002110910108000080000108000000800020280002242502031633400398000080000104004540044400444004440044
16002440042315000000286840028161602516137510808448000010800008000050183971264366904002104004240043199823200221600102080000800002016000016000040043400421180021109101080000800001080000425800000280002042502031633400398000080000104084740044400464004440044
16002440042300003000145640028161602516137510828598000010800008000050183971264253804002134004340042199823200231600102080000800002016000016000040043400431180021109101080000800001080000420800020080002042502031633400408000080000104004340053400444004440043
1600244004230000310035940027005251603691081365800001080000800005018397126479790400210400434004219982320023160010208000080000201600001600004004340043118002110910108000080000108000004800020280002242502031633400408000080000104004740044400444004340044