Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (single, B)

Test 1: uops

Code:

  st2 { v0.b, v1.b }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62006287022241280028100121048622861117630200010001000100010001090780000321721028195287593102000100010002000200028599285501161001100010001002121100100110000211132059557697332891143200113158381415534828160151251266413674100010002865028589286102875228668
6200428555222119012600001046712857817688200010001000100010001090580000921776028482287753102000100010002000200028552285251161001100010001000030100100110001300132889514695531711055200683169380716535428202154901279414215100010002880628682287742874228774
6200528532224125002110002048252859817494200010001000100010001090580000172170202802028628310200010001000200020002869728510116100110001000100222210010011000021113321957369753238748200053143380416525428184152331270014217100010002862328682286462863028857
6200428683220023102000002047632858217477200010001000100010001090980000172171802834428687310200010001000200020002855528736216100110001000100028821100100110001211132489599695931191658201133192381121485028282151171275714348100010002857328598287042887728571
620042874922211900231000204877285471786920001000100010001000109038000082168902843828588601602000100010002000200028695286491161001100010001000320100101010001201131599578693132501051200133100380816525328206155461289514066100010002871128543286082877328824
6200428683223120012300002048102856317700200010001000100010001090780000921747028474287813102000100010002000200028702286281161001100010001001220100101110001211131079444697032601151200653209380322504928225153521259213975100010002860628771287132877028624
6200428754223122002410002048362857117687200010001000100010001090780000721689028531285813102000100010002000200028608286441161001100010001000220100102010001200134089587690831891050201233195380813474728159153691268014313100010002875228728287292861828556
6200428557223019102300001049102861017575200010001000100010001090080000721701028260286273102000100010002000200028562286041161001100010001000021100000410000200134669570699631661359201143178381018564828120152471299214265100010002884728668286722872028729
62004287492221220017000020478528619176892000100010001000100010913800001221726028418287753102000100010002000200028587286611161001100010001002222100110110000210135529420696431971145200733242380713515228088153021265114086100010002863528729286692862928662
6200428584223120001700002048222856617830200010001000100010001090680000102164402839428739310200010001000200020002867228606116100110001000100002110000211000020013235954270093172649200673204380515515227953152921284714325100010002863428727289012862728760

Test 2: throughput

Count: 8

Code:

  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  st2 { v0.b, v1.b }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602064005831100000000316540034161600251630331008085780000100800008000050018397126443471400214004340049199610320009160100200800008000020016000016000040048400421180201100991001008000080000100800000340800001008000220000051101161140040080000800001004004340044400434004340043
16020440043310000129000138340028016002516326410082320800001008000080000500183971265017404002140042400431996103200061601002008000080000200160000160000400434004211802011009910010080000800001008000003408000200280002234000051101161140046080000800001004004340044400434004440044
160204400433100000300010394002700002516364210081386800001008000080000500184632664922104002440048400431995903200011601002008000080000200160000160240403564004911802011009910010080000800001008000003408000200080002234000051101161140040080000800001004004340044400434004440050
1602044004331100012000018384002701610251612441008390380000100800008000050018397126443470400214004240043199610320006160100200800008000020016000016000040042400421180201100991001008000080000100800000008000000880002234000051101161140039080000800001004004440043400434004340050
16020440042311000030003275400281616002516230410081345800001008000080000500183971264408404002140043400421995903200001601002008000080000200160000160000400424004311802011009910010080000800001008000003408000200280002234000051101161140040080000800001004004340043400444004340044
160204400423100000310014454002716020251626471008335080000100800008000050018397126488130400214004240042199620320006160100200800008000020016000016000040043400421180201100991001008000080000100800000008000210280000234000051101161140039080000800001004004440049400444004940044
1602044004231100003000113240027160002516198810083278800001008000080000500183983264768504002140043400421995903200071601002008000080000200160000160000400484004311802011009910010080000800001008000003408000200280002234000051101161140040080000800001004005040049400644005040043
16020440049310000000002012400271616002516407210082891800001008000080000500183971264764904002140042400431995903200011601002008000080000200160240160000400484004311802011009910010080000800001008000003408000200380002234000051101161140045080000800001004004940044400434004440044
16020440043310000000002939400341616002516346510083154800001008000080000500183971264868004002140043400421996203200071601002008000080000200160000160000400424004311802011009910010080000800001008000003408000200080002234000051101161140046080000800001004004340044400434004440050
16020440043310000030002124400271616002516234310083058800001008000080000500183971265017404002140042400431995903200011601002008000080000200160000160000400484004211802011009910010080000800001008000003408000210080002234000051101161140039080000800001004004940043400434005040043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600264005131011000001700479814004716160251608001081978800001080000800005018404566489620400260400544005319988032004216001020800008000020160000160000400524005511800211091010800008000010800141444018001600168000216441400502002116011400508000080000104006340063400534006440055
1600244005231010100001800441614003816161251640131081783800001080000800005018400486509030400290400524005419989032004316001020800008000020160000160000400524005411800211091010800008000010800161544008001600168000216441400502001216011400598000080000104005440055400544005540052
1600244005431111110002000233314003716160251627431082533800001080000800005018400726548510400370400514006219988032003216001020800008000020160000160000404534026411800211091010800008000010800141544208001600188000216441400502001216011400448000080000104005540055400514005540054
1600244005631010100001700218914003816161251615311082497800001080000800005018400726455310400290400524005419989032004216001020800008000020160000160000400514005411800211091010800008000010800151444008001601178000216441410502002216011400528000080000104005440054400554005640048
160024400543101101000180041314003916161251641041084221800001080000800005018400966478210400380400524006219990032003116001020800008000020160000160000400534005211800211091010800008000010800141543008001600168000216461400503801125031400598000080000104005540054400554005440052
1600244005531010110001400859140035161612516234110822618000010800008000050184045664788004002604005340051199870320033160010208000080000201600001600004005440052118002110910108000080000108001415440080016011680002155914105020013151011400588000080000104006340062400634006240063
160024400523221000000200015191400471619549163338108159780000108000080000501840408640956040037040066400601999503200421600102080000800002016000016000040061400611180021109101080000800001080014146110218001501168000115631410502080116011400608000080000104005440055400554005540052
16002440054322101000021001586140036016125160254108218680000108000080000501840120646557040027040054400521999703200311600102080000800002016000016000040051400561180021109101080000800001080014144610108001610238000216441400502020216011400578000080000104006440063400634006340062
160024400613211100000170044611400351918725163316108166280000108000080000501840408649333040036040061400551999703200411600102080000800002016000016000040060400621180021109101080000800001080014140708001501158000116441400502040116011400508000080000104006340063400644006340055
1600244005131010100001900103914003916161251617991082665800001080000800005018400966474980400270400534005119997032003316001020800008000020160000160000400544005111800211091010800008000010800141442308001600188000216441410502010216011400498000080000104005540056400554005540054