Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 4 regs, 8B)

Test 1: uops

Code:

  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640062954223821212000020047382921102181995000100020022000100020002000500021823160001821882029106294526105000200020005000400029351293001161001100010002002300200201220002421013145951169133196166204493299380515676422879510001621513114144242000200010002936629501294152939629385
64004295162370100010003004636292310018296500510002000200010002000200050002181016016221928029081294793105000200020005000400029401294251161001100010002003401200200220002622013068937570203204066204853347381120646032878010001646113169146162000200010002954529591295262943329556
640042946823511200000132910046182934700183765000100020002000100020002000500021814160002219230290242939112285005200020025000400029474295411161001100010002004341200201220002420013145932470163146169205333248381211686622880710001634113278144792000200010002959329468294792948029573
64004294912380101210003104600293070018280500010002000200010002000200050002181116000321958029170295083105000200020005000400029419293812161001100010002004341200201220002621013275922269253101069204743228381018697022868210001638113293143512000200010002962029368294402960429388
640042941323601110100332004740295000018452500010002002200010002000200050002179816000721858029299297533105000200020005005400029648295881161001100010002003302200200520002023013145948569233164266205143210381515636822889810001619013283144792000200010002951529526297902948729581
6400429631236011121001520046302925700182515000100020002000100020002000500021824160007218880290362942510105000200020005000400029357294191161001100010002003260200200220002422013214927369343160071206103237380520677122873810001643913239144032000200010002939929427295502938029350
640042948723701110100930045402933300182215000100020002000100020002000500021806160001221891029138295053105000200020005000400429437295312161001100010002002303200201220022421013201932968663141067205373254381118666622874810001624713086145562000200010002956629635296452967629656
6400429705238011110001083004664291972018135500010002000200010002000200050002180516000821968029196295853105000200020005000400029269294111161001100010002003302200202220002421013198929869533190260204313288382018706222868810001617113254145102000200010002945229458294652951929418
640042939523701100000393004678291760018074500010002000200010002000200050002180816000421929028978294583105000200020005000400029357293871161001100010002003301200201220002021013112939369483131066204523276381219676622858510001631813057144422000200010002952329447294342940529384
640042934623601110100333004567294390018404500010002000200010002000200050002180116000221911029195293733105000200020005000400029271293751161001100010002002300200201220002421013282921569053133073205313303381217656622858910001615512889146852000200010002940029505295682943829472

Test 2: throughput

Count: 8

Code:

  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320206800716201000010102103002247280035161604840336380100168297160060801001600001600004808512476688129719808002508005080062883474001002001600001600002004000003200008021680061118020110099010010080000800001001600131238011600122021160002123812005122117118004680000160000160000801008021780062800518005080228
3202048023162111000010120053022802030160254061078010016442416000080159160000160000480499224438312878240801840802188006203344003852001600001600002004000003200008005080050118020110099010010080000800001001600121238011600140212160002143612105109117118005880000160000160000801008006280051800518006280051
320204800496201000000016004582800351600254050908010016441616000080100160000160000480499231995112955830800250800508004903334001002001600001600002004000003200008006280061118020110099010010080000800001001600131338101600140012160002143612005109117118004680000160000160000801008006280051800628005080050
320204800496201001000241500688628003516160254057668010016389516000080100160000160000480499231994013129180800250800528004903314001002001600001601202004006003204808057080049118020110099010010080000800001001600141538001328034017160000143812005122117118005980000160000160000801008006380387800518006380050
3202048005865310100102116004999280036161602540359280100165100160000801001600001600004808512319973128075018002708004980049034040010020016000016000020040000032000080231800611180201100990100100800008000010016001312001160012111516000214012005109117118004780000160000160000801008005980233800508005080060
3202048004965210000001621200489528003516160254058698010016589216006080100160000160000480499239993112998371800290802178005103324003852001600001600002004000003200008022880233118020110099010010080000800001001600121238011600142020160002143812005109117218005480000160000160000801008005280050800518005180062
3202048005064511005400219011047582800350160254048978010016619216006080100160000160108480499239772712977650800360802208005103324001002001600001601202004000003200008005080229118020110099010010080000800001001600121238001600141120160002123612005122117118004780000160000160000801008005080050802208023380220
32020480218652100100040512106662280046161699254075658010016311616000080100160000160000480851248179813018611800260800538023007334001002001600001600002004000003202408022180052118020110099010010080000800001001600121546011600141118160002144512005109117118004680000160000160000801008022080223800538005480050
32020480052644102004015100005387280038016101254046618010016702716000080100160000160000480851279963512999320800280800558022003138400100200160000160000200400000320240800508005011802011009901001008000080000100160072124600160014201516000214012005122226118037180118160000160000801008005180062800508005080050
320204800586511000000144120157192800550162254063548015916704616000080100160118160000480499248012413024560801830802188005003314003872001600001601202004000003200008023280049118020110099010010080000800001001601321238001600741019160002143812005109125128004680059160000160000801008005180221802178005280218

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200268005862010000000131049412800381616025404874800101638801600008001016000016000048004922342281295633080023800458004503274000102016000016000020400000320000800458004511800211091010800008000010160013134601160014011516000214461215019121712138004980000160000160000800108004680055800458004680045
32002480045620000000003104695080030161602540393780010169381160000800101600001600004800492799728130057008002880052800520334400010201600001600002040000032000080051800501180021109101080000800001016001213460116001401151600021446120501961712138004880000160000160000800108004780047800468004680046
32002480045621000000018300560308003016160254029138001016429416000080010160000160000480049215858512942691800248004580045032740001020160000160000204000003200008004580044118002110910108000080000101600000400016000200316000200005019121712138004280000160000160000800108004680046800468004680046
32002480044621000000012000550308003016160254053048001016414316000080010160000160000480049271930712952950800278004980053033440001020160000160000204000003200008005280052118002110910108000080000101600121346001600140017160002140120501914171258004980000160000160000800108005280053800538005380053
32002480053620101000001500544228003216160254049458001016497416000080010160000160000480049231994013004810800278005380052033440001020160000160000204000003200008005180052118002110910108000080000101600000400016000200216000224000501971712138004380000160000160000800108005580047800468005580046
32002480045620000000021300317408003001602540644580010165016160000800101600001600004800492719809129996208002580052800530334400010201600001600002040000032000080051800521180021109101080000800001016001212460016001411141600021244120501961713128004980000160000160000800108004680046800468004680046
3200248004462000000006300505208003016160254059778001016431416000080010160000160000480049215722112914860800248004580044032740001020160000160000204000003200008004580045118002110910108000080000101600121200016001400121600001450120501912177108004980000160000160000800108005480053800508005380053
3200248005262010001006160051232800371600254040158001016557416000080010160000160000480049223553112990771800238004580045032740001020160000160000204000003200008004580044118002110910108000080000101600121345001600140114160000144612050197171078005080000160000160000800108005380054800538005480054
3200248005162010000000160058832800371616025404514800101672791600008001016000016000048004922333911296597080024800458004503274000102016000016000020400000320000800458004511800211091010800008000010160000040001600021021600022400050195171368004380000160000160000800108005280053800538005380054
320024800516211000100012005223280038161602540378680010167053160000800101600001600004800492559773129883308002780053800520123440001020160000160000204000003200008005180051118002110910108000080000101600000400016000200216000224000501913171358004280000160000160000800108004680046800468004680046