Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 8H, post-index)

Test 1: uops

Code:

  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 13.014

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.014

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0f | 18 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
72005293482200210126000814611290220001529513030100080264000100080004000500020309982929024780291852923631013000400080005000200002929329277116100110001000040066840040021040056198411277993406787298795919228304738091247472849710001633512953133994000800010002921529302292532920829204
720042931121911510200001204477290730001526513008100080084000100080004000500020355981545024765292562936731013000400080005000200002926029211116100110001000040045840100024400601612421311091046797305413471922331043810854452849810001629313066132014000800010002917129329292622932029276
7200429309219022002100014144952904600015311130141000801440001000800040005000203229843270247612920129316310130004000800050002000029201292671161001100010000400008400300064005615800128539208684530917521928431543811848522856010001639412929132764000800010002924129293292162936629287
7200429304220016002000001457328997200152681300010008014400010008000400050002033798430802481529111292323101300040008000500020000291782912511610011000100004000004002010240006058001277591656811307884019251310038061647462848010001610913038130184000800010002920529208292292925429215
72004292862190170017000814545290782001518513000100080144000100080004000500020335982503024814292342926931013000400080005000200002926829186116100110001000040000840030000400661080012809902069323047114919166314038171145462850510001621812932131364000800010002928829305292992921129246
720042921521802000210000046582907600015232130001000801440001000800040005000203169835215024768292572920831013013400080005000200002911329144116100110001000040000040020002400260500012888912268263076114519253300238091745462851510001623412940133234000800010002926229268292572924729210
7200429222219017002200080450329087000153321301410008014400010008000400050002030398238202473629235292683101300040008000500020000291282920511610011000100014000084005000040056150001270894456859306814471923130603809948482848810001632212903133924000800010002927829237291952921129271
720042926221902100230008046152904100015271130221000803040001000800040005000203329813990248322917429333310130004000800050002000029189292721161001100010000400668401000244002601011411289791566875308865119257300838161244432859310001638713010133444000800010002929629205292352929229207
720042930421911510151001204501291290001525313020100080144000100080004000500020318983385024736291792930231013000400080005000200002922029195116100110001000040000840020000400001500012885910368353062114519300306738151148482848610001632613031134434000800010002920829203293032929129147
7200429242219014002300091465229070000153441301410008020400010008000400050002030598293002485729133292843101300040008000500020000293042920511610011000100004000084002000240006130001294692586831303984819253312438142043472864010001636213088134084000800010002921429326292822929129273

Test 2: throughput

Count: 8

Code:

  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 04 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
960205160056119900001100300100160041012120251040144801006400443200008010064000032000047960116802421663998000160037016004316005600326104010020032000064000020040000016000001600561600561180201100991001008000080000010032000027032015500243200246124270005109041611160053800006632000064000080100160057160057160044160057160057
96020416004311990000110030010016004131212025104014480100640044320000801286400003200004796011680274816000010160037016005616005600339104010020032000064000020040000016000001600561600561180201100991001008000080000110032000027032002400243200246124270005109011611160053800006632000064000080100160057160057160161160057160057
9602041600561199000000003001001600413121202510401448010064004432000080100640000320000479601168024216639980101600370160056160056003391040100200320000640000200400000160000016005616005611802011009910010080000800000100320000003200240027320000612400005121011611160053800006032000064000080100160057160057160057160057160057
960204160056119900001100300100160041312120251040144801006400443200008010064000032000047960116800011663998000160037016004316005600339104010020032000064000020040000016000001600561600561180201100991001008000080000010032000027032002400243200246124270005109011611160053800006632000064000080100160057160057160057160057160057
9602041600561199000011003001001600413121202510401448010064000032000080100640000320000479601168024281600000016003701600561600560032610401002003200006400002004000001600000160056160056118020110099100100800008000001003200002703200000003200246124270005109011611160053800006632000064000080100160057160057160057160057160057
960204160056120300100120300100160041312124872691042677801676400003200008010064000032000047960116803421663998010160037016004416005600326104010020032000064000020040000016000001600561600561180201100991001008000080000010032000027032002410243200246124270005109011611160053800000632000064000080100160057160057160057160057160044
960204160056119800000000300100160041312120251040144801006402883200008010064000032000047960116802421663998000160024016005616005600340104010020032000064000020040000016000001600431600561180201100991001008000080000010032000027032002400243200246124270005109011611160053800006632000064000080100160044160057160057160057160045
96020416005611980000000030010016004130120251040144801006400443200008010064000032000047960116802981663998000160037016005616005600343104010020032000064000020040000016000001600561600561180201100991001008000080000010032000027032002400243200246124270005120011611160053800000632000064000080100160044160057160057160057160044
96020416005611990000000030010016004131212025104014480100640044320000801006400003200004796011680242166399800016003701600431600560033910401002003200006400002004000001600000160057160043118020210099100100800008000001003200002703200240003200246024270005109011611160153800006632000064000080100160057160057160057160057160057
960204160056119900000000000001600413121202510401008010064004432000080100640000320000479601168024216639980001600370160056160056003391040100200320000640000200400000160000016005616005611802011009910010080000800000100320000270320000100263200246024270005109011611160053800006632000064000080100160057160057160057160057160044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d2 | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
9600251600651199110005800216005020151251040066800106400563200008001064000032000046640816811151152002801600461600651600490348104001020320000640000204000001600000160065160066118002110910108000080000103200141400320052000133200396013431300501901160111600628000009032000064000080010160066160066160050160050160066
96002416004911991110058102160050217012510400628001064005232000080010640000320000472041168097811520028016003016006916004903481043514203201486400002040000016000001600651600491180021109101080000800001032001313430320052100523200386113431310501901160111600628000009132000064000080010160050160050160051160050160066
9600241600651199110015800316003401717125104002680010640056320000800106400003200004693921681087217601520160030160065160065033210400102032000064000020400000160000016004916004911800211091010800008000010320013144303200131005610320039015301310501901160111600468000099132000064000080010160050160066160066160066160066
9600241600491199111005910016005201700251040066800106400563200008001064000032000046415616802542176015201600461600491600650332104001020320000640000204000001600000160065160049118002110910108000080000103200141300320052002523200006013431310501901160111600628000000132000064000080010160066160050160066160050160066
960024160065119811101380001600440121202510400548001064005632000080010640000320000472041168024298233840160037160059160056732610400102032000064000020400000160000016005616004311800211091010800008000010320000027032002400003200240024350005019011601116005380000100032000064000080010160044160062160060160057160057
96002416005611990001138001160044001202510400548001064000032000080010640000320000472041168032118560012016004016004316005903421040010203200006400002040000016000001600621600561180021109101080000800001032000002703200000000320024612435000501901160111600538000060032000064000080010160060160057160060160060160060
9600241600591198000113810016002800120251040054800106400443200008001064000032000046969316804841663998001600401600431600590326104001020320000640000204000001600000160059160056118002110910108000080000103200000270320000010243200326132350005019011601116005680000100032000064000080010160044160060160057160060160046
9600241600561199000003800016002831212025104006680010640060320000800106400003200004659001680478166399800160040160062160043032610400102032000064000020400000160000016004316005611800211091010800008000010320000027032003210035320024612435000501901160111600538000000032000064000080010160060160044160060160044160057
9600241600561199000003810116004430002510400548001064005632000080010640000320000472041168041781600000160040160056160045033910400102032000064000020400000160000016005616004311800211091010800008000010320000027032003200024320024602435000501901160111600568000000032000064000080010160057160044160057160156160060
960024160056119800011380001600440120025104006680010640000320000800106400003200004720411680295166399800160040160056160056033910400102032000064000020400000160000016004316005611800211091010800008000010320000027032000000024320032002435000501901160111600568000006032000064000080010160060160057160044160063160060