Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (single, D, post-index)

Test 1: uops

Code:

  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.010

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.010

retire (01) | cycle (02) | 03 | 04 | 05 | 07 | 08 | 09 | 0b | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
6600529353220811110000200466028818001167657004100040042000100040002000500010000476260229542905329332310700020004000300012000291552929511610011000100002004362008001220004026001281392366853306806620092311338191054502837710001639113273142842000400010002927429244292432919029303
660042922521971010000050045882896200016940701410004014200010004000200050001000047456623017291222937231070002000400030001200029170291481161001100010000200236200600162000602421130029215686730900442007730633819641422842510001628213272142142000400010002924229296293532939529246
6600429373218610101000441047822884902016923701010004004200010004000200050001000047684522970291132944631070002000400030001200029052290851161001100010000200336200200122000402022129189131684930880442004731413816743462834210001600713193142272000400010002924929263292172931129297
6600429306220710001000810453528886002168107010100040142000100040002000500010000475541230082906129268310700020004000300012000290902918411610011000100002003362003102220004024211293291926859307725320127308138151256462841810001629013224142982000400010002927029298292422928629179
6600429380220410100000200464828779202168947010100040042000100040002000500010000475802230262903329273310700020004000300012000290662910711610011000100002004462006022220006026221277891156890301705020112315537621050472838810001606013335144932000400010002917329269292272930429362
66004292672196101000001010476028732020169477010100040102000100040002000500010000474524230082912329282310700020004000300012000291942912711610011000100002003342003001220024026211325889906877311114220142307238202050552849210001621913112142952000400010002922629304291802925629280
66004292412199100000001000455828809202169347010100040102000100040002000500010000476349230272916229260310700020004000300012000292292913211610011000100002002362002000920044026211278390986876316803820032319538131049542841710001623913285144432000400010002912529214292642914829275
6600429241219610101000210452228681002167767004100040142000100040002000500010000476263229832910829277310700020004000300012000290832913811610011000100002003242004000220044026201280290656896304104820131307638221047452851810001636713144144362000400010002927129158291852932529340
6600429328219610100000810460128972002169297010100040142000100040002000500010000476894229812907829219310700020004000300012000291802923611610011000100002003362005002220004026201323991786844320014920155305938251145482868310001660813374146062000400010002925129230292032917629299
6600429203219510100110810472128895220168967004100040142000100040002000500010000475540230022910729244310700020004000300012000291402904711610011000100002004242006000220044020221278294096904308505220059323338211749452835410001634813307145312000400010002931929436292282919429168

Test 2: throughput

Count: 8

Code:

  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4802051600551199000011002701011600412012799400255601088010032000816000080100320000160000480499800429227126921160037160056160040799340380038560100200160000320000200240000960000160056160055118020110099100100800008000001001600000251600000022160022612100005110217221600530800000616000032000080100160041160057160057160057160041
48020416005511980000000036000016004021207995502556010880165320008160000801003200001600004808938002982271582001600361600551600557993403800375601002001600003200002002400009600001600561600401180201100991001008000080000010016000002516002100301600216022000151102172216003708000010616000032000080100160057160041160057160057160041
480204160056119900001100000001600250012799560255601088010032000816000080100320000160000480499800287227160561160037160056160055799340380037560100200160000320000200240000960000160040160055118020110099100100800008000011001600000016000000331600210103300051102172216005318000010016000032000080100160041160057160149160056160056
480204160040120200000000270101160040012127995602556010880100320008160000801003200001600004804998001282271582011600211600551600557994903800225601002001600003200002002400009600001600401600551180201100991001008000080000010016000002516002200016002160000005110217221600520800006616000032000080100160490160162161262160508160056
48020416004011990000000028010016004100127995602556010080100320008160000801003202521600004804998004292271582011600361600401600557994903800375601002001600003200002002400009600001600401600552180201100991001008000080000010016000002516003000301600210130000151102172216003718000010616000032000080100160041160056160056160056160056
4802041600401199000000003601011600410121279956025560108801003200081600008010032000016000048049980012822712692016003716005616005579949038003856010020016000032000020024000096000016004016005511802011009910010080000800000100160000001600291001600006129330005110217221600370800006616000032000080100160057160057160056160041160041
48020416004011990000000028000116004021212799550255601088010032000816000080100320000160000480499800287227158200160036160055160055799340380037560100200160000320000200240000960000160055160055118020110099100100800008000001001600000251600220025160022612200005110217221600370800000016000032000080100160041160056160057160057160057
4802041600401199000000000000016004021212799400255601008010032013216000080100320000160000480499800428227126921160037160056160056799520380038560100200160000320000200240000960000160055160055118020110099100100800008000001001600000016000000301600306122330005110217221600370800006016000032000080100160041160056160057160057160056
4802041600551199000000000010116002501207994002556010880100320008160000801003200001600004809048002882271582011600391600561600557994903800385601002001600003200002002400009600001600551600551180201100991001008000080000010016000002516002200221600006021250005110217221600371800000016000032000080100160056160041160056160056160056
4802041600551199001010002801011600412120799560255601088010032000816000080100320000160000480499800429227158201160021160056160040799520380038560100200160000320000200240000960000160040160055118020110099100100800008000011001600000016002200016003060223300051102172116005318000010616000032000080100160057160057160057160057160041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
480025160056119900100000035010001600410121279956255600188001032000816000080010320000160000480049800439227160560160021160056160056799500380038560010201600003200002024000096000016005616005611800211091010800008000001016000033160030030160030613033000502061756160053180000101016000032000080010160057160041160057160041160057
48002416004011980000000003500000160025012127995625560018800103200081600008001032000016000048004980043922716056016003716006516006579950038003856001020160000320000202400009600001600561600561180021109101080000800000101600000160029029160000613033000502071765160053180000101016000032000080010160057160041160149160057160057
48002416005611990000000003600000160041212127995625560018800103200081600008001032000016000048004980042622712692016003716006516006579954038003856001020160000320000202400009600001600561600401180021109101080000800000101600003316002903016003001300000502061777160053180003131016000032000080010160041160057160057160057160057
48002416005611990000000003600000160041012127994025560018800103200081600008001032000016000048004980043722718320016002116006516006579950038003856001020160000320000202400009600001600561600561180021109101080000800000101600003316002910160030012933000502061765160037180000101016000032000080010160041160041160041160057160057
48002416005611990000000000010001600410120799562556001880010320008160000800103200001600004800498004282271269211600371600461600657995002980038560010201600003200002024000096000016005616005611800211091010800008000001016000033160030129160000613033000502081765160053080000101016000032000080010160057160057160057160057160057
480024160040119900000000036000001600432121279956255600188001032000016000080010320000160000480049800428227160560160021160065160065799340380038560010201600003200002024000096000016005616004011800211091010800008000001016000033160029029160029613033000502071766160037180000101016000032000080010160057160041160057160057160057
480024160040119900000000035010001600412121279956255600188001032000816000080010320000160000480049800429227162160160037160065160065799500380038560010201600003200002024000096000016005616005611800211091010800008000001016000033160150030160000613033000502071768160053080000101016000032000080010160057160057160057160057160057
4800241600561198000000000350100016004100079956255600188001032000016000080010320000160000480049800128227160560160037160065160065799500380022560010201600003200002024000096000016005616004011800211091010800008000001016000033160030030160030613033000502071765160053080000101016000032000080010160057160057160057160057160041
480024160056119900000000069600000160041212127995625560018800103200081600008001032000016000048004980044422716056016002116006516006579950038003856001020160000320000202400009600001600401600401180021109101080000800000101600003316000002916002960033000502081756160053180000101016000032000080010160057160057160057160057160057
480024160056119900000000036010001600412121279956255600188001032000816000080010320000160000480049800439227160560160037160065160065799507380038560010201600003200002024000096000016005616005611800211091010800008000001016000033160029029160000613033000502061788160053180000101016000032000080010160057160057160057160041160041