Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple structures, 2D)

Test 1: uops

Code:

  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.026

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.028

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
7200529357219115101311000410455229064244153331203280284000800040002032398448924729291642929031012000400080004000200002917129200116100110001000040055040070014400260101241127089203685630641044192293047382312394428447164441292113573400080002920229352293182927829244
7200429289220116111810000161045192903104415280120088028400080004000203119861672481129188292783101200040008000400020000292502931911610011000100004005684007102104006611084112978908069033128739192253076382713414628501163411301113523400080002932929364292842932029285
7200429381219112111110000181046002915700015308120328026400080004000203419802210247842912429281310120004000800040002000029293292271161001100010000400541240111011240066012124112935919668353045544192913016382510384028450162761283313333400080002929829268292982937129351
720042928821911211121000016104582290450401530312028802840008000400020310984561124720291802927531012000400080004000200002923329216116100110001000040056114012003440066112124212770917068533083936192533081382411354528457162131304413666400080002928629283292592932229288
720042934521911511171000016104568290952001521212022802240008000400020328985337247192926729333310120004000800040002000029216291181161001100010000400641240130004400851684112760919868483080543193413088382110414028544162511279713595400080002934829289294182936029320
7200429294220114111510000161045632907404015285120328022400080004000203259840442476329104292553101200040008000400020000291932922111610011000100004004684011001640026010124212926908768183034640192813080382015414028541163871296513524400080002925329353293242933029266
7200429260220112111510000120045302902524015254120088028400080004000203399838452473229225292883101200040008000400020000292942916611610011000100004005584011001104006614124112934918368143070639192733071383011424028505163901302713497400080002934129349293772927329256
720042934221911300141000018104500290590001532612032802640008000400020329980948244802920129344310120004000800040002000029224292271161001100010000400568400700144000611004212733912068053010739193433103382510374228478165391302313650400080002934629335293092927629316
720042924922012011131000018104565290412041531712008802840008000400020326983324247542917229260310120004000800040002000029291292491161001100010000400540401100294008606124212867902068163054242193203033382416393728520164291293813546400080002929629313293132922929254
7200429320219113112300000180045742909204015273120328028400080004000203239854172481329190292263101200040008000400020000292532920411610011000100014004584012102640026141242129169258686430661045192323078382812403828531163661296813602400080002928229280292842927229361

Test 2: throughput

Count: 8

Code:

  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
9602051600691199000000038010001600443000025960156100640056320000100640000320000500168040618560012116004001600641600600032596010020032000064000020032000016000001600591600591180201100991001008000080000010032001314003200540013320040613235000051091171116005614100320000640000100160060160044160044160044160060
9602041600591199000000038010001600443012002596015610064005632000010064000032000050016804151856001211600400160059160059003419601002003200006400002003200001600000160043160059118020110099100100800008000001003200000350320037003232000061320000051091171116005610101320000640000100160060160044160060160060160141
960204160059119900000003801000160049012120025960100100640056320000100640000320000500168045818560012116004001600591600590034196010020032000064000020032000016000001600591600591180201100991001008000080000010032000003503200326303232000061035000051091171116005610101320000640000100160060160060160044160060160044
960204160043119900000003800002160044012000259601561006400643200001006400003200005001680001185600121160045016005916005900341960100200320144640000200320000160000016006616004311802011009910010080000800000100320000035032003200032003201035000051091171116006110101320000640000100160044160060160044160065160060
960204160059119800000003800002160044312000259601561006400563200001006400003200005001680437185600121160040016005916005900341960100200320000640000200320000160000016005916004311802011009910010080000800000100320000035032003200323200326132350000510911711160056001320000640000100160044160060160060160060160060
96020416004711990000000920000016002831200025960100100640056320000100640000320000500168041581600000160046016005916005900341960100200320000640000200320000160000016005916004311802011009910010080000800000100320000035032003200323200006003500005109117111600401001320000640000100160060160060160060160044160065
96020416004311990000000000000160031012120025960156100640056320000100640000320000500168088118560012016004001600591600590034196010020032000064000020032000016000001600591600591180201100991001008000080000010032000003503200320003200326100000051091171116004010101320000640000100160060160044160060160060160060
9602041600431199000000000100016004431212002596015610064000032000010064000032000050016804081856001211600400160043160059003259601002003200006400002003200001600000160059160059118020110099100100800008000001003200000350320000380323200326100000051091171116005610101320000640000100160060160060160060160060160060
9602041600591198000000038000021600443012002596015610064005632000010064000032000050016800011856001211600400160059160059003259601002003200006400002003200001600000160059160059118020110099100100800008000001003200000350320032003320032613235000051091171116005610141320000640000100160060160052160047160060160060
9602041600431198000000000100016004431212002596010010064006432000010064000032000050016804581856001211600450160066160059003419601002003200006400002003200001600000160062160043118020110099100100800008000011003200000350320032003232003761320000051091171116005610101320000640000100160060160060160044160060160047

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cd | cf | icache miss (d3) | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
96002516007211990000000590031600540605259600781064005632000010640000320000501681193217607521160050016006916006900346960010203200006400002032000016000001600591600431180021109101080000800000103200000350320000000032000061040000050190151701410160056010132000064000010160065160044160065160065160044
960024160059119900000001310316003426652596006610640016320000106400003200005016811882176075211600500160049160069003259600102032000064000020320000160000016005916005911800211091010800008000001032000000032003700003200376037400000501901717015161600611414032000064000010160065160065160065160065160065
960024160043119900000005910216005131212025960010106400643200001064000032000050168089081600001160045016004316006400341960010203200006400002032000016000001600441600591180021109101080000800000103200000350320037010032003260324000105019016170151116006100132000064000010160060160044160060160153160065
9600241600641199000000059000160044000025960074106400643200001064000032000050168088118560012116004501600641600430034696001020320000640000203200001600000160043160059118002110910108000080000010320000035032003700037320037613200000501901517017121600661313432000064000010160050160070160050160070160050
96002416006911991010000419001160034206525960066106400563200001064000032000050168004811520024116005031600691600493034696001020320000640000203200001600000160059160059118002110910108000080000110320000000320032000032003760320000050190141701511160056100132000064000010160060160065160065160065160044
960024160064119800010001310216002831212025960074106400643200001064000032000050168040818560008116004601600641600640032596001020320000640000203200001600000160064160059118002210910108000080000010320000000320000010373200326132000005019015170151516006670232000064000010160050160070160050160070160070
960024160049119910100005910216004931212025960074106400563200001064000032000050168088121760004116004501600431600640035196001020320000640000203200001600000160069160069118002110910108000080000010320013144403200540015332004061534413000501901117011151600663213232000064000010160050160070160050160070160050
96002416006911991000000591021600490121202596007410640064320000106400003200005016804132175999611600240160043160043003419600102032000064000020320000160000016006416005911800211091010800008000001032000000032000000037320037613240000050190111701216160061140132000064000010160060160065160044160065160065
9600241600641198000000059103160054066025960066106400163200001064000032000050168004611520024116005001600491600493035196001020320000640000203200001600000160049160070218002110910108000080000010320013140032001310253320040015301320050190101701014160040010132000064000010160060160065160065160044160065
9600241600641199000000059100160049312002596007410640064320000106400003200005016808901856001201601410160064160064003579600102032000064000020320000160000016006416004311800211091010800008000001032000003503200370103732003700040000050190161701216160061140132000064000010160065160044160065160060160065