Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (single, S, post-index)

Test 1: uops

Code:

  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.004

retire (01) | cycle (02) | 03 | 04 | 05 | 08 | 0b | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
6500529348219511110301456728785000169506013100040111000100040001000500050114759310022992290542928431060001000400020008000291832910911610011000100010000210000096010012020012801903468553051552008230553812731412833710001633113215144161000400010002926029196292422921129308
650042923421921110040145232876000016945601110004013100010004000100050005013476112002292029057292013106000100040002000800029116290971161001100010001003131003012100121211129749233687630923020180307938111032322841410001612013296145631000400010002920529172292772920329190
65004291532181111005004630287141001695660131000401310001000400010005000500547639000229402901829190310600010004000200080002910229106116100110001000100122100201210002131112828913668973036322014730533816736372840710001643213436144301000400010002920229335291482922729182
650042925321841101040144852880610016927601310004011100010004000100050005011476721002287729013292263106000100040002000800029199290541161001100010001003321004022100133311128839119686230463720110307238141027332840410001634813151146811000400010002926229196291332926029277
65004292382191111012014577287730001693860111000401310001000400010005000500647605410229342902429220310600010004000200080002907329093116100110001000100322100301210002121012900908168553077362010030893815534302838710001636713476145641000400010002922029299292582929129195
650042916622601100030045742877500016953601310004011100010004000100050005000476012002290528978291923106000100040002000800029099290671161001100010001004221003001100023212127859152683831013220105310038121134322838110001635813309144721000400010002928129207292102915329318
650042923621901100050045552874000016942601310004013100010004000100050005004476481002290629034291173106000100040002000800029097291511161001100010001003321003021100023310128359205689230753220100307638141031302833610001632913297143891000400010002921929198292082923129262
650042919521801110040046992879200016929601310004013100010004000100050005014476960002295629015292293106000100040002000800029134291011161001100010001002131003012100131310128199130689630773420078309838091133322836710001602613376145201000400010002921029232292332922229189
65004291782180111004004577287660001693360111000401310001000400010005000500547656100228632901629244310600010004000200080002910829087116100110001000100332100302210023231213000903568753018352016430623810737362834910001662513327144411000400010002916929224291812923429181
650042926421821011140045142875600016899601110004011100010004000100050005004476940002285129030292393106000100040002000800029111290351161001100010001002221004022100021210129259216683830703620055304038111032392839610001636213336144021000400010002926029266292612921929195

Test 2: throughput

Count: 8

Code:

  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4002051600591198000000190101600250607994025480108801003200088000080100320704800004804994000712271604401600210160116160170799500380038480100200800003200002001600006400001600561600561180201100991001008000080000010080000188000000178001360140005110317231600370800001308000032000080100160060160060160041160060160060
400204160040119900000022010160044066799592548010880100320008800008010032000080000480499400064227126920160021016005916005979934038004148010020080000320000200160000640000160040160094118020110099100100800008000001008000018800163030800006014200051103173316005608000013138000032000080100160060160060160060160041160060
400204160056119900100021010160044166799592548010080100320008800008010032000080000480499400061227126920160021016005916005979934038003848010020080000320000200160000640000160056160056118020110099100100800008000001008000018800160012800150116200051103173316003708000013138000032000080100160060160041160057160060160060
40020416009511980000002200016004406679959254801008010032000080000801003200008000048049940006222716560016003701600591600407993403800414801002008000032000020016000064000016005916004011802011009910010080000800000100800001880016001580000001420005110317331600560800000108000032000080100160060160060160060160060160060
4002041600591199000100000016004116679959254801088010032000880000801003200008000048049940005722716044016003731600561600597995303800414801002008000032000020016000064076016005616004011802011009910010080000800000100800000800160028800156000005110317231600370800001008000032000080100160060160041160041160060160057
40020416005911990000002700016004410679959254801008010032000880000802803200008000048049940006622716044016002101600591600567995303800224801002008000032000020016000064000016005916004011802011009910010080000800000100800000800160068001601141800511031733160056080000008000032000080100160041160060160041160041160041
4002041600561199001100001016002516679956254801088010032000080000801003200008000048049940006422716560116004001600401600597993403800414801002008000032000020016000064000016004016005611802011009910010080000800001100800001880016002580016610200051103173216003718004513108000032000080100160041160041160057160041160060
400204160059119900000022000160041166799562548010880100320008800008010032000080000480499400064227165600160021016005616005679934038004148010020080000320000200160000640000160040160056118020110099100100800008000001008000018800000014780000601300051102172316005608000010138000032000080100160114160060160057160060160041
40020416005911990000002201016007716679940254801008010032000880000801003200008000048049940006722716044016002101600591600407993703800224801002008000032000020016000064000016005616005611802011009910010080000800000100800001880016001608001661132000511031743160053080000008000032000080100160060160057160060160041160109
400204160056119900000020000160044066799592548010880100320000800008010032000080000480499400062227160440160021016004016005979953038003848010020080050320000200160000640000160059160040118020110099100100800008000011008000008000000980000611400051103173216005608000013138000032000080100160041160041160041160041160041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4000251600551199001000250101160025160799552548001880010320008800458001032000080000480049400057227158080016003616005516005579949380022480010208000032000020160000640000160040160055118002110910108000080000108000008001000800096001700502052717771600520800006608000032000080010160056160056160056160056160286
40002416005511990000001901001600401667994025480018800103200488000080010320000800004800494000622271580815160036160055160040799493800224800102080000320000201600006400001600551600551180021109101080000800001080000080013098000001101700502053817581600370800000608000032000080010160056160041160056160041160056
40002416004011990000001801001600401667995525480018800103200088000080010320000800004800494000582271580815160036160040160055799493800374800102080000320000201600006400001600551600551180021109101080000800001080000080010098001000131700502050617851600521800006608000032000080010160041160056160056160056160056
40002416005511990000001600001600401667995525480018800103200008000080010320000800004800494000622271269215160036160055160040799493800224800102080000320000201600006400001600551600401180021109101080000800001080000138001310800136191700502054517561600371800000008000032000080010160041160041160056160056160056
40002416005511990000001800001600401607994025480156800103200088000080010320000800004800494000712271586410160021160040160055799343800224800102080000320000201600006400001600551600551180021109101080000800001080000138000901380012600000502054717781600520800009908000032000080010160056160056160041160056160056
4000241600401199000000190000160040166799552548001880010320000800008001032000080000480049400356227158641516003616010916005579949380037480010208000032000020160000640000160055160055118002110910108000080000108000013800120168001261101700502050617771600521800006608000032000080010160056160056160056160041160056
4000241600401199001100190000160040166799552548001080010320000800008001032000080000480049400064227158081516002116004016005579949380037480010208000032000020160000640000160040160055118002110910108000080000108000008005601080000619000502054617861600371800000608000032000080010160056160056160056160056160056
4000241600551199000100160001160025160799552548001880010320008800008001032000080000480049400064227126921016002116005516005579949380037480010208000032000020160000640000160055160040118002110910108000080000108000017800121080012011013005020144817671600520800009908000032000080010160056160041160056160041160041
4000241600551199000000160100160040060799402548001080010320008800008001032000080000480049400056227158081516002116005516005579949380022480010208000032000020160000640000160055160055118002110910108000080000108000008000000800136001700504054517671600370800009908000032000080010160056160056160056160041160056
40002416005511990011001601001600401067994025480018800103200008000080010320000800004800494000582271269215160036160040160055799493800224800102080000320000201600006400001600551600551180021109101080000800001080000080000008001260101300502054717571600521800000608000032000080010160056160041160041160041160056