Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD3 (single structure, S)

Test 1: uops

Code:

  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.009

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.009

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 0a | 0b | 0f | 18 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c3 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
6400529315220019018010491464328709111171244009300910003000100050013577500022843292152939031040001000300010006000291082907711610011000100001000041001001100131300012927918168443058658202913119381817565128284161911383415074100030002936829367293042931029368
640042935221901602000080462828793101170974009300910003000100050013573520022779291532942431040001000300010006000292032914111610011000100001000041001001100131300012979935268843107757203013132381420535328378161111380114915100030002923829264292572935129317
640042943121901702400070457528745101171534009300910003000100050003579240022812290462938231040001000300010006000291872920811610011000100001000031001001100131300012942917568543094655203683077381613505528342163461397114975100030002938229326292602935029268
640042929122102001600024046462876010117119400930091000300010005000357832002283729077294573104000100030001000600029114291541161001100010000100003100100110013120001295092756924307595420294303838179525528335162891389815069100030002931929259293412937429277
6400429240219025021000671460128719101171074009300910003000100050003578370022832291522933431040001000300010006000291732916911610011000100001001341005013100324212012775907568643066855203183091381716555028380163851390815029100030002927129360293142940229350
6400429426219019017000121465328796212170684015301510003000100050013582820022794291192933631040001000300010006000291882917111610011000100001000031001001100131300012991917868603067950203443117381516545228328161661401715179100030002926729294292462937729284
6400429314220020022000111462228756102170704009301110003000100050003573810022833290992921831040001000300010006000291112914211610011000100001000031001001100131300012962921768303039956202453078381919515228335163101382414835100030002920529234294062933129300
6400429250220024017000101465128812101171034009300910003000100050013579070022760291062936731040001000300010006000291252909711610011000100001000031001002100121300012903928768693101853202973126381818565528432162911394015127100030002934129349293622935129226
640042938322002101400028204614288591011706940093009100030001000500035802400228582914429313310400010003000100060002917329119116100110001000010000410010011003313000130199196691730801455203253080381719555428398162841375215053100030002927829290292672927429275
6400429357220020024000520457628858101170524009300910003000100050003580050022791291242934631040001000300010006000291472912811610011000100001000041001001100121300013038924168993088115820298307238189544828375162191385815193100030002928229352293972935129282

Test 2: throughput

Count: 8

Code:

  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  ld3 { v0.s, v1.s, v2.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 04 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3202051600651199000000011002000001600261661598732532010610024000080000100240000800005004003002279296501600400160056160056799090380038320100200800002400002008000048000016004116005611802011009910010080000800000100800000188001300178001861027000005110011711160056101310280000240000100160057160060160060160060160060
320204160059119800000000000190100160026166159838253201061002400068000010024000080000500400029227893420160040016005916004179924038001432010020080000240000200800004800001600591600561180201100991001008000080000010080000018800180017800246102700000511001171116005310100080000240000100160042160042160060160060160042
3202041600591199000000000000010016002616615987325320106100240006800001002400008000050040001122792965016003701600561600597990903800413201002008000024000020080000480000160059160056118020110099100100800008000011008000000800140024800180102800000511001171116005610130280000240000100160094160060160042160060160060
3202041600411199000000000002400001600260001598762532010610024000680000100240000800005004000222278934211600400160056160056799270380041320100200800002400002008000048000016005916005611802011009910010080000800000100800000080018001880014601328000005130111711160038001310080000240000100160057160057160057160042160060
3202041600411199000000000000010016004400615987625320106100240006800001002400008000050040001522792965016002701600591600597992703800233201002008000024000020080000480000160041160056118020110099100100800008000001008000001880014001780018611828000005110011711160038001310080000240000100160042160060160057160060160042
3202041600591199000000000002000001600441061598762532010610024000080000100240000800005004000192279296511600463160059160059799270380041320100200800002400002008000048000016005916005611802011009910010080000800000100800000188001410218000061182800000511001171116003810100280000240000100160060160060160060160060160042
3202041600591199000000000000010016002610015983825320100100240006800001002400008000050040000022792965116004601600561600417992703800413201002008000024000020080000480000160041160056118020110099100100800008000001008000001880000101780018011400000051100117111600381000280000240000100160042160057160060160063160060
3202041600591199100000000000000016004106615983825320106100240006800001002400008000050040002922792965016004601600561600597990973800413201002008000024000020080000480000160041160041118020110099100100800008000001008000000800180018800186013000000511001171116003810013080000240000100160042160060160060160060160042
3202041600591199000000001000000016002606015983825320106100240000800001002400008000050040001522789342116004601600411600597990903800233201002008000024000020080000480000160056160056118020110099100100800008000001008000000800000014800186000000005110011711160056001310280000240000100160057160060160042160060160060
3202041600411199000000000000010016004406615983825320106100240006800001002400008000050040001822792965016004601600591600417992403800233201002008000024000020080000480378160056160041118020110099100100800008000001008000001880017001880017611728000005110012611160038101010080000240000100160060160060160060160060160060

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 50 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | da | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
32002516006211991011000031010116004716615988102532002210240006800001024000080000504000382279400101160043016004616004679930038004432001020800002400002080063480000160062160062118002110910108000080000010800088238002602268002061272460000502000051705616005919928000024000010160063160063160063160063160063
3200241600621199101000003201011600981661598810253200221024000680000102400008006150400045227907630116004301600621600467993003800443200102080000240000208000048000016004616006211800211091010800008000001080008724800280126800186172471000502000041706416005909908000024000010160063160063160063160063160063
32002416006211991011000032000116004716615988102532001610240012800001024000080000504000032279400111160027016004616006279914738004432001020800002400002080000480000160062160062118002110910108000080000010800078248002500268001861262460000502000051704616005909928000024000010160063160063160063160063160063
32002416006211991001000031000116004716615988102532001610240012800001024000080000504000382279400101160043016006216006279930038004432001020800632400002080000480000160062160046118002110910108000080000010800077248002500268000061262370000503300061707416005909028000024000010160063160063160063160063160063
32002416006211991010000060101160047160159881025320022102400128000010240000800005040003322794001001600430160062160062799300380044320010208000024000020800004800001600621600621180021109101080000800000108000780800080028800196126071000502000071716416005919928000024000010160063160063160047160063160063
3200241600621198100100007010116004716615988102532001610240012800001024000080000504000292279076301160027016006216006279914038004432001020800002400002080000480000160062160062118002110910108000080000110800088238002600258001901262461000502000061706416005909908000024000010160063160063160063160063160047
320024160062119910001100310101160047166159881025320016102400128000010240000800005040004722794001011600430160046160062799300380044320010208000024000020800004800001600621600621180021109101080000800000108000972480027022680000612624720005020140051706616005919928000024000010160047160063160063160047160063
3200241600621199100100007010116004716015988102532002210240006800001024000080000504000382279400101160043016006216009779930038004432001020800002400002080000480000160046160046118002110910108000080000010800078238002802268002061252362000502000061704616005919908000024000010160047160063160063160063160063
3200241600621199101100003100011600311601598810253200221024001280000102400008000050400000227940010116004301600621600467993003800443200102080000240000208000048000016006216006211800211091010800008000001080007824800250029800186026071001502000061704616005919928000024000010160063160063160063160063160063
32002416006211991000000032010116004716015988182532002210240012800001024000080000504000332279400101160043016004616006279914038002832023620800002400002080000480000160062160062118002110910108000080000010800099238000901258000060262371000502000061704616005909928000024000010160047160063160063160063160047