Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, three registers, 8B)

Test 1: uops

Code:

  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
6300529265220124002700061452128877010241952000200020001000021616028685292923102000300020002913529018116100110001000020032520030002200060242113004922468573215127620613310338225555328408165011382615357200010002930229220291252923929345
630042923222011811151012145862884600124164200020002000100000161582863129174310200030002000291452912311610011000100002002362003000020004006001310591766859304065120536312438096494928344164811362615705200010002933329304293002928829261
630042920821901900160001614611288510202413320002000200010000416152286342913831020003000200029060291561161001100010000200006200000012200000040012977935268243053115620647311838176535428379165161372115664200010002936129268293082924929240
63004292602190210020000714593288420002420020002000200010000016151287002934431020003000200029078291071161001100010000200006200000002000402400129359044683930311053205753067381117444628368162611367815402200010002918829215293092921329156
630042922921902300220005145342881300024108200020002000100000161752858729275310200030002000290882905111610011000100002000062002000020004026001298790746830306295420734307538116505128363164691394315606200010002930529271292492928029312
630042929721902500170005146832886202224084200020002000100000161542863829287310200030002000287092916211610011000100002000042002000020024026001288790906843306735020628311838196474928372162421354215575200010002931129262292822921029289
630042922322001200160007044712884900224155200020002000100000161452864829303310200030002000291102913711610011000100002000042000000220004004001291290236905303574420615316038137534728393163801380615557200010002926929297291812924529212
63004292342200210018000514632288440202428520002000200010000716137286392934831020003000200029082290831161001100010000200004200000002000402400129589225683830441343206093030381013464128463162851375315612200010002922029289291762923629237
630042925421902600190000045172889002024167200020002000100000161842870129193310200030002000291572919311610011000100002000042002000020004006001294991166803307475020623307338089535028351162291371815643200010002922929266293022924429221
63004292542200200021000714561287770202417620002000200010000016154287372923331020003000200029154291091161001100010001200004200000002000402400128519256681030321048205413068380610524728337162201380515647200010002926929278293162923829235

Test 2: throughput

Count: 8

Code:

  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6675

retire (01) | cycle (02) | 03 | 04 | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 67 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a5 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
24020553415400011144001533792121281251601001001600001001600005002358180005336905339453374232973233521601002002400002001600005339453394118020110099100100800008000001001600004301600380381600386139445110116115339510107160000800001005339953399533995339953399
24020453374400011144101533832011925160100100160000100160000500233741700533690533985339823297323356160100200240000200160000533945339411802011009910010080000800000100160000430160000039160038613944511011611533951000160000800001005339953399533755339553395
240204533744000000440015338321119251601001001600001001600005002349885005336905339853374233213233561601002002400002001600005337453394118020110099100100800008000001001600004301600380391600006039445110116115339514140160000800001005337553375533995339953375
24020453398400001044001533792111925160100100160000100160000500234988510533690533945339423317323356160100200240000200160000533985339411802011009910010080000800000100160000430160038038160038613844511011611533920144160000800001005340653405534035340053395
2402045339840000004410153359212119251601001001600001001600005002337495105336905339453394233213233561601002002400002001600005339853374118020110099100100800008000001001600004301600380391600396138445110116115339514147160000800001005339953395533995337553399
24020453398400000144101533832110251601001001600001001600005002349885105337305339853398233213233321601002002400002001600005339853374118020110099100100800008000001001600004301600000381600386138445110116115339514147160000800001005339953399533955339553399
24020453398400000045000533592121025160100100160000100160000500234988510533720533985339823321323359160100200240000200160000533985339411802011009910010080000800000100160000001600380381600386139435110116115339810107160000800001005340253395533995339553399
240204533984000000450015338321119251601001001600001001600005002349885105337305339853398233213233561601002002400002001600005339853394118020110099100100800008000001001600004301600390411600386139445110116115339514140160000800001005339953399533995339953399
2402045339440000014400153383212019251601001001600001001600005002337495105337305339453394233213233561601002002400002001600005340253394118020110099100100800008000001001600004301600390381600386139445110116115339510107160000800001005339953399533755339553375
240204533984000000440015338321119251601001001600001001600005002345544105337305339853374233173233521601002002400002001600005339853374118020110099100100800008000001001600004301600380411600386139445110116115339114107160000800001005339553375533995339953375

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6675

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
240025533903990001000000450001533791703012516001010160000101600005023261710533555339853380233253233601600102024000020160000533945338911800211091010800008000001016000003901600390001816005561544300150202416242253378006216000080000105339053381533905339953395
24002453380399000200110045000253379212012251600101016000010160000502352959153369533945339823339323374160010202400002016000053394533801180021109101080000800000101600000001600390004216001801354300050202416232453386006216000080000105339553390533975338153395
240024533984000001000100450002533651812121251600101016000010160000502358180053355533805339523339323378160010202400002016000053394533801180021109101080000800000101600000001600390001816003961180000502019162118533770100016000080000105339953381533955338353395
240024533984000002000000330001533791712121725160010101600001016000050235295915335553394533942333932337416001020240000201600005339453389118002110910108000080000010160000039016014801003516003501354300050202316241953391066316000080000105339553395533815339553399
24002453415400000200110018010253374181201251600101016000010160000502352959053369533985339423325323374160010202400002016000053394533801180021109101080000800000101613000433881615950041065516172561390020513633972523551591106016000080000105554355375545955539555541
2400245542841502010110139175796810255522201210713991617021316182011161602502342429153355533945339423334323369160010202411542016000055406551911418002110910108000080000010161692204031618380101136616146961354304052192912512395528921010416000080000105569655395538615539455250
240024558654200201300017162394105600155207215181372518162221111606501216302661241383305499155372568202543223425320162682202457722016384357410573782018002110910108000080000010162342006161612050021627416185961180000524726161324533950104216000080000105339653535552235547955506
2400245462442302001001004500005336521827125160010101600001016000050234177615335653381533812333532336916001020240000201600005338953389118002110910108000080000010160000058016005401035160018603539000502025161425534000104416000080000105539455686555525523956148
24002455836457040100010084079210153379250191725160010101600001016000050235295905337753402533802334732336016001020240000201600005338053402118002110910108000080000010160000043016003800018160038005544000502020162313533770010716000080000105339953399533955338153403
2400245340239900000001004400015337921112516001010160000101600005023374950533775338053380233253233781600102024000020160000533805339411800211091010800008000001016000004401600380003916003861394400050201716231953395000016000080000105339953399533815340353381