Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, three registers, 8H)

Test 1: uops

Code:

  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
630052941322012011310000141046142877400024082300030003000150029162352865529209310300030003000290552906611610011000100013005693008108300151303113045932468823047104920548306938171742392846916097137571551430002950129282292922926329301
6300429283220022017000006104636289580002414830003000300015000616150285992933531030003000300029177290731161001100010001300000300000630005109001280791816893310484020629305138161635362848216453135421554830002930029203292972929329238
6300429204219013012000006104579288800002401730003000300015000816152286592921831030003000300029086291601161001100010001300009300000130045149001304493086863313473620639310238211545482838416295136171571730002927229260293012930829239
630042925721901101500002101044872888301024106300030003000150001016145286232927431030003000300029055292521161001100010000300009300400430050040001304291066891310683720607306338151740412846716474139551541830002919129217292592927429268
630042929822001301100000230046232878300124044300030003000150091116154285572923331030003000300029150290791161001100010000300006300410530010149001274190046812324774020639310538201539412835616250136561532030002933729136292422937029293
63004293262200160150000010004528287710002404230003000300015000816157285782928931030003000300029144291171161001100010000300009300440430045059001278493536945312263820598312038111439402842016424136851562330002927529277292652930529209
6300429302220012013000009004534288480302410630003000300015000516150286592930031030003000300029023290861161001100010000300009300410030045019001281990726925302154220648309038131439412837116199136551541430002938129312293382937029290
63004293432200140130000012004619288350002412630003000300015000716157286312918931030003000300029121291201161001100010000300000300000330045049001324891456900310783920649311138191238382840416204137871545230002920229257292082925429342
630042928422001509000006104610289090102412930003000300015000916159286022925631030003000300029097291161161001100010000300009300100030005150001278291376862304394120558308438121136462838416233136261558830002935629281293352928929255
63004293502190130180000010004576287670002407630003000300015001016175286052927431030003000300029155290831161001100010000300009300010430005019001280792066896308974120653313438201438442833716268137671559830002926129201294112934129442

Test 2: throughput

Count: 8

Code:

  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  ld1 { v0.8h, v1.8h, v2.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2402058070360011110651280050201814252401001002400001002400005003500450180023800658004849988350023240100200240000200240000800488004811802011009910010080000800001100240019204224005710166240039515842191511041613800629922400001008006680074800708007080066
2402048006560010000740280055218017252401001002400001002400005003531263080050803688007650206350026240462200240383200240000803808006511802011009910010080000800001100240020214224005910063240039615842191511031642800449922400001008006680070800708006680049
24020480069600111003303800502181817252401001002400001002400005003520953080044800698007049992350027240100200240000200240000800698006911802011009910010080000800000100240020194224005611060240039005842190516521644800629922400001008006680048800668004980066
240204800656001010068128005031818025240100100240000100240000500353106508004080048802094999735002324010020024000020024000080066800651180201100991001008000080000010024002019024001910121240039605842192511031631800649922400001008004980066800668006680066
2402048004859911100211380033218180252401001002400001002400005003523674080040800478006549988350023240100200240000200240000800488006511802011009910010080000800000100240019204224005810060240038515942192511011633800629922400001008006780069800668006780048
2402048004859911100651380060218049252401001002400001002400005003500271080040800658006549988350023240100200240000200240000800658004711802011009910010080000800000100240021204224005710260240039015842191511031613800629922400001008006680066800688006680049
24020480065599110006513800502181816252401001002400001002400005003523674080040800688006649988350023240100200240000200240000800658006511802011009910010080000800000100240020204224005910160240039515842190511021624800629022400001008006680049800668006680066
240204800656001110021038005020181625240100100240000100240000500347251608004080065800654998835000624010020024000020024000080065800651180201100991001008000080000010024002019024005811166240040501942190511011635800639022400001008004980066800508004980066
2402048006859910100650380050118181625240100100240000100240000500350109508004080065800654998835002324010020024000020024000080065800651180201100991001008000080000110024001920422400581105824003950580191511021613800629922400001008006680048800498006680066
2402048006559911100650380053218017252401001002400001002400005003513588080040800658006549989350023240100200240000200240000800658006511802011009910010080000800000100240021214124005910260240038511942192511031632800629922400001008006680049800498006680066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 05 | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
240025800725990007200101800462121216252400101024000010240000503521982180036080061800615000635004124001020240000202400008006180061118002110910108000080000010240000043024004003924004051404405020816758005801000240000108006880062800618006280062
24002480061599000450101800462121216252400101024000010240000503517947180040080041800415000635002124001020240000202400008006180061118002110910108000080000010240000043024004004024004051410050205166680038010104240000108006280061800628004280062
240024800606001004501008004621201625240010102400001024000050352198218003608006180061500063500212400102024000020240000800618006611800211091010800008000011024000004302400400024004001400050205167580058010104240000108006280065800428006280062
24002480061599000450101800462121216252400101024000010240000503537918180036080061800615000635004124001020240000202400008006180061118002110910108000080000010240000000240040040240040514043050207166480058010104240000108006280062800428006280062
240024800615990004600018013321212162524001010240000102400005035222361800360801138006150006350041240010202400002024000080061800611180021109101080000800000102400000430240000039240040514043050207166480058010104240000108006180062800628006280249
240024800415990004501018004601212162524001010240000102400005035222361800360800618006150006350021240010202400002024000080061800611180021109101080000800000102400000430240040040240040604043050207167580057010100240000108004280062800628006280062
24002480041600000460001800452121202524001010240000102400005035222361800350800618006150006350021240010202400002024000080061800611180021109101080000800000102400000430240040040240040504043050204166480058010104240000108006280064800658006180061
240024800616000004501018004621212162524001010240000102400005035222361800160800618006150006350041240010202400002024000080061800611180021109101080000800000102400000430240039041240039004043050207175780061010104240000108006180062800628006280062
2400248006160000045010180049212093252401401024052010240000603537918180035080061801945010635016024001020240381202400008004180061218002110910108000080000010240000043024004104024004051404305020816468005701004240000108004280062800628006280062
240024800415990000010180026212122252400101024000010240000503517947080036080061800615000635004124001020240000202400008006180061118002110910108000080000010240000043024000000240040514043050357167580061010100240000108245983372833868306083383