Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD3R (2S)

Test 1: uops

Code:

  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.006

retire (01) | cycle (02) | 03 | 07 | 0a | 0e | 0f | 18 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
64005282392151413102110496427898000163894010300610003000100050003572822290827943284873104000100030001000300028670281431161001100010000100001000001000123133631051972453239742191273187381313423927977139701200713407100030002844228161279352800628011
64004286552111111000405209278880111590440093006100030001000500035752622953284072810131040001000300010003000284962796611610011000100001000010000110002121412610255721534931029196703457381610363027749154821217512792100030002803528050281062808828071
640042858421191100001485727887100159414000300910003000100050003571210229182812528068310400010003000100030002806228232116100110001000010002100001100022014082103797272318073919496358838175332927996137521194412947100030002808628076286482818928058
6400428251214111200020523927918011159314009300610003000100050003572542284528164280073104000100030001000300028191280311161001100010000100021001001000233140471049972033402935195863187380910353327767142571211212704100030002850528206281952812228110
64004281372109170000051662799211016331400630091000300010005000356246228652818928197310400010003000100030002860528097116100110001000010002100100100020013997107607278343163219163346738115363828004145681296712766100030002820228493283992865528203
640042807721111100004148662799100115989400630061000300010005000357242229142809828253310400010003000100030002849428561116100110001000010002100000100011013373950572263363103618988351938128273227761145091277112862100030002802728013280412805828473
64004281332101290003052032821411015902400630061000300010005000357185229072812528066310400010003000100030002842728608116100110001000010002100102100001013866943272073493736192713247381515333627808143141223012812100030002824528096280492806527996
6400428175211151400030477228339100162934000300910003000100050003561862291128178281943104000100030001000300028111285791161001100010000100001000001000203142119525691334551136190353498380911353727970151351208313854100030002816528054280682817628030
640042850621012130000148772787601115919400630091000300010005000356912229382840728546310400010003000100030002808328032116100110001000010002100000100022013983952771453267934190263462381210363527675143141329014172100030002813928015285082853528119
640042815021413141107052542818001016319400630061000300010005000357052228952848228175310400010003000100030002852827928116100110001000010002100000100121213149966469943468641197693452381412353427797141371210513052100030002853228092281582810828513

Test 2: throughput

Count: 8

Code:

  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  ld3r { v0.2s, v1.2s, v2.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3202058006759900010001901008002616626320198100240092800061002400248000850040009428841700800228004280041612320132200800082400242008000824002480041800411180201100991001008000080000010080010908003301318000261300711115116116118003801010800002400001008004280042800438004280042
32020480041599000100000100800271602632020110024009580006100240024800085004000382884170080022800418004161232013220080008240024200800082400248004180041118020110099100100800008000001008000992880032003180025612928711115116116118003801313800002400001008004380043800438004280043
32020480041599101000036000180026166253201351002400298000610024002480008500400092288040908002280041800417123201322008000824002420080008240024800418004111802011009910010080000800000100800098288003201318002561312870111511611611800380013800002400001008004380042800438004280042
32020480041599110000060001800271662532019410024002780006100240096800085004000862880409080022800418004161232013220080008240024200800082400248004180041118020110099100100800008000001008001072880034013280026613027721115116116118003801313800002400001008004280042800428004380042
3202048004159911000003700018002606625320186100240018800001002400008000050040005428839810800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000010080007708000802308002361627720005109217228003811313800002400001008004280042800428004280042
32020480041599110000024357210080026166253201831002400208000010024000080000500400046288398108002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800088288003100318002360292771000510921722800381013800002400001008004280042800428004280042
3202048004159910000007000080026066253201831002400868000010024000080000500400046288020208002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800000080014101480000611418000005109217228003811010800002400001008004280042800428004280042
320204800416000000000190000800261662532015410024000080000100240000800325004000092881693080022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000000800000008001460018000005109217228003801313800002400001008004280042800428004280042
3202048004159911000006001180026166253201861002400188000010024000080000500400054288022008002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800077288002900308002360300700005109217228003811313800002400001008004280042800428004280042
320204800416001101000350011800261662532018610024007980000100240000800005004000542880220080022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000772880029003080023002927710005109217228003811313800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | cf | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3200258005559900000002001080026166002532006410240054800001024000080000504000182881693180022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000188001400148001460141805019417081538003810101080000240000108004280042800428004280042
3200248004159901000002001080026166002532006410240054800001024000080000504000002881693180022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000188001400148001461141805019417060368003800101080000240000108004280042800428004280042
3200248004159900000002000080026166002532001010240054800001024000080000504000162880000180022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000188003110138001461141805019217060428003800101080000240000108004280042800428004280042
3200248004159900000002001080026166002532001010240000800001024000080000504000182881693180022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000188001400148001461141805019417060628003810101080000240000108004280042800428004280042
3200248004159900000002000080026166002532001010240054800001024000080000504000092881739080022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000188001400138001461131805019417060448003800101080000240000108004280042800428004280042
32002480041599000000000008002616000253200641024005480000102400008000050400011288169318002280041800410032332001020800002400002080000240000800938004111800211091010800008000001080000018800130014800136014005019417060458003810101080000240000108004280042800428004280042
32002480041599000000000008002616600253200641024005480000102400008000050400011288169308002280041800410032332001020800002400002080000240000800418004111800211091010800008000001080000018800140014800146001805019217070438003810101080000240000108004280042800428004280042
3200248004159900000004200080026166002532001010240054800001024000080000504000182881693180022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000188001300148001361141805019417060428003800101080000240000108004280042800428004280042
32002480041600000000020010800261660025320064102400548000010240000800005040000028816931800228004180041003233200102080000240000208000024000080041800411180021109101080000800000108000001880000001480013611400501941706054800381001080000240000108004280042800428004280042
3200248004162000000002800080026166002532006410240054800001024000080000504000112881693080022800418004100323320010208000024000020800002400008004180041118002110910108000080000010800000080013000800140113180501921707024800380001080000240000108004280042800428004280042