Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD3R (8B)

Test 1: uops

Code:

  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.006

retire (01) | cycle (02) | 03 | 04 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5e | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
640052862221330601100070050112798100116149400630061000300010005000356900622864284442827031040001000300010003000284272816111610011000100001000021001000310012231214012101587292334105019290341038108474027939140621214812827100030002845628159281192803328300
64004280732112110000002005145279231101591740063000100030001000500035634082283328257284183104000100030001000300028080283331161001100010000100230100300211000010001365798577263329504019470330938187394527812142561283913643100030002813828178281532815628435
6400428057211000100111310527628041010162844009301210003000100050003574708228392815628512310400010003000100030002827027988116100110001000010000210030101100002311135921051471163190041193643415381212414427918146571257713285100030002819228406283062836628360
640042818421000000000021051632795501115966400930061000300010005000356940522877283792824131040001000300010003000281552816421610011000100001000021001000310002221013618104347308329604319121342638119463627816141241241713794100030002842328344283712816828379
640042823821100000000021052652784411015883400630061000300010005000356880122850281152819831040001000300010003000282552823111610011000100001000001001010210011120014117103967311354204719307347238129404827937142701205913483100030002828828102281542832228091
6400428266211000000000010485128021101159564006300010003000100050003570903229192809628370310400010003000100030002824828416116100110001000010000210000001100111000136841052071083312043195463353380810424327760140341228712991100030002841728302284532851828454
6400428353210100000000010522927972001159454012300010003000100050003575005228332805628024310400010003000100030002834328076116100110001000010000310030000100022212137881046772583491044193913298381413444327850152731208214008100030002806528333281082799628089
640042813221201000000000051162806911116097400030001000300010005000356210222902280322809031040001000300010003000284452843211610011000100001000001002000110011120013530981370913435040192013478380812444527898148561253813230100030002811528196284542841628174
6400428216210000000000700518728119011160504006300610003000100050003568204228572804228106310400010003000100030002836928377116100110001000010000310010001100011000139881045673073498045192453390381512384127893140151239512841100030002847528275282902861228388
640042805921100000000020052522793411116017400630061000300010005000356340122832280472828831040001000300010003000283202810011610011000100011000021001000310031120014010104487217335504719150348138126413927929140941277413544100030002808628062284562808428124

Test 2: throughput

Count: 8

Code:

  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  ld3r { v0.8b, v1.8b, v2.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3202058006759911100350101800261660253201861002400888000010024000080000500400003288398118002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080007628800290229800006131287105109217228003801313800002400001008004280042800428004280042
320204800416001101145010180026166025320186100240079800001002400008000050040004728839810800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000982880031003180000612928620510921722800380130800002400001008004280042800428004280042
32020480041599101003501018002616602532017910024008380000100240000800005004000812883981080022800418004103233201002008000024000020080000240000800418004111802011009910010080000800000100800067288003000308002361312861051432172280038000800002400001008004280042800428004280042
32020480041600100003501018002610602532018510024007980000100240000800005004000482883981080022800418004103233201002008000024000020080000240000800418004111802011009910010080000800000100800087278000700680024613007105109217228003811313800002400001008004280042800428004280042
320204800416001100036000180026166051320188100240079800001002400008000050040010628802510800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000011008000882880030023180023602927700510921722800381130800002400001008004280042800428004280042
320204800415991110035010080026066025320186100240018800001002400008000050040005428839810800228004180041032332032420080000240000200800002400008004180041118020110099100100800008000001008000001880014001480014011300005109217228003801313800002400001008004280042800428004280042
32020480041599111003600008002616602532018310024008680000100240000800005004000462883981180022800418004103233201002008000024000020080000240000800418004111802011009910010080000800000100800077080030021080023603128720510921722800381013800002400001008004280042800428004280042
3202048004160011000350001800261660253201831002400858000010024000080000500400046288020218002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080007727800310129800006130277005109217228003811313800002400001008004280042800428004280042
320204800416001110060101800260660253201831002401408000010024000080000500400054288022018002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080006628800290129800246031286205109217228003801010800002400001008004280042800428004280042
320204800415991000044288101800261660253201831002400868000010024000080000500400046288398118002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000110080008728800310129800246130277005109217228003811313800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
32002580054600001000100104201008002666000253200901024008680000102400008000050400127288367701800220800418004100323320010208000024000020800002400008004180041118002110910108000080000108000003280010000288001060280005019141771880038113080000240000108004280042800428004280042
3200248004159900000000080601008002666110025320093102400188000010240000800005040004228836770180022080041800410032332001020800002400002080000240000800418004111800211091010800008000010800000328002800010800286110000501981717680038010080000240000108004280042800428004280042
32002480041600000000000866010080026601100253200281024002780000102400008000050400022288367701800220800418004100323320010208000024000020800332400008004180041118002110910108000080000108000003280028000108002861280005019171761780038001080000240000108004280042800428004280042
3200248004159900000000043000008002610110025320064102400838000010240000800005040000028836770180022080041800410032332001020800002400002080000240000800418004111800211091010800008000010800000080028000980006011000050191417617800380101080000240000108004280042802028011080042
3200248004159900000000054300008002666110025320091102400808000010240000800005040004528802340180022080041800410032332001020800002400002080000240000800418004111800211091010800008000010800000228002800068001761600050198171721800381101080000240000108004280042800428004280042
32002480041599000000000114010080026660002532003710240081800001024000080000504000452880383018002208004180041002623320010208000024000020800002400008004180041118002110910108000080000108000000800280203180010612827005019617617800381101080000240000108004280042800428004280042
3200248004160000000100039200008002666000253200731024001880000102400008000050400048288367701800220800418004100323320010208000024000020800002400008004180041118002110910108000080000108000003280028000288002861142700501981781780038110080000240000108004280042800428004280042
320024800415990000000008890000800269611002532002810240027800001024000080000504000452883677018002208004180041003233200102080000240000208000024000080041800411180021109101080000800001080000008001000028800280028270050191717171780038110080000240000108004280042800428004280042
320024800415990000001008640100800266600025320037102400548000010240000800005040006828837250180022080041800410032332001020800002400002080000240000800418004111800211091010800008000010800000080028000680010602800050191717177800381101080000240000108004280042800428004280042
32002480041600000000000403010080026106110025320028102400278000010240000800005040000328836770180022080041800410032332001020800002400002080000240000800418004111800211091010800008000010800000328002500018800106118270050191817171780038001080000240000108004280042800428004280042