Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD3R (8H)

Test 1: uops

Code:

  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.009

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.009

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
6400528764213131022101003104884282020101636740123009100030001000500035629422858283522850631040001000300010003000283352846511610011000100001002121001001100021211135809905710433121176196283259381724605928015145471265313433100030002861228674285362862528501
6400428602213126123011004004968281650001640340033003100030001000500035624522883283592843631040001000300010003000283792830111610011000100001001221003112100021011135279842705432911465193913243382020606227997145411257613648100030002852728478284162852528453
6400428525213126124100002104964281700001624840093012100030001000500035735522886283472849031040001000300010003000285222838211610011000100001001121002012100101210134121008970983263857194643285381720656028090150141285513500100030002854728393285442853128486
64004284282131311250000011048582810701116428400930091000300010005000356976229172833328466310400010003000100030002838128496116100110001000010021210021111000212101395510097707032831363195823241381426656227963145301238713618100030002866628547285602844028405
64004285192141241281000031050162817200016231400930091000300010005000356251022824282622854331040001000300010003000284352842611610011000100001002101002011100021211136129834712032511062195003298381626676427954146891257813784100030002851728454283752850028530
640042877321312902910000310488428149001162724009300310003000100050003580042283228373284433104000100030001000300028421284181161001100010000100220100202110010101113618987070833309859195543190381020666627957145791264613727100030002863128456285732862728419
6400428497213125027000003104890282340101627140093003100030001000500035644422903284352849731040001000300010003000281902829811610011000100001001231001101100001011138669936713733701064195053325381627576328024147681260413500100030002858628444284822837928462
640042866521412513100000400518328242000164644012300910003000100050003571752284828424285513104000100030001000300028292283341161001100010000100122100300410000131013719985870423312970195233294381121665928066146621292213549100030002867128630285722853128460
6400428483216122228100333310050072817300016288400930091000300010005000357341022868282192845831040001000300010003000283572838211610011000100001001221002011100022211135739828705234091071193443399381121656927946148421285213727100030002881728506286792855028524
6400428555212124125111001004872281110001623540093003100030001000500035723622856283172843031040001000300010003000282792828211610011000100001002221001002100121211136949669715733451359193963298381530626428112149901263713571100030002860928632283702835928461

Test 2: throughput

Count: 8

Code:

  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
320205800676001000024008002616625320154100240000800001002400008000050040002428816930800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000011008000018800141013800146102200510911711800380130800002400001008004280042800428004280042
32020480041599000000108002600625320100100240062800001002400008000050040014828816930800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000018800180014800180102200510911711800381013800002400001008004280042800428004280042
32020480041599000002010800261602532016310024006380000100240000800005004000182881693080022800418004103233201002008000024000020080000240000800418004111802011009910010080000800000100800001880000001880017601322005109117118003811310800002400001008004280042800428004280042
3202048004159900000470080026166253201631002400638000010024000080000500400018288169308002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080000188001700080017611322005109117118003801011800002400001008004280042800428004280042
32020480041599000000008002610025320100100240000800001002400008000050040000928800000800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000008000010080017611318005109117118003801010800002400001008004280042800428004280042
320204800415990000024008002616025320100100240000800001002400008000050040002928816930800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000008003100188001700240725109117118003811013800002400001008004280042800428004280042
3202048004159900000190080026066253201631002400008000010024000080000500400029288000008002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080000080018001880000611422005109117118003811313800002400001008004280042800428004280042
320204800416000000019108002610625320100100240054800001002400008000050040002928827360800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000018800000018800006113220051091171180038100800002400001008004280042800428004280042
3202048004160000000620080026166253201631002400638000010024000080000500400021288234108002280041800410323320100200800002400002008000024000080041800412180201100991001008000080000010080000188001800138000061022005109117118003811313800002400001008004280042800428004280042
32020480041599000000008002616025320154100240063800001002400008003250040003328802971800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000018800000014800006102260510911711800380130800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 04 | 05 | 07 | 08 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
320025800676000110011000320100180026066002532008210240067800001024000080000504000032882839080022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000001580000010108001160101500050191617010780038106080000240000108004280042800428004280042
3200248004160000000010001701000800261060025320010102400008000010240000800005040000828809610800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800110001180011601015000501951709880038006080000240000108004280042800428004280042
3200248004160000000000001701000800261660025320052102400008000010240000800005040000028809610800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800000000800000011150005019717071080038106680000240000108004280042800428004280042
320024800415990000000000170100080026166002532005210240042800001024000080000504000002880964180022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000982480026001780000612624620501981705880038009980000240000108004280042800428004280042
320024800415990101000000320100080026166002532008110240020800001024000080000504000332882839180022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000880800270002680000012515000501971705880038109680000240000108004280042800428004280042
32002480041599000000000017010008002616000253200101024004280000102400008000050400008288096418002280041800410323320138208000024000020800002400008004180041118002110910108000080000010800000080011000108001160015000501981709780038106680000240000108004280042800428004280042
3200248004159900000000001701000800260060025320052102400428000010240000800005040000828800001800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800110001080000601115000501981709580038106680000240000108004280042800428004280042
32002480041599000000000016010008002616000253200101024004280000102400008000050400011288096118002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000080011000118001160112371050191017011980038109080000240000108004280042800428004280042
3200248004160001010000001701000800260060025320052102400428000010240000800005040000728809611800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080006724800270012680018612623710501971705880038009980000240000108004280042800428004280042
32002480041599010101000016010008002616600253200101024004480000102400008000050400002288096408002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000158001000011800006010150005019717010980038106680000240000108004280042800428004280042