Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (4H)

Test 1: uops

Code:

  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.012

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.012

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
64005287012151171160000400537027959001601440093009100030001000500035727322850284952813131040001000300010003000284252804911610011000100001003321002021100121311132911046872723501760195683399381313443627802151601251712707100030002806828566281162827128105
64004281532151150141000400524727963001578340123012100030001000500035722022879280042806331040001000300010003000283852856911610011000100001003121002011100121310141721057673303541937197543370382216424227822139501295913729100030002810828088279812853228322
64004285342141131140000400534527945111587740093009100030001000500035760522849284912812931040001000300010003000280252795511610011000100001003321004022100121310134831042070313436840192973420382216384427774147131209012832100030002804828035280242841828527
64004281382111141151000310510227872101596240123012100030001000500035751622909284422851031040001000300010003000280022808311610011000100001002131003011100122310141431008273363508638193273456381813434327817138051211312949100030002848028051280702845128124
64004283542141100150000400521727977111598340123012100030001000500035757222850286282866431040001000300010003000281142819511610011000100001003331004002100122312142481057472133550837193463430382214503927981138611223712545100030002803228007282822861628255
6400428241211170101000400489527893001582240123012100030001000500035757922894280012828431040001000300010003000283672805211610011000100001002121003021100021312135041033572913499735192143483381212444627871141411213112841100030002855528583285532812028020
64004282502101141150110400527228373101650740093012100030001000500035735422860284892812831040001000300010003000284462854221610011000100001001121002002100122210139641053272873417938189923302381810454928119141201218113798100030002868228220286312820628050
64004283952111141121000410520227944001600240093012100030001000500035732622840280052855331040001000300010003000284672838411610011000100001002231001011100122211140481044872813474635191693369382015413927808140551229112890100030002815028148285882809028193
64004280482131121121000410512027938011598140093012100030001000500035745822853280052806231040001000300010003000282512812611610011000100001003221002002100122312143211041573143408437192543470382214404127933138781214012937100030002806928515280152841328674
640042817421411117011041053122784200163184009300910003000100050003575292287628047281263104000100030001000300028217279991161001100010001100322100301110002121214035103947304347284519715334538189423627783139161225012690100030002821328479281222847528097

Test 2: throughput

Count: 8

Code:

  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  ld3r { v0.4h, v1.4h, v2.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020580067600010023008002606625320163100240054800001002400008000050040052228817981800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000010080000188001401480018610005109117118003801310800002400001008004280042800428004280042
32020480041599001020008002610625320163100240054800001002400008000050040083628818811800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000010080000080018018800006014180510911711800381010800002400001008004280042800428004280042
320204800415990000201080026166253201631002400638000010024000080000500400727288349418002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800001880018013800000113005109117118003801310800002400001008009380042800428004280042
3202048004160000000108002606625320154100240063800001002400008000050040085928836791800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000010080000188001711480000000220510911711800380013800002400001008004280042800428004280042
3202048004159900102300800261602532010010024006380000100240000800005004006992884025180022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000018800000148000001182205109117118003811313800002400001008004280042800428004280042
3202048004160000002310800261602532015410024006380000100240000800325004003012884008180022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000018800170138001401172205109117118003801313800002400001008004280042800428004280042
320204800416000000000800261062532016310024006380000100240000800005004008722880252180022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000022800170188000060182205109117118003811313800002400001008004280042800428004280042
320204800416000011240080026106253201631002400548000010024000080000500400797288396518002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800001880017014800136117220510911711800381100800002400001008004280042800428004280042
3202048004160000000008002616025320154100240000800001002400008000050040086428837601800228004180041337320100200800002400002008000024000080041800411180201100991001008000080000010080000188001801880018600220510911711800380013800002400001008004280042800428004280042
3202048004160000102310800261662532010010024006380000100240000800005004006612881871180022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000018800000148000060142205109117118003811310800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002580054600000000003601001800260660046320107102404088010010240300800005040001128816920800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000027800140018800176172860005019012171215800381131380000240000108004280042800428004280042
32002480041599000010002000000800260060025320073102400638000010240000800005040002228823410800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000027800140018800006113180000501901417131380038101080000240000108004280042800428004280042
3200248004159900000000240100080026106006632007310240063800001024000080000504000242882341080022800418004103233200102080000240075208000024000080041800411180021109101080000800000108000002880014002180014611700000501901517171580038101080000240000108004280042800428004280042
3200248004159900000000240000080026066002532006410240000800001024000080000504000112881693080022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000000800000008001761132200005019015171310800380131380000240000108004280042800428004280042
32002480041600000000002401000800260060025320073102400008000010240000800325040002228816930800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000028800130020800136117220000501901017141480038013080000240000108004280042800428004280042
320024800415990000000000100080026106002532006410240054800001024000080000504000182881693180022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000002880018001880018610000005019010171414800380131080000240000108004280042800428004280042
32002480041599000000000000008002610600253200641024006280000102400008000050400000288000008002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000288001400138001861140000050190131714168003810080000240000108004280095800428004280042
320024800416000000000020000008002606600107320063102400008000010240000800005040003828800090800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000028800850017800006017000005019012171415800380131080000240000108004280042800428004280042
320024800415990000000024000008002616600253200731024006380000102400008000050400024288234108002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000288001700080017601300000501961617151580038013080000240000108004280042800428004280042
320024800415990000000024000008002616000253200721024005480000102400758000050404568288991008002280041800410323320010208000024000020800002400008004180041118002110910108000080000110800000278001400188001400132200005019013171215800381131380000240000108004280042800428004280042