Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (8H)

Test 1: uops

Code:

  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.009

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6400528764213131022101003104884282020101636740123009100030001000500035629422858283522850631040001000300010003000283352846511610011000100001002121001001100021211135809905710433121176196283259381724605928015145471265313433100030002861228674285362862528501
6400428602213126123011004004968281650001640340033003100030001000500035624522883283592843631040001000300010003000283792830111610011000100001001221003112100021011135279842705432911465193913243382020606227997145411257613648100030002852728478284162852528453
6400428525213126124100002104964281700001624840093012100030001000500035735522886283472849031040001000300010003000285222838211610011000100001001121002012100101210134121008970983263857194643285381720656028090150141285513500100030002854728393285442853128486
64004284282131311250000011048582810701116428400930091000300010005000356976229172833328466310400010003000100030002838128496116100110001000010021210021111000212101395510097707032831363195823241381426656227963145301238713618100030002866628547285602844028405
64004285192141241281000031050162817200016231400930091000300010005000356251022824282622854331040001000300010003000284352842611610011000100001002101002011100021211136129834712032511062195003298381626676427954146891257813784100030002851728454283752850028530
640042877321312902910000310488428149001162724009300310003000100050003580042283228373284433104000100030001000300028421284181161001100010000100220100202110010101113618987070833309859195543190381020666627957145791264613727100030002863128456285732862728419
6400428497213125027000003104890282340101627140093003100030001000500035644422903284352849731040001000300010003000281902829811610011000100001001231001101100001011138669936713733701064195053325381627576328024147681260413500100030002858628444284822837928462
640042866521412513100000400518328242000164644012300910003000100050003571752284828424285513104000100030001000300028292283341161001100010000100122100300410000131013719985870423312970195233294381121665928066146621292213549100030002867128630285722853128460
6400428483216122228100333310050072817300016288400930091000300010005000357341022868282192845831040001000300010003000283572838211610011000100001001221002011100022211135739828705234091071193443399381121656927946148421285213727100030002881728506286792855028524
6400428555212124125111001004872281110001623540093003100030001000500035723622856283172843031040001000300010003000282792828211610011000100001002221001002100121211136949669715733451359193963298381530626428112149901263713571100030002860928632283702835928461

Test 2: throughput

Count: 8

Code:

  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  ld3r { v0.8h, v1.8h, v2.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205800676001000024008002616625320154100240000800001002400008000050040002428816930800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000011008000018800141013800146102200510911711800380130800002400001008004280042800428004280042
32020480041599000000108002600625320100100240062800001002400008000050040014828816930800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000018800180014800180102200510911711800381013800002400001008004280042800428004280042
32020480041599000002010800261602532016310024006380000100240000800005004000182881693080022800418004103233201002008000024000020080000240000800418004111802011009910010080000800000100800001880000001880017601322005109117118003811310800002400001008004280042800428004280042
3202048004159900000470080026166253201631002400638000010024000080000500400018288169308002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080000188001700080017611322005109117118003801011800002400001008004280042800428004280042
32020480041599000000008002610025320100100240000800001002400008000050040000928800000800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000008000010080017611318005109117118003801010800002400001008004280042800428004280042
320204800415990000024008002616025320100100240000800001002400008000050040002928816930800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000008003100188001700240725109117118003811013800002400001008004280042800428004280042
3202048004159900000190080026066253201631002400008000010024000080000500400029288000008002280041800410323320100200800002400002008000024000080041800411180201100991001008000080000010080000080018001880000611422005109117118003811313800002400001008004280042800428004280042
320204800416000000019108002610625320100100240054800001002400008000050040002928827360800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000018800000018800006113220051091171180038100800002400001008004280042800428004280042
3202048004160000000620080026166253201631002400638000010024000080000500400021288234108002280041800410323320100200800002400002008000024000080041800412180201100991001008000080000010080000188001800138000061022005109117118003811313800002400001008004280042800428004280042
32020480041599000000008002616025320154100240063800001002400008003250040003328802971800228004180041032332010020080000240000200800002400008004180041118020110099100100800008000001008000018800000014800006102260510911711800380130800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320025800676000110011000320100180026066002532008210240067800001024000080000504000032882839080022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000001580000010108001160101500050191617010780038106080000240000108004280042800428004280042
3200248004160000000010001701000800261060025320010102400008000010240000800005040000828809610800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800110001180011601015000501951709880038006080000240000108004280042800428004280042
3200248004160000000000001701000800261660025320052102400008000010240000800005040000028809610800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800000000800000011150005019717071080038106680000240000108004280042800428004280042
320024800415990000000000170100080026166002532005210240042800001024000080000504000002880964180022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000982480026001780000612624620501981705880038009980000240000108004280042800428004280042
320024800415990101000000320100080026166002532008110240020800001024000080000504000332882839180022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000880800270002680000012515000501971705880038109680000240000108004280042800428004280042
32002480041599000000000017010008002616000253200101024004280000102400008000050400008288096418002280041800410323320138208000024000020800002400008004180041118002110910108000080000010800000080011000108001160015000501981709780038106680000240000108004280042800428004280042
3200248004159900000000001701000800260060025320052102400428000010240000800005040000828800001800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800110001080000601115000501981709580038106680000240000108004280042800428004280042
32002480041599000000000016010008002616000253200101024004280000102400008000050400011288096118002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000080011000118001160112371050191017011980038109080000240000108004280042800428004280042
3200248004160001010000001701000800260060025320052102400428000010240000800005040000728809611800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080006724800270012680018612623710501971705880038009980000240000108004280042800428004280042
32002480041599010101000016010008002616600253200101024004480000102400008000050400002288096408002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000158001000011800006010150005019717010980038106680000240000108004280042800428004280042