Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (4S)

Test 1: uops

Code:

  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
640052832221332112010504927858111158394006300610003000100050023569042289128319280893104000100030001000300028434283661161001100010000100031001001000212113474104367166346705819182335838159454128037136211292213111100030002811028073283402815628079
640042809021300109010509428052001165354000300010003000100050003571002282928334286543104000100030001000300028205284081161001100010001100001000151001122013070941170393214039196663298381613384527967156441328314154100030002862328733286172873828775
640042876522300002000525727920000162844006300910003000100050003569452287928329281083494000100030001000300329238290731161001100010000100021000011000202012893918368883053038202013088381611443828309164141400015068100030002924029228292902925229274
64004292842200000300145672875300017076400630091000300010005000357136228362914229143310400010003000100030002909129226116100110001000010002100000100020001286792376836310404120294308738157434228303164611372415069100030002926829234292342921829265
64004292642192000300045722876500017091400630061000300010005000356814228192911629256310400010003000100030002915629119116100110001000010002100000100020001274991056833310604220294306638156444828271164041389415033100030002924029192291782918129283
64004292562180000300045242880600117159400630061000300010005000356792228452908429262310400010003000100030002910729147116100110001000010000100000100000201295491116852307103920197300638146454528351163791380914840100030002924529192291852922229290
64004292122190000000145672879101017024400630061000300010005000356955228342902329279310400010003000100030002910629169116100110001000010002100000100120301293991336911308304220153306638169404328345164421383614989100030002917929253291772917129233
64004291622191000600046272880400016958400630061000300010005000357406227602904929178310400010003000100030002907429084116100110001000010002100001100000201276894006891315404620170310038148434228339165371380114947100030002930429232292282918829284
64004291692180000300145702888401017063400630061000300010005000357175228432907329224310400010003000100030002910029156116100110001000010002100000100020201299491816922303704620253310838099434628344160761383815029100030002913829167293412931529303
64004291982190001000145202881700016996400630091000300010005000356175228052909129291310400010003000100030002914729093116100110001000010000100000100000201282191296875306604620230304138127364328383163081390415029100030002928729346293152918529215

Test 2: throughput

Count: 8

Code:

  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  ld3r { v0.4s, v1.4s, v2.4s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205800675990010000010080026160002532015410024000080000100240000800005004000092881693128002208004180041323320100200800002400002008000024000080041800411180201100991001008000080000110080000018800130014800136114180051092117118003801010800002400001008004280042800428004280042
320204800415991001000361018002616000253201631002400008000010024000080000500400018288000012800220800418004132332010020080000240000200800002400008004180041118020110099100100800008000011008000001880018001480018611418005109211711800381010800002400001008004280042800428004280042
32020480041600000000019100800260600025320154100240054800001002400008000050040000028816931080022080041800413233201002008000024000020080000240000800418004111802011009910010080000800001100800076288000702780024613028705129211711800381013800002400001008004280042800428009480042
32020480041599100000036100800261600025320183100240086800001002400008000050040004628802201280022080041800413233201002008000024000020080000240000800418004111802011009910010080000800001100800087278003101308000001310705109211711800381013800002400001008004280042800428004280042
32020480041599110000037001800260660025320179100240079800001002400008000050040005528839811280022080041800413233201002008000024000020080000240000800418004111802011009910010080000800001100800076288003100308000061706051092117118003811313800002400001008004280042800428004280042
320204800415991000000370018002616000253201861002400188000010024000080000500400054288398112800220800418004132332010020080000240000200800002400008004180041118020110099100100800008000011008000772880029003880023013027715109211711800380130800002400001008004280042800428004280042
3202048004159910010003610180026160002532011810024008380000100240000800005004000462883981128002238004180041723320100200800002400002008000024000080041800411180201100991001008000080000110080008628800091019800246129277051092117118003801313800002400001008004280042800428004280042
32020480041600100000061018002600600253201791002400208000010024000080000500400048288391512800220800418004132332010020080000240000200800002400008004180041118020110099100100800008000011008000972880031023180023613128715109211711800380130800002400001008004280042800428004280042
32020480041599110000036101800260660025320120100240086800001002400008000050040004628839811280022080041800413233201002008000024000020080000240000800418004111802011009910010080000800001100800087278000701680000013027715109211711800380130800002400001008004280042800428004280042
32020480041600110000037001800261600025320183100240086800001002400008000050040004728839811280022080041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800076080007013080024603128615109211711800380130800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200258005460000000007010008002616000253200891024002080000102400008000050400003288398108002280041800410323320010208000024000020800002400008004180041118002110910108000080000110800000188001800178000060000005019317328003810080000240000108004280042800428004280042
32002480041600000000054501000800261660025320010102400008000010240000800005040000028800001800228004180041032332001020800002400002080000240000800418004111800211091010800008000011080000008001810188000061132200050193174480038001080000240000108004280042800428004280042
32002480041600000000020010008002616000253200101024006380000102400008000050400022288234108002280041800410370320010208000024000020800002400008004180041118002110910108000080000010800000080014001880017601318000501941733800380131380000240000108004280042800428004280042
320024800415990000000240000080026060002532007310240054800001024000080000504000222880000080022800418004103233200102080000240000208000024000080041800411180021109101080000800001108000001880000003380000010000050194174380038013080000240000108004280042800428004280042
32002480041600000000024010008002616600253200301024008680000102400008000050400054288400518002280041800410323320010208000024000020800002400008004180041118002110910108000080000110800000080000000800136013000150194173480038101380000240000108004280042800428004280042
320024800416000000000200100080026166002532006410240063800001024000080000504000292881693180022800418004103233200102080000240000208000024000080041800411180021109101080000800001108000002280018001880018610000050194174480038113080000240000108004280042800428004280042
320024800415990000000570000080026100004432007310240063800501024000080000504000582881757180022800418004103473200102080000240000208000024000080041800411180021109101080000800001108000001880014403080018611422000501951735800380131380000240000108004280042800428004280042
32002480041600000000024010008002616000253200731024000080000102400008000050400021288001518002280041800410323320010208000024000020800002400008004180041118002110910108000080000110800000188000010178001701018000501931743800380131380000240000108004280042800428004280042
32002480041600000000024000008002606600253200731024006380000102400008000050400022288000018002280041800410323320010208000024000020800002400008004180041118002110910108000080000110800000188000000178000060142200050192173280038113080000240000108004280042800428004280042
3200248004160011000003501000800261060025320028102400838000010240000800005040000328802201800228004180041032332001020800002400002080000240000800418004111800211091010800008000011080000008000000138001800142200050194173480038113080000240000108004280042800428004280042