Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 3 regs, 1D)

Test 1: uops

Code:

  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005293572360300101100701004651286990212406630001000200010002000500010001160016113285542928831030003000300029040290951161001100010000200004200200320024260001312295246883306617020617329538014562592848110001617913438148412000100010002928729226292762941629281
63004292762360000000100700004617286690212409630001000200010002000500010002150016141284912922331030003000300029080290661161001100010000200005200200020024260001305790976889318006620703331138154558602838310001591513517147722000100010002928129317292792922929382
63004292392350100000100400004638286570212426330001000200010002000500010002100016127285762931131030003000300029139289731161001100010000200004200200220024260001296592456930318927520530327838054760592839310001611913275149802000100010002929529355292802918829296
630042924523500000001001000004536287260212408530001000200010002000500010000180016115286062927532930003000300029035291431161001100010000200006200200220022260001326891456830319506820683327838054956562848010001588913473148952000100010002919729302291862919229254
630042916623500001011001380000473328642022241043000100020001000200050001000290016105285642932031030003000300029082289891161001100010000200004200200020024240001312693416871313507220665326738064461642845610001614013593149062000100010002937029267293332935129241
630042905723400000000001000004622286290222400430001000200010002000500010000110016132286842932731030003000300029161289331161001100010000200004200200220024260001316394676850316707320576329438084556652854510001596113344148672000100010002921729226292262929029360
63004292432350100000000700004737286900212410130001000200010002000500010001100016120286102926731030003000300029055290261161001100010000200004200210320064260001305593896924312607020587322538043959552856610001605413392149362000100010002914829173292992928129254
63004293352340000000000700004573286710012415630001000200010002000500010002130016091286172936331030003000300029036291081161001100010000200004200200220024200001299792206893308207020644323338054255652837510001591113488147532000100010002931529200293252935829271
6300429153234000010000070000466628675022241503000100020001000200050001000250016132286262919631030003000300029004289971161001100010000200004200000220024260001308691766904317506720531329238104654562850210001598613719149962000100010002926529337293452924629239
63004292752350000000000700004695286100122414630001000200010002000500010004150016104285752925631030003000300029024290671161001100010000200004200010320024260001312393346871316906720664329038045162592849910001604513654148342000100010002922629094292292914729319

Test 2: throughput

Count: 8

Code:

  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld1 { v0.1d, v1.1d, v2.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020580040643000000001250000800252121212252401008010016000080100160000205295836759950800158004080040499543499982401002002400002002400008004080040118020110099100100800008000011001600000271600240024160000612335000051101161180037180000100016000080000801008004180041800418004180041
24020480040643000000009360001800262121202524010080100160000801001600002070175367599508001580040800404995334999824010020024000020024000080040800401180201100991001008000080000110016000002716003100321600006124270000511012511800370800001010016000080000801008004180041800418004280041
240204800406430000000050001008002521212152524010080100160000801001600002064876367267208001580040800404995334999824010020024000020024000080040800401180201100991001008000080000010016000002716003200321600326024000005110116228003708000000016000080000801008004180041800418004180041
24020480040643000000006410001800252121202524010080100160000801001600002065067366934408001580040800404995334999824010020024000020024000080040800401180201100991001008000080000010016000000160032003516003361243500005110116118003818000067016000080000801008004180041800418004180041
2402048004064300000000571000080025212121125240100801001600008010016000020614963646101080015800408004049953349998240100200240000200240000800408004011802011009910010080000800000100160000027160023002616003361233500005110116118003718000066016000080000801008004280041800418004180041
24020480040642000010001019000180025212121525240100801001600008010016000020607823669470080015800408004149953349998240100200240000200240000800408004011802011009910010080000800000100160000027160032003516000061353500015110116118003818000066016000080000801008004180041800428004180041
2402048004064300000000100800008002521212132524010080100160000801001600002050044367266408001580162800404995418499982401002002402492002400008016180040118020110099100100800008000011001600000391600320032160147612435000051101161180114080000106016000080000801008004280041800418016480041
240204800406420000000110050001800252121211252401008010016000080100160164202538436655180800158004080040500083499982401002002400002022400008004080163118020110099100100800008000001001600000271600000001600006132002005110116118003708006506016000080000801008004180041800418016380041
2402048004064300200101102601008002521212552524010080174160000801001600002044958364610108001580040801624995435008124010020024024820024000080040800401180201100991001008000080000010016023270160147008211601136124350700517116123806360802071210016000080000801008040280286804078028980407
24020480282647000000651333528000803902120220181241041804921606138052416096620729333702166080015801398004049953184999824010020024024920024000080163825801180201100991001008000080000010016000000160000002416003161035002105445116628003708154210788016000080000801008139082602802618004187111

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002580040642100001000590002800252161619252400108001016000080010160000205010636726620080018800408004049976350020240010202400002024000080040800441180021109101080000800001101600131242160050000501600380615242130502000216022800370800006616000080000800108004180045800448004180041
240024800406431000011007500028002816152924252400108001016000080010160000204237336694530080018800408004049976350021240010202400002024000080043800401180021109101080000800000101600121441160067000691600540616642120502000216022800370800006916000080000800108004480041800448004480044
240024800436431000000006700028002516131619252400108001016000080010160000205842936725610080018800408004449979350021240010202400002024000080040800401180021109101080000800000101600131357160067100501600390606641130502000216022800410800006916000080000800108004280044800428004180041
24002480043643100000000640102800251701617252400108001016000080010160000205730136726600080018800408004049976350020240010202400002024000080043800401180021109101080000800000101600131342160051100511600380615043131502000316033800400800006916000080000800108004480045800418004480778
240024800406421000100006288102800282151617252400108001016000080010160000204718336657890080019800408004049976350024240010202400002024000080043800431180021109101080000800000101600131342160050000521600380616543121505100316023800400800006916000080000800108004180045800458004580041
24002480040642100000000580002800283162916252400108001016000080010160000204856536596460080018800448004049979350106240010202400002024000080040800401180021109101080000800000101600131257160053011651600400613843130502000316032800370800006616000080000800108004180041800418004480044
240024800436431000100007300028002821529632524001080010160000800101600002057950367265300800188004080040499763500202400102024000020240000800408004311800211091010800008000001016001414571600520018671600540616642131502000316022800370800629616000080000800108004480041800448004180045
240024800436431000000005700028002916151620252400108001016000080010160000203703936593870080018800418004050014350020240010202400002024000080043800401180021109101080000800000101600141442160050001521600390615041131502000215022800400800006916000080000800108004580041800418004180041
240024800406431000000008500028015217152915252400108001016000080010160000203751936661510080018800408004149979350020240010202402472024000080043800401180021109101080000800000101600141241160067010501600530615043130502000216033800410800006916000080000800108004180044800448004480041
24002480043643100000000720002800293162917252400108001016000080010160000204718336726660080018800408004049975350020240010202400002024000080043800401180021109101080000800000101600121357160066311661600390615043136502000216022800370800006616000080000800108004280045800418004180045