Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, post-index, 2D)

Test 1: uops

Code:

  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.006

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
660052942023033012300904569289600331703170091000300930001000300030005000150133572982303229181294013107000300030004000900029211293011161001100010003000930046143004514913038940368493131147020153321338221774742857610001643913113146193000300010002938529399293442943729501
6600429534228220023009047152930633317194700910003009300010003000300050001503335749023077294833006431070003000300040009000292242925611610011000100030009300462103001514912938918768183068147520278308638192273652841210001624413277144283000300010002927829352293482937529484
6600429314219290121111804581288390331693770091000300930001000300030005000150003574142296529100292793107000300030004000900029276291531161001100010003000930045873004514912935927468463065126020170313038231763642842410001629213408144303000300010002938829400293762932129398
6600429264219230023009045402890003317004701210003009300010003000300050001500135733023015291312932631070003000300040009000291512919611610011000100030009300433853004514912992906868663050136820116305938242268642848310001646013182145523000300010002940629407293852923429543
660042936422029002400120457328814033170357009100030093000100030003000500015001357292230382922129317310700030003000400090002921929222116100110001000300093004012633004514913297920968273043117220103320438212764682850410001624513401145083000300010002943329366293562944429368
66004294272202300270018045792889003316902700910003009300010003000300050001500035724022987290882937131070003000300040009000293682920411610011000100030009300701843004514912896928768013059126720237319538232364682852710001645513468145093000300010002935629313294502942729465
6600429510220270030006045552893100016964700610003006300010003000300050001500035708422994292422936531070003000300040009000293872923611610011000100030006300101783001511613164924368423184116720202307338242163592855210001654613256144843000300010002941729393293042923729323
6600429402220240028006047082888400017028700610003006300010003000300050001500935709423040291472943531070003000300040009000292762929011610011000100030006300101300151161295890526803307686620227311138232265652845410001651913318144273000300010002936229463293782938129413
660042933622027012600604544289120001698470071000300630001000300030005000150003568202304529152293323107000300030004000900029227292261161001100010003000630010133001511612924925168573091146620260321038231657682843410001644413146147523000300010002935829414294302936129347
66004293792192500220060470928905000169477006100030063000100030003000500015006357105230102911829421310700030003000400090002921529244116100110001000300063001013001511612902911768403091146320258310738191669662851410001642813471145193000300010002938029407294202940229407

Test 2: throughput

Count: 8

Code:

  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48020580086643100000000720018180073218181325560128801002400472400008010024000024000048049235194085713174080046800638006390327560100200240000240000200320000720000800708005611802011009901001008000080000010024001516420240055010421986240039513800051091016888006108000007240000240000801008004680047800638004680393
4802048007164300000000045001080050217015255601078010324004124000080100240000240000480498352637055586770800498006280062100345560100200240000240000200320000720000802038005711802011009901001008000080000110024000003502400400146033240040513335005109816878004208000067240000240000801008006380050800648004780057
480204800566430000000004500208004721717925560140801012400412400008010024000024000048049735335015589725080043800728005600345560100200240000240000200320000720000800688006211802011009901001008000080000010024000004302400400520322400000140440051097178480044080000106240000240000801008006380063800648005780058
4802048006664300000000045002280048217171125560141801032400392400008010024000024000048049635252155764153080044800688006800346560100200240000240000200320000720000800608004511802011009901001008000080000010024000004302400000970392400405133430051099168880059180000106240000240000801008008380064800648006380406
480204800476430000000004500102800472171792556014080100240039240000801002400002400004804973520381571923808004380062800650034256010020024000024000020032000072000080046800622180201100990100100800008000001002400000350240040000362400405133430051098178880054080000106240000240000801008006380060800638006780057
48020480062643000000000488802280030217171825560141801002400432400008010024000024000048049735203295764144080043800578005600347560100200240000240000200320000720000800638004811802011009901001008000080000010024000003502400400260392400405133430051097179880059080000106240000240000801008005780063802548006580414
48020480062643000000000480022800492017925560139801012401682400008010024000024000048049835204263866645080043800628006613033856010020024000024000020032000072000080045800561180201100990100100800008000001002401300057240170000432400005104400512491778800530800611010240000240000801008025480064802598025483724
48020480262646010000022177880028042921717776156078780224240301240130801622403692402024811983389223444925528290383334833298830316456052620024000024018620032024472113480433804292180201100990100100800008000001002401300057240302087430002403006130430051519358880385080000120240000240000801008006380064800628004680304
48020480066643000000100450052800472171710625601418010024017024000080100240000240178480497352028055851380800438006780066530331560100200240000240000200320000720000800458025411802011009901001008000080000110024000003502401300704124000001334300510910178480059080062107240000240000801008004680067800508025480046
4802048006164300000001045002280248012179255604608010024003924000080164240000240000480498288196456746340802108025380063130244556010020024000024000020032000072000080067800701180201100990100100800008000001002400000350240040000612400400135440051098168880059080000106240000240000801008006780068800638006380240

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | d9 | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480025800686200000000046000008005120715255600498001024004124000080010240000240000480048347613238670010008005180045800661303525600102024000024000020320000720000800698006211800211090101080000800000102400000430240041100240041514244005019316023380063800001410240000240000800108006980067800678006780046
4800248006262000000000460007280051217142556004980010240040240000800102400002400004800473491437568323600080047800708006737035156001020240000240000203200007200008006280062118002110901010800008000001024000004302400410002400410140000501921601338006780000010240000240000800108004780067800678007180068
480024800666200000000046000128005527716255600518001024004024000080010240000240000480047349123556161350008004780066800701303535600102024000024000020320000720000800458006211800211090101080000800001102400000430240040004124017150404300501931600438006380000140240000240000800108007180067800678006780046
4800248006562100001100000022800502771425560049800102400432400008001024000024000048004735426955751420000800478006680066170349560010202400002400002032000072000080066800451180021109010108000080000010240000000240040004424004051400005019316003380042800001410240000240000800108006780067800678006780155
48002480066620000000004600012800502701425560051800102400182400008001024000024000048004735390435753731000800518006680066130354560010202400002400002032000072000080066800661180021109010108000080000010240000044024004100412400005104400501931700338005980000140240000240000800108006780067800638006780066
480024800716210000000000002280051271132556004980010240039240000800102400002400004800473491235386684900080026800668006617035056001020240000240000203200007200008006280045118002110901010800008000001024000004402400420044240041514045005019317003380063800001410240000240000800108004680067800648006780067
480024800666200000000046000228005027714255600528001024004024000080010240000240000480046349123557095770008004780066800660034956001020240000240000203200007200008006680062118002110901010800008000001024000004302400411041240041514144015019316003280063800001414240000240000800108006780067800678007180067
48002480066620000000004600002800532700255600498001024003924000080010240000240000480046191784857031890008004780066800662003495600102024000024000020320000720000800668006611800211090101080000800000102400000430240041008572400415141440050192160071482739800001410240000240000800108006780071800668006780067
4800248004562000000000580002280051277142556003480010240039240000800102400002400004800491917848575399800080048800768006513034956001020240000240000203200007200008006680066118002110901010800008000001024000000572400410041240041514144005019316003380063800001410240000240000800108006780067800728006780066
480024800456210000000000006280053217132556004980010240040240000800102400002400004800483491235386663700080047800668006213032756001020240000240000203200007205678007080070118002210901010800008000001024000004302400411041240041604144005019316003380042800001414240000240000800108006780071800678006780067