Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, post-index, 8H)

Test 1: uops

Code:

  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.009

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6600528765222118160120049062832033016212701210003003300010003000300050001500335805910230692876428802310700030003000400090002838228420116100110001000030003030031043003514901338796746916314774519516321038141644502809810001518912234133903000300010002853628692285022888228695
6600428522222015140310487728163303162727009100030083000100030003003500015003356251100230212842128476310700030003000400090002869028545116100110001000030000030041033004014601333894296952309574019530320538101045452803310001502012460133363000300010002851028658286282848528522
66004285322220131701200477128397303163017012100030093000100030003000500015003357499082305828429287193107000300030004000900028464285491161001100010000300001130040043006003601323394606922319454119460324638031138402823410001536912642134463000300010002858128772286762865528696
66004286832230151409004760283543031635870091000300330001000300030005000150043563911002302128701287583107007300030034000900028954286831161001100010000300009300330430070140715132539429689731296441953133103818540392813110001627413431145343000300010002934029253292692925429214
66004292972190141107004560289593031690070091000300930001000300030005000150033563400022984290942932331070003000300040009000293142907911610011000100003000093003104300351300127718897685030534432010630643810745392842110001642913399145673000300010002950529328292462928329250
660042932721901713070046152884531017037700610003009300010003000300050001500035828400230262917629320310700030003000400090002924729299116100110001000030043153010111430105113001290791266846306674020074316538111041362844110001630713407146863000300010002929029305293942923729218
6600429248219018140300455928774300169597009100030093000100030033000500015004357531002298529039293843107000300030004000900029221291581161001100010000300009300430133004514001283190996821301364520161306738171140422837710001641913322146743000300010002932429337293142926929187
660042932621901117070045042883230316932700910003009300010003000300050001500335738100229632910729337310700030003000400090002912829164116100110001000030000930043073004513001279093726810300573820114311738151238632839210001623813196144783000300010002918329389293362925929295
66004293142200151213014650288283031688370091000300930001000300030005000150033575050023029291512926231070003000300040009000291322917911610011000100003000093004307300351490128779215684330579402004131063811937412835610001625513355145253000300010002928129243292852924829206
6600429283220015150210045962885330316800700910003009300010003000300050001500335758900229822919629277310700030003000400090002921229244116100110001000030000030042073004513001288290836856306194120035311538161240362837810001630313314144683000300010002927729320292942923929269

Test 2: throughput

Count: 8

Code:

  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3 { v0.8h, v1.8h, v2.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802058008362110100110006600738005634717255601308010024002724000080100240000240000480496352022955910840080118080071802052203535601002002400002400002003200007200008007180071118020110099010010080000800001002400151743024005600058240040515543150051091116131380068800011313240000240000801008007680072800728007280076
480204800716201000010000760012800563771825560122801002400292400008010024000024000048049835202295688606008005508007280071180354560100200240000240000200320000720000800718007511802011009901001008000080000100240015154302400561015924003951554315105109816121280068800001313240000240000801008007280072800768007280072
48020480072620101000000076005284866377172556014080100240026240000801002400002400004804973520229568861201800770800758007118035256010020024000024000020032000072000080071800711180201100990100100800008000010024001616430240056000612400416155431500510981713880072800001313240000240000801008007280072800768007280072
480204800716201000010000760012800563771725560143801012400272400008010024000024000048049835472365723387018005208007180071180354560100200240000240000200320000720000800718007111802011009901001008000080000100240015154302400560015924003951574315205109111613680068800001313240000240000801008007280072800728007580072
4802048007562010010100006400028005627717255601408010024004224000080100240000240000480497349143257286320180052080071800711803535601002002400002400002003200007200008007480071118020110099010010080000800001002400151643024005600162240040515643151051091216121280076800001313240000240000801008007280072800728007680072
480204800716201010000000760002800562771725560128801002400412400008010024000024000048049535279865680668038005708008080071200355560100200240000240000200320000720000800718007111802011009901001008000080000100240016154302400561015924004051564315105109141681280069800001313240000240000801008007280072800768007980076
4802048007162110100000007300018006337717255601298010024004224000080100240000240000560464354482655297430280056080071800711803575601002002400002400002003200007200008007180071118020110099010010080000800001002400161544024005510259240038515643150051091317131380072800001313240000240000801008007280071800768007280072
48020480071620100101000075000380056377172556014380101240027240000801002400002400004804953547236567398410800550800718007118035456010020024000024000020032000072000080071802621218020110099010010080000800001002400151543024005500061240040615643150051231016151480072800001313240000240000801008007280072800778007180072
480204800716211010000001650012800560771725560127801002400282400008010024000024000048049735202295688608018005908007580072180353560100200240000240000200320000720000800728007411802011009901001008000080000100240016154302400560135924004151544315005109121613880068800001313240000240000801008007280072800728007280103
4802048065262510020000006688062800563972025560127801002400262400008010024000024000056046435202295688626008005508007580071190354560100200240000240000200320000720000800718007311802011009901001008000080000100240016154302400550015824017161564315005109121613880068800001314240000240000801008007280076802658007180072

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4800258007762000000058000180047217172412556005180010240040240000800102400002400004800483501251568875500800438006680067130345560010202400002400002032000072000080067800621180021109010108000080000010240000044024004000040240040614143050193162380059800001010240000240000800108006380063800638041580067
480024800626210000004500518004729172246256004980010240042240000800102400002400004800473511704570948100802138006280066130349560010202400002400002032000072000080260800621180021109010108000080000010240000043024003902040240041514144050193174380059800001010240000240000800108006780067800678006380071
4800248006762100000045000180047017172212556005080010240039240000800102400002400004800473528308387886500800518006280066160349560010202400002400002032000072000080062800621180021109010108000080000010240000043024004000040240040514043050194173380059800001010240000240000800108006880063800638024280066
480024800666200000004600028005121717228255600508001024004024000080010240000240000480048353374657512880080043800628006290327560010202400002400002032000072000080062800661180021109010108000080000010240000043024004002042240040514043250193163280063800001010240000240000800108005180063800678006380063
48002480062620000000450031800512171725513656004980073240042240000800102400002400004800443505387574662200800438006280066130349560010202400002400002032000072000080255800623180021109010108000080000010240000043024004001040240040504043050193162380063800001414240000240000800108006680068800638038980063
480024800686210000005800018004727172432556005880010240039240000800102400002400004800473533453574661500800478006280062903455600102024000024000020320000720000800628006311800211090101080000800000102400000435724004000040240040514044050194173380047800001010240000240000800108006580067800638024180063
48002480062621000000177002280047217172272556005180010240040240000800102400002400004800483533647574530100800478006680070130349560010202400002400002032000072000080062800621180021109010108000080000010240000043024004000040240040514043050193173380066800001410240000240000800108006380063800638009080063
480024800666200000004600218005421717194255603768001024003924000080010240000240000480048350125157161400080047800658006290343560010202400002400002032000072000080062802521180021109010108000080000010240000044024004000041240040514043050193163480058800001010240000240000800108006380067800678027480065
4800248007062100004046002180048217784255600498001024003924000080010240000240000480048353345341424140080043800658006294534556001020240000240000203200007200008006280062118002110901010800008000001024000004302400400004024004051043050193172380063800621013240000240000800108006880063800638006380063
480024800626200000014500018018021717206255600538001024030524000080010240000240000480048353345357466200080055800628006290317856043620240000240000203200007200008006680062118002110901010800008000001024000004302400400000240040014043050192173280063800001010240000240000800108006680063800768034780066