Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, post-index, 4S)

Test 1: uops

Code:

  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.003

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.003

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
66005293622282700280110090104614288903016900700610003006300010003000300050001500235726523058292692923931070003000300040009000292642918811610011000100003000093004000300451460012966939968383241156320116336338091558532855810001595413154145033000300010002932129354294282942829231
66004294512282400240000090104658289101116915700910003003300010003000300050001500635755723034291922947331070003000300040009000292162924811610011000100003000003004004300451490012802941869023133105720166320738081558582847110001631913301144873000300010002941629488293172941329551
6600429454227230031000009010461528928011684970091000300930001000300030005000150033575752303429122292483107000300030004000900029214291701161001100010000300009300400030045149001309592646928309995720055316638081255542850610001596613280143193000300010002930429381293662939229299
66004293332272100250000018010467328863331693470091000300830001000300030005000150063574562300129177293129107000300030004000900029166291581161001100010000300009300400430045109001298293826896312416562022632333811651592846610001617913329144313000300010002937729294292582936829382
66004293102282200240000000104573288670116960701210003008300010003000300050001500335765523004291672934931070003000300040009000292462920211610011000100003000293004104300451490013094942168913149156620194325238061353622853110001581913280144283000300010002933729434294002934429285
66004294252272000270000090104711289190016938700310003009300010003000300050001500035628522958292302935331070003000300040009000293152922611610011000100003000093004004300431090013143941868853185165520104315038081461612844810001603913430142903000300010002942829360293762942729329
66004293592272300210000030104616288251016977700810003009300010003000300050001500335675022988291952934931070003000300040009000292652910911610011000100003000093001000300451490013119930868853093115620081323238041054632840810001577913071143543000300010002928529226293072934229356
66004294962272700250000021010462628827331687170081000300830001000300030005000150003572952299629080292553107000300030004000900029160293391161001100010000300009300010430045109001294591326949304915621996533103813963602849310001617613125142923000300010002947829328294362955629255
6600429433226260022000009010464428887001695070091000300930001000300030005000150003572832298229290293393107000300030004000900029151292241161001100010000300009300400430045149001290291216865313614592019332203809855592862810001634513314143053000300010002934529296292652943029343
66004292872271800280100090104681288950017064700810003008300010003000300050001500335725623017293282935631070003000300040009000292172905211610011000100003000093004004300451490013132931969193133115620187324138091654602856110001629613096143373000300010002942429397292312948429469

Test 2: throughput

Count: 8

Code:

  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3 { v0.4s, v1.4s, v2.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3c | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | 77 | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802058008162000000004600020800322177142556014680106240041240000801082400242400194805363491346575422800800478006680070170737056015420024002424002420032003272007280065800621180201100990100100800008000001002400000430240041000442400005140440111511601600800590800061010240000240000801008007180067800478006780071
48020480066621000000012000108005120018255601468010624004024000080108240024240019480536349134657515340080047800708006600639056015120024002424002420032003272007280066800621180201100991100100800008000001002400000430240041000402400415040440111511601600800430800061010240000240000801008006780067800478006780064
480204800656210100000450022080051077142556014680106240003240006801082400242400194805363524694570213610804348006280068130637056015120024002424002420032003272007280066800691180201100990100100800008000001002400000430240041000412400405140440111511601600800460800061014240000240000801008004780064800678006780067
4802048006664300000004600100800582771425560145801062400412400008010824002424002448053835265653866824008004780062800629061705601512002400242400242003200327200728006680062118020110099010010080000800001100240000044024004100002400405141440111511701600800590800061010240000240000801008007180071800768007180071
480204800666430000000460002080051270142556014780106240041240000801082400242400244805361917988573227300800478006980066130348056010020024000024000020032000072000080045800691180201100991100100800008000001002400000440240042010412400000141450000510911711800670800001014240000240000801008007180067800678006780071
4802048006664300000004500220800552771425560139801002400392400008010024000024000048049734914304631336008004780066800450034505601002002400002400002003200007200008006680064118020110099010010080000800000100240000000240040010412400415140440000510911611800630800001014240000240000801008007280071800688006780071
48020480066643000000000022080055047132556014080100240041240000801002400002400004804983528698568566200800308006680066130349056010020024000024000020032000072000080062800661180201100991100100800008000001002400000440240041000442400410040440000510911611800630800002914240000240000801008006680070800678006780046
4802048004564300000004600200800510779255601398010024004024000080100240000240000480497352454057537350080051800668006513035205601002002400002400002003200007200008006680067118020110099010010080000800000100240000000240039000442400395040440000510911611800630800001010240000240000801008006780067800718004680067
480204800666430000000580011080051200142556013980100240005240000801002400002400004804973524540572701900800478008380066190353056010020024000024000020032000072000080068800701180201100990100100800008000001002400000440240040010402400415141440000510911711800630800001010240000240000801008004680067800678007180067
480204800676430000024154700420800510721425560139801002400392400008010024000024000048049935245403866667008004780066800669034905601002002400002400002003200007200008006680066118020110099010010080000800000100240000000240040010392400000041450000510911611800590800001010240000240000801008006680071800678006780063

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4800258007062110000004603280239217177605600498001024004224000080010240000240000480417351170456839801800518006280062903435600102024000024000020320000720000800628005611800211090101080000800000102400003502400000026986240040010430501906161098007018000006240000240000800108006380065800638006380067
4800248006262000000004500080047217179255600498001024004324000080010240000240000480048353345357201870803398005080062903565600102024000024000020320500720000800618004511800211090101080000800000102400003502400400002400405133430501909169980053080000610240000240000800108006380063800468006380062
48002480066620000000047080800472771725560049800102400442400008001024000024000048041835245405738213080061800768006645032756001020240000240000203200007200008004580062118002110901010800008000001024000044024004110432400415140440501907171010800630800001410240000240000800108006780067800678006780066
480024800666200000100460028005507714255600498001024003924000080010240000240178480047352454038665842801028006680066160349560010202400002400002032000072000080065800621180021109010108000080000010240000430240041004124004151404405033010161010800420800001414240000240000800108007180072800678006780071
480024800666210000000460328005127714255600498001024003924000080010240000240000480047352200457513400800518004580070160327560010202400002400002032000072000080045800611180021109010108000080000010240000440240040004424004051404405019010161010800630800001414240000240000800108006780067800678020280067
480024800666200000000570028005127717255600498001024003924000080010240000240000480047353066556982782801088006680045180351560010202400002400002032000072000080066800661180021109010108000080000010240000440240041004124004001404405019010161010800630800001410240000240000800108006780069800668006780067
480024800626210000000460328005520714255600498001024004024000080010240000240000480047352454038666073800608006680066130345560436202400002400002032000072000080262800661180021109010108000080000010240131436124017300599624030101404405047073410783371080124014240000240000800108025880450802598025880260
4800248025962200100221791761280422277919688565834810542422392422108106324295524195848041435245405140114480112800668026011489431715608622024018924037820320252721134802578025331800211090101080000800000102401304411524017510204124017160404405019010171010800630800001410240000240000800108007180067800678004680067
48002480070620000000055010280047277142556003980010240041240000800102400002400004800473522787570657608007080062800660034956001020240000240000203200007200008006780045118002110901010800008000001024000044024004110412400405141440501901017710800620800001010240000240000800108006780067800678006780067
4800248006662100000004600080055270132556004980010240039240000800102400002400004800483522004570305908006580065800620033256001020240000240000203200007200008006680062118002110901010800008000001024000043024000010442400405140440501901016106800670800001410240000240000800108006780068800678007180067