Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 1 reg, 2S)

Test 1: uops

Code:

  st1 { v0.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
610052908923302600260100004820285901123792200010001000100010005000500091595628262289303102000100130002875528701116100110001000100002010000001000030001305094536891313311492131832543815134951282731000156741300914665100010002895528917289102884928938
610042879623202101230000004714285251123924200010001001100010005000500001592528798298913102000100030002874528712116100110001000100002210000201000030001308995886934311611532135231873819184653282511000155781345115025100010002875228872288482894028827
61004288102320221021010132880470728566002386420001001100010001001500550428159522874829358310200010003000287602879311610011000100010002201000020100002000131169495693131249412133132253819155150281981000157321303414624100010002894928913288412881228805
6100428901231016111510200046442860001238522000100010001000100050005000816007285942958829992004100130002871328705216100110001000100021010010201001020201330493686916315410502115932223815144645281941000157921300514337100010002927329143291852898629066
6100429040231024012310113200478828481002375120021000100110001000500550009159622815628836710200010003000286622865011610011000100010010301000000100003000133559456693431339482138632933819164556284261000158821333114730100010002896129000290672897528808
61004287812310230020000000476128630112388320001000100010001000500050005159692815929072310200010003000286992852511610011000100010022311001021100012110133949410696931767452116631733820204745281431000154761312614520100010002889128701288662870728685
610042879123011511181000204758284040024032200010001000100010005000500071598128029288363102000100030002874428722116100110001000100223110010111000131101314094206985308211482114032483813154551281761000155401295914599100010002888429022289002880228930
610042889123311401221000104746285760023926200010001000100010005000500031594828192287873102000100030002881528749116100110001000100222110010111000131101312093776976313710482130032463815144947283391000155561321214462100010002892228894288262883228879
61004289892321220226100020472228821102390720001001100010001000500050009159922829029106310200010003000292002927811610011000100010000001001000100000006821318094926911304912432171132943812145343284621000161011381215320100010002948629295294702953929458
6100429361235021001600000046292889500243532000100010001000100050005000111598228601292773102000100030002929229246116100110001000100002010001031000000001304493636988313312572190632583811194557286751000162951332815198100010002937029483296132949329335

Test 2: throughput

Count: 8

Code:

  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  st1 { v0.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | d9 | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020580040642000000000200080025880251601008014680000801008000041797033758824080015800408004069924370029160100200800002002400008004080040118020110099100100800008000010080000017080001001800001170511031700338003780000080000801008004180041800418004180041
8020480040643000000000200080025883251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000019080001101800010170511041710438003780000080000801008004180041800418004180041
8020480040642000000000400080025803321601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080001000800011170511031710338003780000080000801008004180041800418004180041
80204800406420000000012200080025880251601008010080000801008000041797033759940080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080001201800011170511031710248003780000080000801008004180041800418004180041
80204800406420000000034100800258845251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080001003800011180511031700338003780000080000801008004180041800418004180041
8020480040643000000000400080025881251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080000103800011170511021700248003780000080000801008004180041800418004180041
802048004064300000000341008002508025160100801008000080100800004179703375882408001580040800406992436999716010020080000200240000800408004011802011009910010080000800001008000000080001003800011170511031710338003780000080000801008004180041800418004180041
8020480040643000000003400080025880251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080001001800010170511031700338003780000080000801008004180041800418004180041
80204800406430000000018400080025880251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080001003800011170511031710338003780000080000801008004180041800418004180041
8020480040643000000000400080025000251601008010080019801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000017080001100800011170511031710338003780000080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580040643110010000700280025994251600108001080000800108000041786213759892080015080040800406994637002016001020800002024000080040800401180021109101080000800001080008729080008102638000107297000502251665800378000080000800108004180041800418004180041
8002480040642111012000700280077994251600108001080000800108000041786213758824080015380040800906994637002016001020800002024000080040800401180021109101080000800001080007829080008012908000108297000502251645800378000080000800108004180041800418004180041
800248004064211001000612002800259942516001080010800008008380000417862137588240800150800408004069946370020160010208000020240000800408004011800211091010800008000010800087291800081088000108297000502231654800378000080000800108004180091800418004180041
800248004064311101000012102800258932516001080010800008001080000417862937588240800150800408004069946370020160010208000020240000800408004011800211091010800008000010800077290800080088000108297100502251655800378000080000800108004180041800418004180041
8002480040642111010000120028002590225160010800108000080010800004178621375882408001508009080089699463700511601522080080202400008004080040118002110910108000080000108000872918000700118000207297100502251645800378000080000800108004180041800418004180041
800248004064211101000090028007501142516001080010800008001080000417862937588240800150800408004069946370020160010208000020240000800408004011800211091010800008000010800297018000810298000108257100502251655800378000080000800108004180041800418004180041
800248004064311001000090028002511112251600108001080000800108000041786213758824080015080040800406994637002016001020823982024000080040800401180021109101080000800001080008825080007012908002308257000502241655800378000080000800108004180041800418009280041
80024800406421111100009002800250113251600108001080000800108000041786213759892080015080040800406994637002016001020800002024000080040800401180021109101080000800001080007825080008002698000108257000502271654800378002780000800108004180041800418004180041
80024800916431111100009002800251184251600108001080000800108000041786213758824080015080040800406994637002016001020800002024000080040800401180021109101080000800001080008825180008012848000108277000502261645800378000080000800108004180041800418004180041
8002480040643110111002049002800251011325160010800108000080010800004178621375882408001508004080040699463700201600102080000202400008004080040118002110910108000080000108000870080007111468000107257000502241635800378000080000800108009180041800418004180041