Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 1 reg, 2D)

Test 1: uops

Code:

  st1 { v0.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
61005289312330113111810009004678284510023903200010001001100010005000500060159722823828774310200010003000287202878211610011000100010023311001011100012110133439377685931767412122231903821114441282291000159091309114593100010002883828888288072885928886
6100428757232011411191000204670284400023888200010001000100010005000500010159612816228970310200010003000286732885111610011000100010022211001011100012130131349229691031066362126432823815114347283631000156731318014376100010002891428894288432889428934
61004288952330019001400008804691287800024401200010001000100010005000500040159702830528924310200010003000290532902211610011000100010000201000000100002000131539447691131446402148831913818183540283331000157311320514549100010002895728843287412891129012
6100429283233101500110000004688285430023826200010001000100010005000500040159722826928993310200010003000288152888411610011000100010000201000000100002000130209664695531715402146632633814223541284781000157921336814710100010002908428914288652889728856
6100428727231001300130000004851284750023862200010001000100010005000500040159502817728929310200010003000286292866011610011000100010000201000000100002000133069340691431425362132332013814113443281521000156291302614337100010002885528809287982879728626
6100428743231001600120000104702284870023709200010001000100010005000500000159712800528761310200010003000287922873111610011000100010000201000000100002000132249558694931847372110031723814114332282481000156221306514467100010002897628868289052884829067
6100428960232001010190000004774285840023912200010001000100010005000500071159612824528969310200010003000286592881511610011000100010000201000000100002000132079563689331495432131032593817193535283841000157741325014563100010002893028841289332893428954
61004289612320116019100132204717285640023888200010001000100010005000500050159632822729008910200010003000288092874611610011000100010032221001011100112130132949433695031133352126633093812123341282961000156601299314399100010002886128847289742895128866
6100429007232002100130000004814285320023857200010001000100010005000500050159632775528954910200010003000288372884511610011000100010000201000000100002000133009439688331185372121732583818114037281701000158261311114796100010002883828849289802892828975
6100428861232011611810009004786285930023933200010001000100010005000500040159582816828940310200010003000288792873021610011000100010012211001011100012120131589328692631008332125632513816114133283441000155901342514616100010002882628937288622893628891

Test 2: throughput

Count: 8

Code:

  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  st1 { v0.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 66 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802058004062010111000902800259842251601008010080000801008000041796873758824108001580040800406992436999716010020080000200240000800408004011802011009910010008000080000100800087008000801118000080700511251753800378002880000801008004180041800418004180041
8020480040643101000009028007699702516010080100800008010080000417967937588240080015800408004069924369997160100200800002002400008004080040118020110099100100080000800001008000872908000800880001830700511351755800378000080000801008004180041800418004180041
8020480040643100000001202800250946204516022280201800508020280207417692837618340080015800408004069924369997160100200800002002400008004080040118020110099100100080000800001008000872918000801880001829710511251735800378000080000801008004180041800418004180041
8020480040643100000012902800258980251601008010080000801008000041796793758824008001580040800406992436999716010020080000200240000800408004011802011009910010008000080000100800071029080008011850680001829710511251755800378000080000801008004180041800418004180041
802048004064310010000702800769940251601008010080000801008000041796793758824008001580040800406992436999716010020080000200240000800408004011802011009910010008000080000100800077018000810148000170700512831743800378000080000801008004180041800418004180041
802048008964210110000120280025903025160100801008000080100800004179679375882400800158004080040699243699971601002008000020024000080040800401180201100991001000800008000010080008801800080075180001829710511251735800378000080000801008004180041800418004180041
8020480040643101110009128002509202516010080100800008010080000417967937588240080015800408009169924369997160100200800002002400008004080040118020110099100100080000800001008000772908000711880001829700511251745800378000080000801008004180041800418004180041
80204800406421000110090280025997025160100801008000080100800004179679375882400800158004080040699243699971601002008000020024000080040800401180201100991001000800008000010080007929280008011180001729700511251755800378000080000801008004180041800788004180041
8020480040642101100009028002598302516010080100800008010080000417967937588240080015800408004069924369997160100200800002002400008004080040118020110099100100080000800001008002972908000801880001829710511231753800378000080000801008004180041800418024480649
80204800406431010100070280025994025160100801008000080100800004179679375882400800158004080040699243699971601002008000020024000080040800401180201100991001000800008000010080007729080007001180001729700511251755800378000080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800258004064300000000212008002501142516001080010800008001080000417862137588241800158004080040699463700201600102080000202400008004080147118002110910108000080000108000772608000811117804638297150206164478007680000080000800108004180041800778004180041
80024800406431020000000008002588025160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800401180021109101080000800001080000019080000008800001170050204160448003780000080000800108004180041800418004180041
80024800406200000000002008002508125160010800108000080010800004178645375882418001580040800406994637002016001020800002024000080040800401180021109101080000800001080000017080000109800011210050206160468003780000080000800108004180041800418004180093
80024800406200000000004008002588125160010800378000080010800004178645375994008001580040800406996537002016001020800002024000080040800401180021109101080000800001080000021080001003800011210050204160438003780000080000800108004180041800418004180041
80024800406200000000000108002588025160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001006800011210050205160478003780000080000800108004180041800418004180041
8002480040621000000001322008002508025160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800405180021109101080000800001080000017080000103800010170050205160568003780000080000800108004180041800418004180041
8002480040641000000000400800258832516001080010800008001080000417864537588240800158004080040699463700201600102080000202400008004080040118002110910108000080000108000001808000100736800011170050205160768003780000080000800108004180041800418004180041
8002480040621000000006400800258832516001080037800008001080000417864537588240800158009080040699463700201600102080000202400008004080040218002110910108000080000108000000080001003800001170050205160358003780000080000800108004180041800418004180041
80024800406200000000000008002588025160010800108000080010800004178645375882418001580040800406994637002016015320800002024000080040800401180021109101080000800001080000017080023004800000170050205160448003780000080000800108004180041800418004180041
800248004062100000000041080025884425160010800108000080010800004178645375882408001580040800406994637002016001020800002024024080040800401180021109101080000800001080000017080001003800231170050204160668003780000080000800108009280041800418004180041