Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (single, post-index, D)

Test 1: uops

Code:

  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640072880223521224002132267004661288180217775500510042002200210012000200250002179116000120021903288012915133050002000200450006000287312880911610011000100020132422011042855200800001318292806929313113520062318838251546432877210011611412789142352000200010002924829070290922918529280
64004291452340030112113217700473928926001844550001000200420041001200420045005216711603240021794286112888531050002000200050006000292742910611610011000100020000402000006200004011201318393656955315614119970323138192038402832110001556312632139942000200010002916429584289762930928800
6400428818231002030000000490628427001771650001000200020001000200020005000216301600040021802286102867731050002000200050006000288772884111610011000100020000402000000200006025681323595187046320724219807320738132145362838010001556612600137772000200010002870828881288042878628853
6400428785232003010000100477628731221770250001000200020061011201220125030220261612810822119288422963782316505520142020503560662924529069161610011000100020232002010005825201424201283489496758296414320297320038171746382834510001544912657140512000200010002911029164289372890929068
6400428643223000030000100471928610021779850001000200020001000200020005005216231600080921822286882878931050002000200050006000289352872711610011000100020000602000000200006001308792236947316703719983319338162137382842510001560212791141632000200010002890028832288752899328918
6400428983232003030010000475128731001775150051000200020001000200020005000216161600000821850286552916231050002000200050006000288362880411610011000100020000602000000200004201336895146951316604019759315638261435372833310001562312663139932000200010002957029709294532895529100
64004294392360020201411132061700474129218021809850171007200620151003200020125040219441614431821866287782908731050002010201650256036291482893811610011000100020202602002000200000001314794127003325003519915309238273437382814510001523212544135132000200010002868928628285752874328493
64004285112220021300131728123300478328528001775550001000200020001000200020025000216111600030021825283662857810105000200020005000600028680286161161001100010002000042200000020000620134919530704832041401955632333819939372817510011513212379134352000200010002853928610286872854728658
6400428593222002030000100477328520201755850001001200020001000200020005000216261600030021868283062859331050002000200050006000286762866511610011000100020000602000000200004001316295216948325904419727316638221946362781710001530912531137082000200010002864328512287252853328658
64004286242220030300075000484128404021765150001000200020001000200020005000216321600000821819285282876131050002000200050006006287072867811610011000100020000402000000200000201302495576993313003519762315338231540442823310001566612631137082000200010002867728668287102880128657

Test 2: throughput

Count: 8

Code:

  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  st3 { v0.d, v1.d, v2.d }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320207800716200000000151068442800430160254049568010016407916000080100160000160000480499231999613030331800258004980049034440010020016000016000020040000048000080061800481180201100991001008000080000100160014123800160014019371600021438120051090117118004780000160000160000801008004980052800508006280062
320204800506201101010106003954280036161602540676780100162101160000801001600001600004804992399916130214408002580050800500331400100200160000160000200400000480000800508005011802011009910010080000800001001600121203016001400151600001438120051090117118005880000160000160000801008005980052800518006280051
320204800496211011000180035782800351616025404467801651644211600008010016000016000048049923999161301136080025802188005003304001002001600001600002004000004800008005081858118020110099100100800008000010016001213380116001401141600021438121051090117118004680000160000160000801008005180062800488005280051
32020480061620111100015004898280044161615042540554980100162134160000801001600001600004804992399916128956808002680050800500333400100200160000160000200400000480000800508005011802011009910010080000800001001600121300016001200151600001238120051090117118004780000160000160000801008006280051800528005180051
320204800486211100006180054962800441616025406765801001640011600008010016000016000048049923998021280052080036800508004903434001002001600001600002004000004800008004980062118020110099100100800008000010016001312384016001410151600001238120051090117118004880000160000160000801008005080050800608005180051
320204800506201100000180082800341600254001128010016182016000080100160000160000480499343921912965470800268005080058033240010020016000016000020040000048000080049800491180201100991001008000080000100160012133800160012112141600002000051090117118004280000160000160000801008004680050800458004680045
32020480049621000000030064780800300160254069658010016676316000080100160000160000480499207938813022410800258004380044033140010020016013616000020040000048000080044800441180201100991001008000080000100160000000016000200216000223200051090117118004780000160000160000801008004680045800468005080045
3202048004562000000003004139080029016025405598801001646461600008010016000016000048049921582551302568080025800458004503314001002001600001600002004000004800008004580044118020110099100100800008000010016000003800160002001116000223200051090117118004280000160000160000801008004580045800458005080046
32020480046620000000123005907080029161602540587880100165120160000801001600001600004804992319291128984908002480050800490326400100200160000160000200400000480000800498004911802011009910010080000800001001600000320016000200216000223200051090117118004280000160000160000801008004680045800508005180051
3202048004562100000003006134080031161602540494680100164156160000801001600001600004804992319801129934508002380050800500332400100200160000160000200400000480000800448004511802011009910010080000800001001600000320016000200216000223200051090117118004780000160000160000801008004680050800458004580046

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200278004562100000012300294208002901602540472280010167138160000800101600001600004800492158075129176900800238004580044033340001020160000160000204000004800008004580045118002110901010800008000010160000040001600020021600022400005019005175480041800000160000160000800108004680046800468004780046
3200248004562100001012300529808003016160254042388001016550816006080187160118160000480049295930713004190080025800448004503274000102016000016000020400000480360800458004511800211090101080000800001016000000001600020018601600022400005019006176580042800000160000160000800108004680046800468004780045
3200248004462000001003002914080842161602540610880010164333160000800101600001600004800492079404130437600800238004580043032740001020160000160000204000004800008004580045118002110901010800008000010160000040001600020021600022400005019004173480042800000160000160000800108004680046800458004680046
3200248004562000000003002044080030161602540399680010162864160060800101600001600004800492236246129621800800248005480044033640001020160000160000204000004800008004680045118002110901010800008000010160000040101600026021600022400005019005177580042800000160000160000800108004580046800468004780045
3200248004462100001003005373080029161602540399680010165381160000800101600001600004800492959291129840700800248004580045032740001020160000160000204000004800008004580044118002110901010800008000010160000040001600020021600022400005019006175480042800000160000160000800108004680046800468004580055
3200248004562000000003006470080030161602540439080069166186160000800101600001600004800492079525129840705800248004480044032740001020160000160000204000004800008004680213118002110901010800008000010160000040101600020021600022400005019004175680041800000160000160000800108004680046800488004580046
32002480045621000000001039090803631616025404349800101644771600008001016000016000048004922339751295723008002380044800450326400010201600001600002040000048000080045800441180021109010108000080000101600000406201600020021600022400005019004255580042800000160000160000800108004580046800458004580046
320024800466200000001327004606080033161602540456680010164068160000800101600001600004800492079279129622600800258004480046033640001020160000160120204000004800008004580045118002110901010800008000010160000040001600021021600022400005019036175380042800000160000160000800108021480044800468004680046
32002480045620000000030038390802051616025405210800101629161600008001016000016226848004925597361303995008002780053800520332400010201600001600002040000048000080052800521180021109010108000080000101600121246001600140114160002144612105019005174580049800000160000160000800108004680046800468004680046
320024800456200000000300412108003016160254024778001016630116000080010160000160000480049223856412987380080023800448004403274000102016000016000020400000480000800458005411800211090101080000800001016000000001600020021600022400005019004177580042800000160000160000800108004680046800468004680046