Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 1 reg, 8H)

Test 1: uops

Code:

  st1 { v0.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch cond (94) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e2 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
61005294142366101021000000046362880600243392000100010001000100050005000010015958284622942331020000100030002921729176116100101000100010021211001124511000121001329494587000314614421764334738181038432859401000164331352015203100010002937929377292452932129327
61004294372351100000000100047172897100248642000100110001000100050005000050015974285002927431020000100030002922929137116100101000100010013031001001100012100132169254695831421432174132343816942412861901000158901336914921100010002935529421293832936529315
61004293052361100112677891000467328819002438420001000100010011000500050000100160292856629399310200001000300029240292082161001010001000100510310020011000121301332795446922316214321603343138132139412859201001162461343814811100010002938129303294892930229247
6100429359235010011065117352900146942880810244922000100110011000100050005005000015982285182937881020000100030002909329314116100101000100010031211001004061000131001312594196925310703621680321838191038382851501001161841376315053100010002927429399293562937429379
610042932523511121000051989000470228919002443920001000100010001000500050000600159632851829317310200001000300029319291491161001010001000100122110010011000101101308592576938317203921675328538151141372853001000162261371915304100010002936329336294682931129329
61004294352360110011005791001460228855002430420001000100010001000500050000100159632864329397310200001000300029125292651161001010001000100220110010111000121001311294266945320004121701340138131335392857801000161151330515149100010002942129346293622933629346
61004293172360101111005551000464328867102442520001000100010001000500050000600159532863629383310200001000300029138291831161001010001000100222210011211000131101325194066948318104221759325938151440432857601000164581342114924100010002937029337293792950229306
6100429329237010111100522100146572884701243342000100010001000100050005000070915959284512935831020000100030002925029204116100101000100010022301001011100013110132059393701131420412175431993819933402865201000161091342715084100010002941829526294202930529481
61004294042360101001005851000470328809002437220001000100010001000500050000100159862854729324310200001000300029210292761161001010001000100222110010011000131201325393797022319604321653319038171841402847901000159621348114894100010002931729254293482947129270
61004292602351101101001201001474328817102421720001000100010001000500050000300159732855829322310200001000300029193292291161001010001000100210110010111000101101321493556998319704021786330338151140352851301000163421376915131100010002945429361294022930429255

Test 2: throughput

Count: 8

Code:

  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  st1 { v0.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802058004062011100007001800250112251601008013280000801008000041797033758824800150800408004069924369997160100020080000200240000800408004011802011009910010008000080000100800088250180008017800007257000511011711800378000080000801008004180041800418004180041
8020480040620100000090018002510118251601008010080000801008000041796793758824800150800408004069924369997160100020080000200240240800408004011802011009910010008000080000100800077250080007018800018277100511011711800378000080000801008004180041800418004180041
802048004062010100009001800251104251601008010080000801008000041796873758824800150800408004069924369997160100020080000200240000800408004011802011009910010008000080000100800089001800290111800018257000511011701800378000080000801008004180041800418004180041
80204800406211100000120018002511114251601008014180000801008000041796793758824800150800408004069924369997160100020080000200240000800408004011802011009910010008000080000100800077000800080111800008257000511011711800378000080000801008004180041800418004180092
80204800406211000001290018002501142516010080100800008010080000417967937588248001508004080040699243699971601000200800002002400008004080040118020110099100100080000800001008000870008003002880000807200511011711800378000080000801008004180041800418004180041
802048004062011010012120018002511104251601008010080000801008000041796793758824800150800408004069924369997160100020080000200240000800408004011802011009910010008000080000100800077250080008008800018257000511011711800378000080000801008004180041800418004180041
80204800406201000000900180025111142516010080100800008010080000417968737588248001508004080040699243699971601000200800002002400008004080040118020110099100100080000800001008007315250080075002338800678257000511011711800378000080000801008004180041800418004180041
802048004064610005209101800251011225160100801008000080100800004179679375882480015080040800406992437002916010002008000020024000080040800401180201100991001000800008000010080008700080008008800018257000511011711800378000080000801008004180041800418004180041
80204800406421001000900180025110444160100801008000080100800004179679375882480015080040800406992436999716010002008000020024024080040800401180201100991001000800008000010080007700180007101080001807000511011711800778000080000801008004180041800918004180041
802048004064310000007001800761111425160100801008000080100800694179679375882480015080040800406992436999716010002008000020024000080040800401180201100991001000800008000010080008800180007007800008257000511011711800378000080000801008004180041800418004180092

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580040621000097840008002588025160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001000800001210502031633800378002880000800108004180041800418004180041
8002480040620000002100800258832516001080010800008001080000417864537588240800158004080040699463700201600102080000202400008004080040118002110910108000080000108000002108000100380000100502031633800378000080000800108004180041800418004180041
800248004062100009482000800258002516001080010800008001080000417864537588240800158004080040699463700201600102080000202400008004080040118002110910108000080000108000000080001003800011210502041643800378000080000800108004180041800418004180041
80024800406200000021008002588125160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001003800011210502031633800378000080000800108004180041800418004180041
80024800406200000021008002588025160010800108000080010800004178645375882408001580243802426994637002016001020800002024336080040800402180021109101080000800001080000022080000003800011250505221633800378000080000800108004180041800418004180041
80024800406433600004000800258802516001080010800008001080000417864537588240800158004080040699463700201600102080000202400008004080040118002110910108000080000108000002108000110180001100502031623800378000080000800108004180041800418004180041
800248004062100000400080025880251600108001080000800108000041786453759892080015800408004069946370020160010208000020240000800408004011800211091010800008000010800000210800012010800011290502032533800378000080000800108004180041803458004180344
80024803436270000103240018002588725160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800401180021109101080000800001080000017080001003800011170502041633800378000080000800108004180041800418004180041
8002480040599000091800008002508025160010800108000080010800004178645375882408001580040800406994637002016001020800002024000080040800401180021109101080000800001080000017080015006800011170502031633800378000080000800108004180041800418004180041
8002480040620000045640008002588025160010800108000080010800004179537375882418001580040800406994637002016001020800002024000080040800401180021109101080000800001080000017080001003800001170502021633800378000080000800108004180041800418004180041