Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 1 reg, 8B)

Test 1: uops

Code:

  st1 { v0.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6100529471228530200000462429057002450120001000100010001000500050005159442864929498310200036210003000293022958511610011000100010000001000001000022130809139692831261542184032523817136260285311000160771360715235100010002934729377293962927429371
61004298812340203000004626288360024240200010001000100010005000500051593928485294643102000010003000291462921911610011000100010000201000101000020130329272691731641572166631993814155555283981000166951364015090100010002931029273293382925129326
610042935822603030112104612288790024235200010001000100010005000500051597428409293083102000010003000292632918311610011000100010000201000001000020131379349696231070582155131513807135355284411000163681361015167100010002930329333294252940729291
61004293062280204000004592288590024269200010001000100010005000500031596428471295423102000010003000291922922811610011000100010000201000131000030130689414695631411522176031883813136055285121000162971350915150100010002923129458293012934629394
610042936222701001001047142883700243022000100010001000100050005000101597828413293663102000010003000292062920411610011000100010000201000291000020131279359695031812562171331723810185659285081000163041373514993100010002936829257293072930829358
61004294042280102000004584289050024314200010001000100010005000500021595628471294333102000010003000294772937111610011000100010000001000001000000132289270683931042572175432653811165653284631000161391375515205100010002938729236293872940929339
61004293962280203000004605288060024419200010001000100010005000500021597328359292553102000010003000291002916011610011000100010000301000001000020132089148697431820502185732403816115057286411000163381387215248100010002951029316293012933629308
610042942123303010012004646288140024419200010001000100010005000500021598628448293463102000010003000291072913011610011000100010000201000001000000129419099667630931602190632543810125554285681000163591372915279100010002931429379294112934529372
61004294122360302000114663289020024393200010001000100010005000500021595428525294283102000010003000292062910511610011000100010000201000001000020129029164687831540542178232573819155347284681000164161381315213100010002926529282293162936029230
61004292042360302000104596287680024332200010001000100010005000500031597328470292943102000010003000291552915511610011000100010000201000001000020131309344687230601562168931913818106161283181000161261363314961100010002924629297292512930129263

Test 2: throughput

Count: 8

Code:

  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  st1 { v0.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802058004062000000001240008002588025160100801008000080100800004179703375882408001580040800406992436999716010020080000200240000800408004011802011009910010080000800001008000000008000100480001100005110002170238003780000080000801008004180041800418004180041
80204800406200000000020008002580025160100801008000080100800004179703375882408001580040800406992436999716010020080000200240000800408004011802011009910010080000800001008000002100800014072800011210005110005170448003780000080000801008004180041800418004180041
8020480040621000000004000800258812516010080100800008010080000417970337588240800158004080040699243699971601002008000020024000080040800401180201100991001008000080000100800000210080001003800011210005110004170448003780000080000801008004180041800418004180115
802048004062100000009400080025080251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000021008000100080000000005110002170238003780000080000801008004180041800418004180041
80204800406200000000122000800258812516010080100800008010080000417970337588240800158004080040699243699971601002008000020024000080040800401180201100991001008000080000100800000210080001006800001210005110002170358003780000080000801008004180041800418004180041
8020480040621000000064000800258802516010080100800008010080000417970337588240800158004080040699243699971601002008000020024000080040800401180201100991001008000080000100800000210080000003800011210005110003170348003780000080000801008004180041800418004180041
80204800406210000000124000800258812516010080100800008010080000417970337588240800158004080040699243699971601002008000020024000080040800401180201100991001008000080000100800000210080001103800011210005110004170458003780000080000801008004180041800418004180041
8020480040620000000012400080025880251601008010080000801008000041797033758824080015800408004069924369997160100200800002002400008004080040118020110099100100800008000010080000021008000000380001100005110004490538003780000080000801008004180041801008004180041
8020480040621000000002000800258802516010080100800008010080000417970337588240800158004080040699243699971601002008000020024000080040800401180201100991001008000080000100800000210080001003800001210005110005250438003780000080000801008004180041800418004180041
802048004062100000000400080025800251601008010080000801008000041797033758824080015800408004069924369997160100200800002002402408004080040218020210099100100800008000010080000021008000000758800011210005110004170238003780000080000801008004180090800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d7 | d9 | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580040621000000012400800258802516001080010800008001080000417862137588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001007800011210005020041600558003780000080000800108004180041800418004180041
800248004062100000000400800258832516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001000800011220005020031600458003780000080000800108004180041800418004180041
800248004062000000000200800258832516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001003800011210005020041600658003780000080000800108004180041800418004180041
800248004062010000000400800258802516001080010800008001080000417862137588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080000006800011210005020051600548003780000080000800108004180041800418004180041
800248004062000000000400800258812516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080111801782180021109101080000800001080000021080001003800021210005020051600458003780000080000800108004180041800418004180041
800248004062000000000400800258832516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001103800011210005020051600448003780000080000800108004180041800418004180041
800248004062000000000400800258802516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001003800011210005020051600438003780000080000800108004180041800418004180041
8002480040620000000012400800258802516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001004800011210015020031600548003780000080000800108004180041800418004180041
800248004062100000000200800250812516001080010800008001080000417864537588248001580040800406994637002016001020800002024000080040800401180021109101080000800001080000021080001003800011210005020051600558003780000080000800108004180041800418004180041
800248004062000000000400800258032516001080010800008001080000417864537588248001580040800406994637002016001020800002024024080040800401180021109101080000800001080000021080001003800011210005020041600358003780000080000800108004180041800418004180041