Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (single, post-index, S)

Test 1: uops

Code:

  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
620062887822310120011000120000478328528001788430001000100010001000100010005000109608000921685286172879731030001000100030003000287182869311610011000100010022011001311100013111331697096916317535420185320438142552532824210001570712593137701000100010002896328731286812873428758
6200428789223011411810103000473328701001782630001000100010001000100010005000108988008721650285102885331030001000100030003000287232877311610011000100010023221001021100010111319094476954311945320203321438181955502821810001593012559139091000100010002887628697287432888028859
62004288182240015111110001000463828729001786330001000100010001000100010005000108978000721743285232879331030001000100030003000287762867611610011000100010021311000115100014111326695036961309464820205323338182451502842710001574412862141141000100010002876128819287602875028840
62004288032230011111400000000469828642001783230001000100010001000100010005000109058000821747285442879331030001000100030003000287592874311610011000100010023211001011100124101315094456921321134420082322838252553512817610001561012826138631000100010002865628812288082879228801
6200428910223018111000001100479628644111779830001000100010001000100010005000108978000921729286312874931030001000100030003000287122884111610011000100010012011001011100024111330495657047321534919927323938162248562831210001524212401134631000100010002873728713286042863928496
620042858522101121110100010004634285091017589300010001000100010001000100050001090580001121706283542875831030001000100030003000284802861011610011000100010008001002113100000111341294277038318165120164319038121254482808410001537712710134041000100010002866128677287472894028783
6200428591223009111100001000468628545011763530001000100010001000100010005000109068000821694284522862531030001000100030003000285712853011610011000100010020421000000100013121317092376902311436020024330238341750482815310001544512763131391000100010002878328807287412874428664
6200428613221011410910001000479528432001771730001000100010001000100010005000109088000721694284572874331030001000100030003000285862860211610011000100010030401001000100012101356496247076318214919884324838052650582823310001502612441135071000100010002864228581287542854128608
62004285942220113101010001000487328511001759330001000100010001000100010005000108988000821697284882866631030001000100030003000285992854811610011000100010011211001000100024111312393236972322015020123318738232353542819310001564212620140771000100010002882928688287592869928854
6200428675223011000111001810004661285530017790300010001000100010001000100050001090980006217232890429414310300010001000300330002882328780116100110001000100003110010111000001214005102597201354155819453328938072257562807710001444212070124831000100010002822628285283012808828126

Test 2: throughput

Count: 8

Code:

  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  st3 { v0.s, v1.s, v2.s }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020680040621001000122001152080025880252402408010080280800008010080000800004359014375884864892908001580040800405992435999824010020080000800002002400002400008004080040218020110099100100800008000010080000017008000100180002100055981161180037800008000080000801008004180041800418004180041
16020480040620000010000011730800258812524026080100824288000080100800008000043590143758848761401080015800408004059924359998240100200800008000020024000024000080040800401180201100991001008000080000100800000180080001002800010170051101161180037800008000080000801008004180041800418004180041
16020480040621000000132000419408002588125244265801008346380000801008000080000435901437588486404250800158004080040599243599982401002008000080000200240000240000800408004011802011009910010080000800001008000000008000100580001000051101161180037800008000080000801008004180041800418004180041
16020480040621000000120001274080025883252402408010082096800008010080000800004359014375884864035108001580040800405992413599982401002008000080000200240000240000800408004011802011009910010080000800001008000001728080001101800001170051101161180037800008000080000801008004180041800418004180041
1602048004062100000002001670800258832524186480100851378000080100800008000043590143758848640407080015800408004059924359998240100200800008000020024000024000080040800401180201100991001008000080000100800000170080001005800001170051101160180037800008000080000801008004180041800418004180041
160204800406200000000200140080025880252402408019184147800008010080000800004359014375884865002808001580040800405992435999824010020080000800002002400002400008016380040118020110099100100800008000010080000000080001001800010170051101161180037800008000080000801008004180041800418004180041
160204800406210000101200029730800258802524026580100829058000080100800008000043590143758848647296080015800408004059924359998240100200800008000020024000024000080040800401180201100991001008000080000100800000170080001005800000170051101161180037800008000080000801008004180041800418004180041
160204800406200000000910011408002508025242286801008015480000801008000080000435901437588486473110800158004080040599243599982401002008000080000200240000240000800408004011802011009910010080000800001008000001729080001002800011170051101161180037800008000080000801008004180162800418004180041
160204800406200000000201166080025881252402608010080140800008010080000800004359014375884865246018001580159800405992435999824010020080000800002002400002400008004080040218020110099100100800008000010080000017008000100280001100051101161180037800008000080000801008004180041800418004180041
1602048004062010000002004144080025883252434388010080166800008010080000800004358998376308465281308001580040800405992435999824010020080000800002002400002400008004080040118020110099100100800008000010080000000080001000800011170051101161180037800008000080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002680040620000000001046960800258802524470780010814628000080010800008000043584293758848654095080015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010800000210080008103800011210050203163480037800008000080000800108004180041800418004180041
1600248004062100000002004696080025880252447048001080921800008001080000800004358429375884864002708001508004080040599463600202400102080000800002024000024000080040800401180021109101080000800001080008022018000700480001100050204164580037800008000080000800108004180041800418004180041
1600248004062010000001200115318002590025242079800108445180000800108000080000435840537588486434590800150800408004060132360020240010208000080000202400002400008004080040118002110910108000080000108000082100800082111800011210050204164380037800008000080000800108004180041800418004180041
160024800416200000000900120800259922524470580010811488000080010800008000043584293758848654092080015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010800078290080008017800011290050205164580037800008000080000800108004180041800418004180041
1600248004062000000012200115308002588125241166800108379080000800108000080000435842937588486417480800150800408004059946360020240010208000080000202400002400008004080040118002110910108000080000108000072100800080111800011210050204165580037800008000080000800108004180041800418004180041
16002480040620010000040046940800258802524093480010809248000080010800008000043584293758848657081080015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010800000290080001008800011210050203164480037800008000080000800108004180041800418004180041
1600248004062000000004004686080270991462524139580191801518012080126802328010843631263783976667208080221080162804076001036601032410302080250801202024036024036080040800402180021109101080000800001080127102957080127303256801817297350563255580142802768000080000800108016580285801648016481875
16002482242636000122132990044918014888147532405788019381377801208024280116801084360854376722464445508022208033880165601351460102240690208024080000202400002400008004080040118002110910108000080000108000882900800080111800011217050203167780037800008000080000800108004180041800418004180041
160024800406211000000400569108002588125241390800108138080000800108000080000435842937588486400510800150800408004059946360020240010208000080000202400002400008004080040118002110910108000080000108000702900800070011800011210050206165780037800008000080000800108004180041800418004180041
160024800406200000002420069408002500025244704800108058080000800108000080000435842537588486540810800150800408004059946360020240010208000080000202400002400008004080040118002110910108000080000108000882101800010011800018210050204165580037800008000080000800108004180041800418004180041