Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (multiple, post-index, 2S)

Test 1: uops

Code:

  st2 { v0.2s, v1.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6200629415236513131215939680043802916401844830091002100510051007100110045000109068000101021714288212885231030001000100030002000286782881111610011000100010000201000001000201335196776993319706220077318838202358622833810001542012603137651000100010002858928687286862859028773
6200428675223021000010471328710017658300010001000100010001002100050001091480002002173128544288843103000100010003000200028657287191161001100010001000020100000100020131629361693831050592032732143815962682844110001562413022141421000100010002889128971289332897328953
62004289152310020001004639288880178443000100010001000100010001000500010906800050021687286532897631030001001100030002000289642901211610011000100010000001000001000221316890696888314806320295323138211861612853610001577613052138651000100010002892728980290812895628852
62004292082320110000004748290550180553000100010001001100010001000500010907800060021746285162891731030001000100030002000288462879011610011000100010000201000001000001328293026901316016120142321938231959632821910001561312893137121000100010002870028852288562886028776
62004287792230130000004664286880177673000100010001000100110001000500010892800020021705285722886231030001000100030002000288442878711610011000100010009201000001000201335894786962311816120184312738161858632828810001584312818137551000100010002880928836288322884228951
62004286942230210001004730286400178173000100010001000100010001000500010906800020021780285032872331030031000100030032000287282874311610011000100010000221000001001201313992726901314506220072318138222359602833310001559812717140641000100010002884128845287362882528749
62004288292230110000004650287031177603000100010001000100010001000500010902800030021711285672886031030001000100030002000287702885211610011000100010002201000001000001325696846961315915920219316038281863622834010001577112462138901000100010002866128730287992876028773
62004288372220320001004613286750177773000100010001000100010001000500010904800040021729286072875931030001000100030002000287202872811610011000100010000201000001000201314292296979317906020153331138182060572820410001571012777139791000100010002869428819288462885528726
62004288172230210000004629286470177893000100010001000100010001000500010907800020821713285232881031030001000100030002000287272882111610011000100010000201000001000201329492676916324716320171313038202155712834110001592612819138131000100010002870528720288722877528808
62004287952230230001004786288490178413000100010001000100010001000500010899800010821755283142871731030001000100030002000286992856811610011000100010000201000001000201326895006971312515720246325338191158582811310001573412730135771000100010002870128651286212868228836

Test 2: throughput

Count: 8

Code:

  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  st2 { v0.2s, v1.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602068004064210100012900244218002599225241854801008253280180831208243380000435899037588486420640800150800408004059924359998240100200800008000020024000016000080040800401180201100991001008000080000100800000210080001003800018297000051101161180037800918000080000801008004180041800418004180041
160204800406210000000000103508002588325245273801008175680000801008000080000435899437588486404831800150800408004059924359998240100202800008000020024000016000080040800401180201100991001008000080000100800077001800070010800011210000051101161280037800008000080000801008004180164800418004180041
16020480040620101000070113518002509225242150801008514980000801008000080108435899037588486443200800150800408004059988359998240100200800008000020024000016000080040800401180201100991001008000080000100800670210080001001800018297000051101161180037800008000080000801008004180041800418004180041
160204800406210000000200848080025881252444038010080100800008010080000800004358994375884865564808001508004080040599243599982401002008000080000200240000160000800408004011802011009910010080000800001008000772901800080011800011210000051101161180037800008000080000801008004180041800418004180041
1602048004062010110012900205718002599525240261801008018680000802168000080000435900237588486416350800150801618004059924359998240100200800008000020024000016000080040800401180201100991001008000080000100800000210080001003800018297000051101161180037800008000080000801008004180041800418004180164
16020480040620000000020084908002588025244402801008116080000801008000080000435899037588486466320800150800408004059924359998240100200800008000020024000016000080040800401180201100991001008000080000100800088300381507008800012210000051101161180037800008000080000801008004180041800418004180041
1602048004062011120001200126418002599625240219801008232280000801008000080000435899437588486555560800150800408004059924359998240100200800008000020024000016000080040800402180201100991001008000080000100800000210080001000800011210000051101161180037800008000080000801008004180041800418004180041
160204800406201001003990020411800250922524522680100842898000080100800008000043589943758848642624080015080040800405992413599982401002008000080000200240000160000800408004011802011009910010080000800001008000002127080001100800018297100051101161180037800008000080000801008004180041800418004180041
160204800406200000000401103308002588325241135801008089680000801008000080000435899037588486448951800150800408004059924359998240100200800008000020024000016000080040800401180201100991001008000080000100800088290180008018800011210000051101161180037800008000080000801008004180041800418004180041
1602048004062111010009001831800258062524465480100820498000080100800008000043589903758848642624180015080040800405992435999824010020080000800002002400001600008026980040118020110099100100800008000010080000021008000120680002707100051101161180037800008000080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002680040620001010000900037560800258012524173180010800098000080010800008000043584293758848651271080015800408004059946360020240010208000080000202400001600008004080040118002110910108000080000108000002501800080038000112570502031603280037800008000080000800108004180041800418004180041
1600248004062110000000129100114808002500225240928800108474480000800108000080000435840937588486444140800158004080040599463600202400102080000800002024000016000080040800401180021109101080000800001080008725008000100280001825005020326604480037800008000080000800108004180041800418004180041
1600248004062010000000012000375518002511111252409328001084694800008001080000800004358409375884865689508001580040800405994636002024001020800008000020240000160000801658014811800211091010800008000010800080170080001102800018000502031602380037800008000080000800108004180041800418004180041
160024800406200000000007100162208002511110252447038001080922800008001080000800004358429375884865408008001580040800405994636010324001020800008000020240000160000800408004011800211091010800008000010800000170080001002800000000502031603380037800008000080000800108004180041800418004180041
160024800406200000000012620005636080025880252405898001080020800008001080000800004358429375884864172908011780040800405994636002024001020800008000020240000160000835858284911800211091010800008000010800000000800012008000111700502031603380037800008000080000800108004180041800418004180041
160024800406200000000001200011510800258822524115880010846948000080010800008000043584293758848654092080015800408004059946360020240010208000080000202400001600008004080040118002110910108000080000108000092500800011028000012570502051603280037800008000080000800108004180041800418004180041
16002480040621100000000900047390800258012524470580010800148000080010800008000043584053758848645164080015800408004059946360020240010208000080120202400001600008004080040118002110910108000080000108000070008000110138000201700502021603380037800008000080000800108004180041800418004180041
1600248004062010000000121200011481800250812524115880010816208000080010800008000043584293758848654085080015800408004059946360020240010208000080000202400001600008004080040118002110910108000080000108000001700800000088000101700502031603280037800008000080000800108004180041800418004180041
160024800406210000000002100663518002511002524469980010844168000080010800008000043584293758848654235080015800408004059946360020240010208000080000202400001600008004080040118002110910108000080000108000001700800080158000112300502031603380037800008000080000800108004180041800418004180041
16002480040620000000000010037900800258812524470380010805808000080010800008000043584293758848642772080015800408004059946360020240010208000080000202400001600008004080040118002110910108000080000108000001701800083018000012571502041604480037800008000080000800108004180041800418004180041