Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (single, post-index, H)

Test 1: uops

Code:

  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6200628945233101010000110047222878000179303000100010001000100010011000500010904800062170628541289193103000100010013000300028910288062161001100010001000000100010010002001318795196964315407720292321538223173752827010001579112900140941000100010002890728819290252896129035
62004288982331001000132100047212878700179653000100010001000100010001000500010898800072172028614288983103000100010003000300028924289221161001100010001000000100000010000001289191196923318518020301324138241974712826910001593712899141191000100010002903928865290792899629036
620042904523311010000000047012876400178773000100010011000100010001000500010895800082171228718289043103003100010003000300328926288871161001100010001000020100000010000001333090656871318707320251316438201875742834710001607412932139351000100010002889028909288772898128971
620042893023211000000100046732889100179903000100110001000100010001000500010908800062167928618290183103000100010003000300028890288961161001100010001000020100000010000001319592196909311716020260326338222170692842210001582912797140851000100010002889328952289632905428920
6200428953232110100000000459128811001797330031000100010001000100010005000109028008112173828659290843283000100010003000300028872290431161001100010001000020100000310002001303992246944315317320360328038252870752849910001573512960142341000100010002902328988289802906328949
6200429058233110100000000464629129101825130001000100010001000100010005000109008000521736284862901231030001000100030003006288802892721610011000100010002031001004351000301971323893026856319416820356320238231577682840910021539712967138071000100010002914229245290862927129143
620042926323611011311322640014663287700017926300010001000100010001000100150001091180088217172888729081122730031001100130003006290802920131610011000100010004221003011010000201312491686875317707020450327238282677732880510011608213170145591000100010002914229230291062920429093
62004291412371101000000104616289390018520300010001000100010001000100050001090680001121760283952882131030001000100030003000287142880311610011000100010000301000004251000276231330694086919314437719886317338332070732832010001596713286137071000100010002917528743288902895228942
620042901623211010000001048122871600180193000100010001000100010001000500010897800072174828608288093103000100010003000300028617287131161002100010001007022100800718510072001285589746748307517420513312938266071772868610041577613039138051000100010002966229600296092936329755
6200429434238011100001000494131025101901330001000100010001000100010005000109058000122166028593288813103000100010003000300028831289501161001100010001000030100010010000201313192216918311416820406324938212870722845710001591913025140511000100010002901929023290092895428950

Test 2: throughput

Count: 8

Code:

  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  st3 { v0.h, v1.h, v2.h }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020680040620111000129734241800251011125242580801008516380000801008000080000435899037588486469178001580040800405992435999824010020080000800002002400002400008004080040118020110099100100800008000010080007800080008002171800618070512713411813998009308000080000801008028680162804078016380284
16020480165621121016162649718741802711111147832421228019380348800608021680348801084363645376294065375780222801648028259986236016624078020080240801202002407202403608016580409218020110099100100800008000010080130725600801880021938012072570512813411801418018308000080000801008016580286802848016580166
160204802856211000000725561800250112252402198010081636800008010080000800004359002375884864489880015800408004059924359998240100200800008000020024000024000080040800401180201100991001008000080000100800077000800080088000182570511011711800378000008000080000801008004180041800418004180041
160204800406211000000943181800251111225242151801008204980000801008000080000435899437588486405078001580040800405992435999824010020080000800002002400002400008004080040118020110099100100800008000010080007725018000800118000182570511011611800378000008000080000801008004180041800418004180041
16020480040620111000010421418002511922524187580100822148000080100800008000043588503758848644902800158004080040599243599982401002008000080000200240000240000800408004011802011009910010080000800001008000780008000800148000082571511011611800378000008000080000801008004180041800418004180041
16020480040620110000127510918002511102252435238010082049800008010080000800004358990375884864042980015800408004059924359998240100200800008000020024000024000080040800401180201100991001008000080000100800087000800080088000182570511011611800378000008000080000801008004180041800418004180041
1602048004062010000007342118002501122524215180100842458000080100800008000043589903758848655505800158004080040599243599982401002008000080000200240000240000800408004011802011009910010080000800001008000782500800080078000172570511011611800378000008000080000801008004180041800418004180041
16020580040620110000092559180025118125244395801008247280000801008000080000435899437588486442928001580040800405992435999824010020080000800002002400002400008004080040118020110099100100800008000010080009825008000800118000182570511011611800378000008000080000801008004180041800418004180041
16020480040620100000012425418002511022524526380100820378000080100800008000043589943758848646157800158004080040599243599982401002008000080000200240000240000800408004011802011009910010080000800001008000772600800080088000182570511011611800378000008000080000801008004180041800418004180041
160204800406201000001212512018002510022524026380100825798000080100800008000043589943758848646524800158004080040599243599982401002008000080000200240000240000800408004011802011009910010080000800001008000782501800070188000082570511011611800378000008000080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160026800406200000000400011488002588325245699800108375580000801268000080000435842937588486540950180015080040801625994636002024001020800008000020240000240000800408004011800211091010800008000010800000210080001106800011210050203162280037800938000080000800108004180041800418004180041
160024801646210000010400056338002588025244704800108114880000800108000080000435842937630846570820180015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010800000210080001103800011210050202163280037800008000080000800108016680041800418004180041
16002480040620000000041009248002588125244709800108080980060800108000080000435842937588486540880180015080040800405994636002024001020800008000020240000240000800408004021800211091010800008000010800000210080001006800011210050202163380037800008000080000800108004180041800418004180041
16002480040620000000020001080025883252447048001080014800008001080000800004358429375884865408601800150800408004059946239621882498702080000800002024000024000080040800401180021109101080000800001080000000080001003800011210050202163280037800008000080000800108004180041800418004180041
160024800406200000006400037578002500125244704801018138080000800108000080000435842937588486512710080015080040800405994636002024001020800008000020240000240000800408016111800211091010800008000010800000210080000003800011210050202163280037800008000080000800108004180041800418004180041
16002480040621000000020006978002588325240018800108375780000800108000080000435842937588486540840180015080040800405994636002024001020800008000020240000240000800408014311800211091010800008000010800000210080001001800000210050202162280037800008000080000800108004180041800418004180041
160024800406200000000400011488002508025240587800108001080000800108000080000435842937588486570810180015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010800000210080000001083800011210050202162280037800008000080000800108004180041800418004180041
16002480040621001000040004694800258812524235080010815788000080010800008000043584293758848654100008001508004080040599463600202400102080000800002024000024000080040801601180021109101080000800001080000000080061003800011210050202162280037800008000080000800108004180041800418004180041
160024801616200000100200046948002588025240588800108000880000800108000080000435842537630846488170180015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010800000210080001103800011210050202162280037800008000080000800108004180041800418016280041
160024800406220000000200019458002588025245642801018057580000800108000080000435842937588486420840180015080040800405994636002024001020800008000020240000240000800408004011800211091010800008000010801200027280061121066801201210050563253280142800908000080000800108224581876821108200680409