Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (single, post-index, B)

Test 1: uops

Code:

  st1 { v0.b }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
62006289632341240027100392104724288781017855300010001000100010001000100050001090680000217892861728909310300010001000300010002896529029116100110001000100113110021011000141101310891776902311254620257324138193146452823810001569712909138651000100010002899728936287822897728781
620042894723211710181003632104687287960018111300010001000100010001000100050001090780006217622876028927310300010001000300010002879728811116100110001000100412910010111000121201309293227003316594520140333838192649532842210001522112874142911000100010002910828954290552910828852
6200428812231118112010045890047202871800178593000100010001000100010001000500010903800002169128567287838283003100010003000100028749288111161001100010001001201100101110001210013276955669023177134720351326038172548452835610001602912651138311000100010002877228947288552891728860
62004288232321151214000302004613286520017959300010001000100010001000100050001090480000217332876229042310300010001000300010002871128863116100110001000100212010011111000101101305095186989309254720234320738171844462839710001553613066139281000100010002883128924289562891429110
620042906423212211221005720047232858700180503000100010001000100010001000500010905800822177028838292953103000100010003000100029115290751161001100010001004221100100410001211013112933368443085145120536317838152648552852110001615013213144871000100010002916729211291182922829097
620042915423412010191003320047092896700183153000100010001000100010001000500010908800062169528948292583103000100010003003100029151294213161001100010001002222100112410001211013027932168653097134520341321638082441472846410001588412755141701000100010002915429049291062896929030
6200429005234119001710001004612287980018051300010001000100010001000100050001090680004217412877529057810300010001000300010002880629001116100110001000100222210011111000101101320092506928309375020432322538162247542853310001578012894141681000100010002904629186289822897929018
62004289502331190120000089004649288210018047300010001000100010001000100050001089780007217052878029096310300010001000300010002890828981116100110001000100520210010111000121101317792346868309294920368321638102546482852010001602212922139941000100010002898529110290312906128979
620042913323312112221003320046062884501179463000100010001000100010001000500010900800002171128768289373103003100010003000100028934289821161001100010001003205100101110022213013049917369393101104920416329638171749492847210001584112912139281000100010002904929095289892911828941
620042895123311521181000900147162881701178943000100010001000100010001000500010897800002170128725289873103000100010003000100028979289562161001100010001005301100103110011211013043927269313076134520320320838111944472850510001581312695140541000100010002897328967289602912028961

Test 2: throughput

Count: 8

Code:

  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  st1 { v0.b }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602068004062000002854080025883252401008010080000800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000021008000111800011210511012160121380037800008000080000801008004180041800418004180041
16020480040620000033408002588025240100801008000080000801008000080000435901437588486400000080015800408004059924359998240100200800008000020024000080000800408004011802011009910010080000800001008000002100800000480001100511012160141480037800008000080000801008004180041800418004180041
160204800406210000304080025880252401008010080000800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000021008000103800011210511012160121280037800008000080000801008004180041800418004180041
16020480040621000092080025881252401008010080000800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000021008000103800011240511011160121280037800008000080000801008004180041800418004180041
16020480040620000004080025883252401008010080120800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000021008000119800011210511012160131380037800008000080000801008004180041800418004180041
16020480040620000004080025880252401008010080000800008010080000800004359010375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000021008000103800001210511016160101380037800008000080000801008004180041800418004180041
160204800406200000122080025880252401008010080000800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000023008000103800011210511012160121380037800008000080000801008004180041800418004180041
16020480040620000004080025880252401008010080000800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000021008000103800010210511014160131380501800008000080000801008004180041800418004180041
160204800406200000345208002588325240100801008000080000801008000080000435901437588486400000080015800408004059924359998240100200800008012820024000080000800408004011802011009910010080000800001008000002100800010380001122051101216081280037800008000080000801008004180041800418004180041
16020480040620000094080025803252401008010080000800008010080000800004359014375884864000000800158004080040599243599982401002008000080000200240000800008004080040118020110099100100800008000010080000221008000103800011210511013160141380037800928000080000801008004180180800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002680040620000063601018002588125240010800108000080000800108000080000435842937588486400000800158004080040599463600202400102080000800002024000080000800408004011800211090101080000800001080000017008000100280001100502204163580037807268000080000800108004180041800418004180041
16002480040621000094501018002588225240010800108000080000800108000080000435842937588486400001800158004080040599463600202400102080000800002024000080000800408004011800211090101080000800001080000017008000100580001100502205164580037800008000080000800108004180041800418004180041
160024800406200000504310180025880252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001002800011170502204165480037800008000080000800108004180163800418004180041
160024800406210000954210180025883252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110941010800008000010800000170080001002800000170502203165380037800008000080000800108004180041800418004180041
160024800406200000486210180025880252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001206800001170502205164580037800008000080000800108004180041800418004180041
16002480040620000012300180025883252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001001800001180502205163480037800008000080000800108004180041800418004180041
160024800406200000963600180025883252400108001080000800008001080000801084358429375884864000018001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001102800011170502205165580037800008000080000800108004180041800418004180041
160024800406210000306000180025883252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001105800011170502204165480037800008000080000800108004180041800418004180041
1600248004062100006300180025883252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001001800011170502205164580037817368000080000800108004180041800418004180041
160024801626200000342010180025081252400108001080000800008001080000800004358429375884864000008001580040800405994636002024001020800008000020240000800008004080040118002110901010800008000010800000170080001002800000177502205165580037800008000080000800108004180041800418004180041