Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 2 regs, 8H)

Test 1: uops

Code:

  st1 { v0.8h, v1.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
620052967823800130060000100046492900522243453000100020001000200050001000017001605302855529393310300020005000293882935011610011000100020033622002212200404000131489574694431537522080433013811184954286851000163321364715197200010002955029434295332944629572
6200429376236008011100001000470728993002433630001000200010002000500010000700161120285632955331030002000500029372293841161001100010002004361200220222200206000132809351695331505542090932783807155352287731000160131387515052200010002952329526294462951829557
620042977023700701110311322650004599290560024542300010012002100120045005100988001611202864929203162930032000500029473293892161001100010002002500200313387200444000131309128690731574552088833153817205554288401000164271371315078200010002961029794296042970029576
620042966823801802101113961771004641289660224565300610012002100020005005100000001605502873229769310300920005000293502937011610011000100020053612002511120000402108132899212691431795572084632873816175849287001001159931360315063200010002944529430295872985029432
62004294292360010008000001004593289170224453300010002000100020005000100100001607802859029436310300020005000293912933611610011000100020032422002511520000400424131259444696331582512089832923816115255286421000161251340114846200010002954329421293742947629510
620042943323700700600000000468829002002429930001000200010002000500010000500160710285892938231030002014505029357299131716100110001000202754020105807027201000000130279021676930912532094331163819495452289611007163871357715088200010003021730534304643033830264
62004294052360050060001510004669288620224333300010002000100020005000100001001606302864629258310300020005000291482927811610011000100020023402002812200200000131349511697431557482077432693815184948286521000161461341315173200010002929029418293992933229455
620042933622701801111000100046732896200243743000100020001000200050001000040016073028605294443103000200050002924329290116100110001000200326020043012200004040130999520690431463512077133123819155250285511000161761342915058200010002935029332294352932629406
62004293672280010009000132100046542898502243673000100020001000200050001000000016062028513294123103000200050002911629256116100110001000200320020025602200006010131229366689931064482076732743812164246284211000161521355214857200010002923429348292922930929308
62004293262260060080000100046802887022242013000100020001000200050001000050016059028399292623103000200050002921729021116100110001000200330120024012200206000130209373691031385432073332983810145348286061000160391347914945200010002926729356293592932729360

Test 2: throughput

Count: 8

Code:

  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  st1 { v0.8h, v1.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205800586211010000211001800251616025240100801001600008010016000010807863679956080022080047800595996436000824026420016000020040000080059800611180201100991001008000080000100160015143601160016001816000216361410005110116118005680000160000801008004880059800538006080053
16020480047621100100017000180043016025240216801001600008010016000020402503680220080033080042800585996036001724010020016000020040000080178800421180201100991001008000080000100160014153600160016001416000016361400005110116118005680000160000801008004980048800608006180048
16020480058620100100122200018004316166252401008010016000080100160000116073436799320800340800528005259973116000524010020016000020040000080047800471180201100991001008000080000100160014143630160016113016000216381400005110116118004980000160000801008005380048800488006080062
16020480047622100000019000180032161632524010080100160000801001600001000838367990808003308005280047599643600102401002001600002004000008004780052118020110099100100800008000010016001414360016001601181600021636000005110116118005580000160000801008004880049800538006080052
1602048004262110000031800008003716012524010080100160000801001600006011863680220080032080058800585996036001724010020016000020040000080059800591180201100991001008000080000100160014143600160014001816000216361400005110116118004980000160000801008005380059800618005380059
1602048005962110100001900018003701602524010080100160000801001600006012463680292080022080050800475996136000524010020016000020040000080060800501180201100991001008000080000100160014140001600160017250161802236000005110116118005780000160000801008004880054800598019580060
16020480059620100000021000180043016025240100801001600008010016000010807943684720080022080047800585996336000524010020016000020040000080059800591180201100991001008000080000100160014173600160016101916000216341400005110116118005080000160000801008005380043800518004380062
160204801836210100011214000180034161652524010080100160000801001600002040250367942408001508004880058599553600082401002001600002004000008005980040118020110099100100800008000010016001414360016001401171600021601400005110116118004880000160000801008005280059800618005380060
16020480057621101000019000180037160102524010080100160000801001600001160726367969208002838005880059599653600192401002001600002004000008005980057118020110099100100800008000010016001414360016007625120160062160000005110116118004480000160000801008019080053800608004880048
160204800476211011000210001800431616425240100801001600008010016000011607343679908080022080183800525997336001024010020016000020040000080052800531180201100991001008000080000100160014143801160016001816000216341400015110116118005780000160000801008006180053800488005280192

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160025800426200000009080027161602524001080010160000800101600001879765367942408001580049800425997736002224001020160000204000008004280042118002110910108000080000101600000360016000200016000220005020371638238003980000160000800108005080043800438005180043
1600248004062000000060800271616025240010800101600008001016000018797653679424080017800428004059977360029240010201600002040000080042800421180021109101080000800001016000003400160002000160000238005020411641398003980000160000800108004380043800508004380051
1600248004262100000030800251616025240010800101600008001016000018797653679424080025800428004059977360023240010201600002040000080050800511180021109101080000800001016000003400160002005160002234005020381638388003980000160000800108004180041800438004380041
16002480051620000003060800271616025240010800101600008001016000018797653679424080259800518004059975360020240010201601202040000080320800421180021109101080000800001016000003402160002005160002234005020411615418004880000160000800108004380043801158004380043
160024800426200000021908003516160252400108001016000080010160000187976536794240800178005180050599773600222400102016000020400000800428004211800211091010800008000010160000034900160002405160002234005020371642428003780000160000800108004380043800438004480051
1600248005062100000630800271616025240010800101600008001016000013202733682604080017800428004259985360022240010201600002040000080040800421180021109101080000800001016000003400160002205160002234005020421640378003980000160000800108005180043800438004380043
1600248005062000000090800271616025240010800101600008001016000018797653679424080017800508005159977360022240010201600002040000080042800421180021109101080000800001016000003400160002005160002234005020431638368003980000160000800108005180043801478004180041
160024800426200000003080027160025240010800101600008001016000013202653679808080024800428004259977360020240010201600002040000080049800421180021109101080000800001016000003400160000000160002234005020391637408003980000160000800108005180043800438004180043
1600248004262000000030800271616025240010800101600008001016000018797653679424080017800508005059977360029240010201600002040000080042800401180021109101080000800001016000003400160002002160002234005020441643438003980000160000800108005180043800438005080041
1600248004262000000060800271616025240010800101600008001016000018797653679424080017800508005159977360020240010201600002040000080040800401180021109101080000800001016000003400160002708160002034005020421638378003980000160000800108005380051800418004980043