Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 4 regs, 4S)

Test 1: uops

Code:

  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6400528599211013014004050992804200220665000100040001000400050002162514170212780028385310500040009000282952822411610011000100040056024006018400021000138811003171763408745192923375380675240278891000141781198912057400010002833328276281232822928177
6400428408212013011000153162816404223175000100040001000400050002163610170392786228458310500040009000281942809211610011000100040000100400000040022100013761982072293439742192773349380456038279031000141391183912204400010002815328495280522821428178
640042805721111411110705200281874422225500010004000100040005000216259170142790028346310500040009000283942818811610011000100040054141400602740026144113617965171763409635192143344380265246279311000146521209312608400010002837928307282382845028350
64004282872120110130030518628341402206350001000400010004000500021640151702227887283233105000400090002814828408116100110001000400001004002000400220001392610294724833325431922534873806114543278801000137951175011920400010002828128382280492827628233
6400428200212112110104050252790340221285000100040001000400050002163020170272792328302310500040009000282852821211610011000100040055143400601114000614421329410301715534367421948432823800125647277421000140441182212625400010002844528262281382857928281
640042820621101201000304983280394422375500010004000100040005000216341717055278902828831050004000900028281280191161001100010004000010040020004002212001386010180721134111041191833403380175444280471000140601196812687400010002824228159282182820428184
640042842521211711310404952280574421973500010004000100040005000216291817031277852815331050004000900028313280741161001100010004004512240040184000414401406210070705233635401924333523804124946280061000139171248912818400010002818828023281662852328123
640042822421101701400305229283914422318500010004000100040005000216471817037278262834631050004000900028047283191161001100010004000000400000240002000138441018970403348536192873399380485349278791000143581168913607400010002831628521282162849628403
6400428235212113116104050212847000222625000100040001000400050002161418170392794828398310500040009000283632815911610011000100040045014004014400040421369810248714433557431909133783818125437279291000141161211612885400010002840128305282422836628340
6400428363211018016001050232833300220605000100040001000400050002163522170362818628038310500040009000281512813211610011000100040000100400000040020000134841016770823214542193553331381385242279051000143351201012890400010002866428058279482810528184

Test 2: throughput

Count: 8

Code:

  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  st1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020516005712400000690310016002716002540010080100320000801003201084804997359352160017160042160040799783800344001002003200002007200001600421600401180201100991001008000080000100320000000032000000532000003414005110117111600378000032000080100160043160043160043160043160051
32020416017212410000001001600271616025400100801003200008010032000040052473598081600171600401600427997838011840010020032000020072000016004016017911802011009910010080000800001003200000000320002000320000200005110117211600378000032000080100160043160043160043160175160041
320204160042124100000881001600251616025400189801003200008010032000040052473594241600151600501600427998038004240023720032000020072000016004216004011802011009910010080000800001003200000000320002007873200022340015110217111600398000032000080100160041160041160043160043160043
32020416004012400000120000160172016025400100801003200008010032010840052473594241600151600421600407997838002440010020032000020072000016004016004011802011009910010080000800001003200000000320002000320002200005110117111600478000032000080100160041160041160041160043160188
3202041600421241001003001160027161602540010080100320000801003200004005247359424160017160050160042799803800434001002003200002107208101600421600401180201100991001008000080000100320000034003200021053200020340005110162111600398000032000080100160041160041160041160041160051
3202041600501241000021300116002700025400100801003200008010032000048049973641881600171600491600427997838003240010020032000020072000016004016004211802011009910010080000800001003200000000320060000320000200005110117111600398000032000080100160050160052160041160041160043
320204160042124000001443001160025000254001898010032000080100320000400524735980816001716004216005079980380024400100200320000200720000160040160042118020110099100100800008000010032000003400320002307453200002340005110217111600398000032000080100160041160174160043160041160052
32020416004212420000249001160025016025400100801003200008010032000040052473641881600151600421600517998015800404001002003200002007200001600401600401180201100991001008000080000100320000034003200020033200002340005110117111601568000032000080100160320160448160323160450160439
3202041605761243013130528267001160549160512846402948810883219808013132032447173073738121603721604361605828028733802454006482003203602007208101604341603074180201100991001008000080000100320000036003200020053200000340005110117111600488000032000080100160041160041160043160043160041
320204160042124100000000116002700025400100801003200008010032000048049973593521600171600511600427997838002440010020032000020072000016004216005011802011009910010080000800001003200000340032000200113200022340005110117111600378000032000080100160043160043160043160043160051

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002516004312400010103000160027016125400010800103200008001032000048004973641880016001516005116004279980380024400010203200002072000016004316004311800211091010800008000010320000000232000000232000224200005020117121600518000032000080010160041160044160043160041160044
32002416004212400001012300016002816161254000108001032000080010320000400076735944800160021160043160042799783800244000102032000020720000160042160040118002110910108000080000103200000420032000200532000224200005020117111600408000032000080010160043160041160043160044160044
320024160042124000000030001600251616025400099800103200008001032000040007673594480016002916004216004079981380025400010203200002072000016004316018111800211091010800008000010320000000032000212232000224200005020117111600398000032000080010160041160174160043160043160043
3200241600421242000001230001600281601254000108001032000080010320000459110736428400160021160043160042799803800244000102032000020720000160042160042118002110910108000080000103200000420032000210532000024200005020126111600408000032000080010160043160043160043160043160043
3200241600401240000000000016002716161254000108003932000080010320000400076735944800160019160042160042799833800264001472032000020720000160043160042218002110910108000080000103200000420032000200232000224202015032117111600398000032000080010160043160179160043160044160043
3200241600421241000001230001600271616125400010800103200008001032000040007673594480016001516004216004279980380025400010203200002072027016004216004211800211091010800008000010320000000032000200232000224200005044144311620678008732000080010160176160318160583160455164587
3200241603001244011323962670001601611616732540009980010320060800393201084800497364188001602551604541603188021236802154004212032000020720000160042160042118002110910108000080000103200000420032000000232000224200005020117111600398000032000080010160043160043160122160041160043
32002416004212400000026430001600251600254000108001032000080010320000400076735944800160018160042160042799783800244000102032000020720000160043160042118002110910108000080000103200000420032000200532000224200005020117111602778000032000080010160586160046160047160043160043
320024160040124100000030001600271601254000108001032000080010320000400076736414000160015160182160043799813800274000102032000020720000160040160042118002110910108000080000103200000440032000200532000224200005020217111600398000032000080010160044160041160043160043160043
3200241600421240000001270001600271616025400010800103200008001032000040007673594480016002816004016004279981380024400010203200002072000016004316004211800211091010800008000010320000000032000210532000224200005020117421600398000032000080010160043160043160043160044160055