Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 2 regs, 4S)

Test 1: uops

Code:

  st1 { v0.4s, v1.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
62005291662343021040000100466328558222377230001000200010002000500010000800160612824228827310300020005000287802872811610011000100020000602000000200020210133359345694130550662020932213811245761283201000157091321314653200010002897028838290122895228995
62004290122320020040100100469028535002377030001000200010002000500010000800160382825029010310300320005000289402881511610011000100020000602000000200006000132689297688831382662038532613825185555283341000157331293814731200010002898028936289322905828938
62004288822330060040000000471328554222366930001000200010002000500010000800160342818028815310300020005000288892894111610011000100020000602000000200000000131209398688831531572033531733821196157283251001158771329114480200010002904528859290192902428947
62004289182330030050100891047172865122237603000100020001000200250001000061016049281242900031030002000500028847288441161001100010002000060200000410200006000131539176687631241622050932963817255656283701000158041332614857200010002900229098290612902328984
62004291332340060040000100460828503222385430001000200010002000500010000600160572821728804310300020005000286472889111610011000100020000602000000200000000132069566692431670532008231513818186059282291000158391295414254200010002885828937286692863028802
62004288222320030050006000470028488222421330001000200010002000500010000610160522838729359310300020005000293142912811610011000100020000002000000200006000130249059694030982552020132953815165566284111000156701314614407200010002899828924288332901428987
62004290942340070030000000463228443202432630001000200010002000500010000800160442825328752310300020005000287912886211610011000100020000602000000200000000129889092690631024592023731883816196258283291000160971345514817200010002899128932287692874128971
620042883023100400300001014704286892223761300010002000100020005000100001300160372811928787310300020005000298282988711610011000100020000602000000200000000129958969691630902592082432783815125559286931001164991363915397200010002897529145292842955929353
62004291592340030150000100457328817022420130001000200010002000500010000600160502849029216310300020005000292182919911610011000100020000602000000200006000129509352695831320612077733233818286053289461000165811366615370200010002959629061291282896929047
62004288842350040020100100465128836022386630001000200010002000500010000408160512826629060310300020005000289362878821610011000100020000602000000200004000131389002680330732532034532323819226363283681000161681328214782200010002894729018288772898829193

Test 2: throughput

Count: 8

Code:

  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  st1 { v0.4s, v1.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205800546211110100003001800271616025240100801001600008010016000018803583679448080029800548005359977360012240100200160000200400000800478005211802011009910010080000800001001600141643011600140014160002242005112516558003980000160000801008004483369803988004180041
1602048004062101001000017002800321516025240100801001600008010016000011607343679908080017800428004359955360110240100200160000200400000800438004011802011009910010080000800001001600000420016000200216000216441405112516658005180000160000801008018880056800538004880055
160204800476211100100003001800251616125240100801001600008010016000018803583684284080031800558005459976360010240100200160000200400000800548005411802011009910010080000800001001600141444001600160017160002242005112416538005180000160000801008004480043800438004380041
160204800406200100100002100280039161652524010080100160000801001600001480522367969208001880043800425995536000024010020016000020040000080043800421180201100991001008000080000100160000042001600000021600021601405112616758005180000160000801008005580056800538004880055
160204800546201100100003001800271616742524010080100160000801001600001880358367935208002780064800545996636001324010020016000020040031580054800641180201100991001008000080000100160014140001600160014160000246005112516548004080000160000801008004180043800448004380055
16020480043620010010000150028003901622524010080100160000801001600009209103680004080017800428004259955359998240264200160000200400000800428004211802011009910010080000800001001600000421016000200816000216441405112516558005180000160000801008004880055800488005380055
16020480056621110010000300180027160025240100801001600008010016000018803583680000080039800548005459960360005240100200160000200400000800648004711802011009910010080000800001001600141744001600160014160002042005112416538003980056160000801008004480043800438004380055
160204800406200100100026719400280037161642524010080100160000801001600001480510367990808002880048800635996736001224010020016000020040000080053800611180201100991001008000080000100160014144401160016101616000216441405112516358006080000160000801008005580055800528005580055
1602058006362111101100314002800321606252401008010016000080100160000920898367969208002880052800635996710600122401002001600002004000008005380053118020110099100100800008000010016001414440016001410141600021601405112316548005180000160000801008005380055800558005380055
16020480054621110010001217002800321616625240100801001600008010016000010767833680004080029800528005159967360010240100200160000200400000800478005411802011009910010080000800001001600151444320160016001416000214441415112616558005180000160000801008005580056800538005580055

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600258004362100000000380027161602524001080010160000800101600001879773367944808001708004280043599773600232400102016000020400000800428004011800211091010800008000010160000042001600044105160002242005020415548003980000160000800108004480442831148004180044
1600248004262000000000380027161602524001080010160000800101600001799829367947208001708004080043599773600232400102016000020400000800428004211800211091010800008000010160000042001600621002160002242005020615648003980000160000800108004180043800448004380045
160024800426200000000123800281616525240010800101600008001016000018797733679448080018080042800425997836002324001020160000204000008004280042118002110910108000080000101600000422901600029302160000242005020416438003980000160000800108004180044800438004380047
16002480042620000000012380027161612524001080010160000800101600001799837367947208001708004280042599773600232400102016000020400000800428017711800211091010800008000010160000042001600025102160002242005020716548003780000160000800108004480043800438004380043
1600248004262100000100380025161612524001080010160000800101600001879773367944808002908004280042599783600232400102016000020400000800448004211800211091010800008000010160000042001600021302160062242005020616458004080000160000800108004380043800438005580045
160024800426210000000038002716161252400108001016000080010160000187977336794480800170800428004259978760023240010201600002040000080043800421180021109101080000800001016000004210160002005160002242005020416868003980000160000800108004480179800438004380047
1600248004262000000000380027161602524001080010160000800101600001954047367944808001508004380043599773600232401902016000020400000800428005411800211091010800008000010160000042001600021302160002242005020616848003980000160000800108004180043800438004480046
1600248004262000000000380027161602524001080010160000800101600001879773368002408001708004280042599773600232400102016000020400000800438004311800211091010800008000010160000042001600029602160002242005020515468016180000160000800108004380043800448017380052
16002480042620000000012380027161602524001080010160000800101600001968365367944818001708004080042599773600232400102016000020400000800438004311800211091010800008000010160000042001600022902160002242005020515668004180000160000800108004480043800438004380051
1600248004362100000002247237680105161602524001080066160000800101600001799833367944808001708004280042599773600232400102016000020400000800428004211800211091010800008000010160000042001600025802160062242005034515548003780000160000800108004480044800438004380043