Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 2 regs, 16B)

Test 1: uops

Code:

  st1 { v0.16b, v1.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
62005290462341020031003346772856902238563000100020001000200050001000009160802828128931310300020005000289132893911610011000100020000402000003200046210131809400682031260652037032363820226165283841000165521415114922200010002902529058290492908928709
62004290252340010020003347062864920240003000100020001000200050001000007160522817929003310300020005000288522893911610011000100020034632000105200024210130299386690631962702038332453815196062283421000158391302914503200010002890228943289462894728951
620042893223201110200003473428600202381730001000200010002000500010000001605928246289653103000200050002894228844116100110001000200334020000322000240081131659277690831220592036131773819156065283451000159361316414497200010002903729019290162893029024
6200429042232013001000132346202861520238373003100220021000200650001001000161112836429046113130002002500029054289833161001100010002010040200403452200024230132319306699131261632040832403820365663283771001158901309514514200010002895028885289132895628979
620042895323400210310001794731287972023973300010012004100020025005100860516097284142926592830002002500029109290824161001100010002000283200403870200004200133279350692630930602034531613818176256284031000160541334614821200010002902329066290862967229030
620042910923201010213015347372856220237653000100020001000200050001000000160622848028998310300020005000286452875511610011000100020000602000003200006000132469378694931000672017932243814176765281481000154581282314666200010002876728834286982891928630
6200428823223010102100034741285122023699300010002000100020005000100000516068282002888431030002000500028744286881161001100010002003360200000220022420013286938969323170158202163288381695659281871000157591303114253200010002887128771288402883128823
62004288502250100040000547352856520238593000100020001000200050001000005160562818728847310300020005000287542873411610011000100020022402002002200026000131829367697031710632019632383822195560282831000156201309014484200010002892928861289562893928807
62004288072230120030000346752849300237903000100020001000200050001000000160542829928883810300020005000287932884711610011000100020000422002012200004200129939438694231740692022832273819186060281801000156941295014355200010002899128827288782892728927
62004288612230022031000547062857620238463000100020001000200050001000005160602809128553310300020005000285852865811610011000100020002612002000200006210132569470696932240621990831953822166161280821000148041281814021200010002886928762287652877128625

Test 2: throughput

Count: 8

Code:

  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  st1 { v0.16b, v1.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020580052643100000000170018003716002524010080100160000801001600002040250367935280017800438008659955360001240100200160000200400000800428004211802011009910010080000800001001600000000160000000160000000053831116118004080000160000801008004380043800418004180043
16020480040643000010009710180027016025240100801001600008010016000020402503679352800178004280040599533600002401002001600002004000008004280042118020110099100100800008000010016000004200160002007471600022420051121216128003980056160000801008004180043800438004380041
160204800426430100000063011800280161252401008010016000080100160000188035836794728001780042800425995535999824010020016000020040000080042800431180201100991001008000080000100160000042001600000061600022420051121216228003980000160000801008004180041800538004380043
160204800426430000000063010800251601252401008010016000080100160000204025036794488001780042800445996736000124010020016000020040000080176800421180201100991001008000080000100160000042001600020021600022460051101216118004080000160000801008004180043800458004380180
1602048004264300000000138400280027160025240100801001600008010016000060115836804388002280042800405995536000224010020016000020040000080043800541180201100991001008000080000100160000042001600620021600020420051121125118003980000160000801008004380173800418004380043
16020480042643010010000301180028000252401008010016000080100160000204025036800288001780042800425995536000024010020016000020040000080051800541180201100991001008000080000100160000042001600020021600022420051100216128003980000160000801008004180043800438004180044
16020480042643010010001230018002816160252401008010016000080100160000188035836793528001780042800425995536000124010020016000020040000080040800431180201100991001008000080000100160000042001600000001600020420051100216228004080000160000801008005580044800438005580043
160204800426430100100063000800251616025240100801001600008010016000020402503679448800188004480040599553600012401002001600002004000008005180042118020110099100100800008000010016042024273301600020021600022420051120216218003980000160000801008004180044800438004380041
1602048004264401000000630118002516160252401008010016000080100160000188035836794488001580040800445995535999824010020016000020040000080051800421180201100991001008000080000100160000042001600020001600002420051101217228003780000160000801008004480041800438004180043
16020480042643010010000001080027161612524010080100160000801001601082040250367935280018800548004059956360000240100200160000200401230800428004211802011009910010080000800001001600000000160000003160002000051100216228003980000160000801008004380044800438004480043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600258005462111100138181018003916165252400108001016000080010160000920325368002880035800548005459989360045240010201600002040000080053800531180021109101080000800001016000000001600020001600041644140050200415338004880000160000800108006180064800548006080064
160024800546201100091610080027161612524001080010160000800101600001799829367947280017800428005459975360023240010201600002040000080042800421180021109101080000800001016000000001600020021600022000050200516338003980000160000800108004380043800448004180043
160024800436210000029431008002816160252400108001016000080010160000187977336794488001780042800425997736002324001020160000204000008004380042118002110910108000080000101600000420016000200216000224200050200416338004880000160000800108004480043800438004380041
16002480042620000001443100800270161252400108001016000080010160000187977336794488001780051800425997736002324001020160000204000008004280043118002110910108000080000101600000420016000200216000224200050200415348003980000160000800108004180043800418004380041
160024800426200000039355100800271616125240010800101600008001016000017998413679448800178004280042599773600202400102016000020400000800428004311800211091010800008000010160000000016000210816000004200150200415338003780000160000800108004380043800418004480044
1600248004262000000342300080025160125240010800101600008001016000020396653679472800188004280042599783600232400102016000020400000800438004211800211091010800008000010160000000016000210016000224200050200516358003980000160000800108004380043800448004380041
16002480042621000004590100800291616025240010800101600008001016000020396653679448800178004280040599773600232400102016000020400000800408004211800211091010800008000010160000042001600023061600000000050200316558003980000160000800108004480044800438004480041
160024800406200000045330008002716160252400108001016000080010160000187977336793528001880044800425997736002024001020160000204000008004380042118002110910108000080000101600000420016000210516000224200050200315338003780000160000800108004480043800438004380044
16002480040620000004503000800250161252400108001016000080010160000203966536794488001780040800425997736002324001020160000204000008004080042118002110910108000080000101600000420016000200216000024200050200415438003980000160000800108004480041800448004380047
1600248004262100000564001080027161612524001080010160000800101600002039665367935280017800428004259978360020240010201600002040000080042800421180021109101080000800001016000000001600000021600020000050200315458003980000160000800108004380043800438004180043