Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STNP (32-bit)

Test 1: uops

Code:

  stnp w0, w1, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3d | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | st nt uop (e5) | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005540401525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540301525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000110001000731161153710001000541541541541541
10045403015258845310001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000010001000731161153710001000541541541541541
1004540401525882510001000100022352540540353339810001000300054054011100110001000100010001000110001000731161153710001000541541541541541

Test 2: throughput

Code:

  stnp w0, w1, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 38 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | st nt uop (e5) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20209100407511100712481002522520101101001000010104100005440304688242496960100401004074306749220104102081000810208300241004012311202011009910010000101001000010010007810000010007207100007100007111113170160010037100071000010000101001004110041100411004110041
20204100407511110712481002532520102101011000010104100005440444688243496960100401004074307749220104102081000810208300241004012311202011009910010000101001000010010007710000110007117100007100007000013101161110037100071000010000101001004110041100411004110041
2020410040761011371248100253252010210100100001010010000543996468824249696010040100407424374982010010200100001020030000100401231120201100991001000010100100001001000871000011000701136100007100007100013101161110037100071000010000101001004110041100411004110041
202041004075111107124810025425201011010010000101001000054399846882404969601004010040742437498201001020010000102003000010040123112020110099100100001010010000100100098100001100075110100007100007100013101161110037100071000010000101001004110041100411004110041
20204100407511010712481002542520102101021000010100100005439944688241496960100401004074243749820100102001000010200300001004012311202011009910010000101001000010010008710000110007017100007100007100013101161110037100071000010000101001004110041100411004110041
20204100407510110712481002532520102101011000010100100005439944688241496960100401004074243749820100102001000010200300001004012311202011009910010000101001000010010009810000210007017100007100007000013101161110037100071000010000101001004110041100411004110041
20204100407511116712481002532520101101011000010100100005440084688240496960100401004074243749820100102001000010200300001004012311202011009910010000101001000010010008710000110007007100007100007000013101161110037100071000010000101001004110041100411004110041
20204100407511110712481002532520101101031000010100100005439924688240496960100401004074243749820100102001000010200300001004012311202011009910010000101001000010010007710000110007007100007100007000013101161110037100071000010000101001004110041100411004110041
202041004075111007124810025425201031010010000102491000054399446882404969601004010040742437498201001020010000102003000010040123112020110099100100001010010000100100088100000100074010100007100007000013101161110037100071000010000101001004110041100411004110041
202041004076111007124810025325201011010110000101001000054399846882404969601004010040742437498201001020010000102003000010040123112020110099100100001010010000100100087100000100073010100007100007200013101161110037100071000010000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | 1e | 38 | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | st nt uop (e5) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200291004075012491002525200101001010000100101000054343346882411496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000091000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101001110000100101000054343546882411496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000001000001000012701161110037100001000010000100101004110041100411004110041
200241004076012491002525200101001010000100101000054343346882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000001000001000012701161110037100001000010000100101004110041100411004110041
200241004075612491002525200101001010000100101000054343346882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000001000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101011010000100101000054343346882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000101000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101001010000100101005154342546882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000101000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101001010000100101000054343346882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000401000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101001010000100101000054343346882400496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000001000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101001010000100101000054343346882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000131000001000012701161110037100001000010000100101004110041100411004110041
200241004075012491002525200101001010000100101000054343346882410496960100401004074463752020010100201000010020300001004012411200211091010000100101000010100001000010000101000001000012701161210037100001000010000100101004110041100411004110041

Test 3: throughput

Code:

  stnp w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 0.5209

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3d | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | st nt uop (e5) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020552093900185194144144168251010010010000100100005002372884921285209520838596390510100200100002003000052094121111020110099100100001001000010010000010000100010000031000010000111722224225205100001000010052095210520952105209
1020452083900185193144144152251010010010000100100005002372884921285209520838596390410100200100002003000052084122111020110099100100001001000010010000010000100010000031000010000111722224225205100001000010052145209521052095210
1020452093900185194144144152251010010010000100100005002372884921285209520838591441101010020010000200300005208412211102011009910010000100100001001000001000084010000001000010000111722224225206100001000010052095214520952105209
102045208390018519414414416825101001001000010010000500237336492128520952083859639041010020010000200300005208412211102011009910010000100100001001000001000084010000001000010000111722224225205100001000010052105209521052095210
102045209390018519314414415225101001001000010010006500237442492128520952083877739101010620010016200300485208412211102011009910010000100100001001000001000084010000001000010000111718016005205100001000010052105209521052095210
10204520939001851931441441522510100100100001001010850023744249212852085209387873911101062001001620030048520941211110201100991001000010010000100100000100001000100003601000010000111718016005206100001000010052095210520952105209
102045208390018519414414416825101001001000010010006500237490492129520852093878739111010620010016202300485209412111102011009910010000100100001001000001000084010000001000010000111718016005602100001000010052105209521052095210
1020452093900185194144144152251010010010000100100065002374424921285209520838777391010106200100162003004852084122111020110099100100001001000010010000010000100010000031000010000111718016005205100001000010052105209521052095210
1020452093900185194144144168251010010010000100100065002374904921285209520838777391010106200100162003004852084122111020110099100100001001000010010000010000100010000001000010000111718016005206100001000010052105209521052095210
1020452093900185193144144168251010010010000100100065002374904921295208520938787391110106200100162003004852094121111020110099100100001001000010010000010000100010000231001810000111718016005206100001000010052095210520952105209

1000 unrolls and 10 iterations

Result (median cycles for code): 0.5209

retire uop (01) | cycle (02) | 03 | 09 | 18 | 1e | 3d | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | st nt uop (e5) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10025520939000185193144144168251001010100001010000502374160492128520952083893339391001020100002030000520952081110021109101000010100001010000010000841000020100001000064031633520610000100001052095210520952105209
100245208390001851941441441682510010101000010100005023746404921295208520938943393910010201000020300005209520811100211091010000101000010100000100001001000080100001000064031633520510000100001052095210520952105209
10024520839000185194144144168251001010100001010000502374160492129520852093894339391001020100002030000520952081110021109101000010100001010000010000841000000100001000064031633520610000100001052095210520952105213
10024520838000185193144144152251001010100001010000502374161492128520952083893339381001020100002030000520852091110021109101000010100001010000010000841000030100001000064031633520510000100001052095210520952105209
1002452083900018519314414416825100101010000101000050237464049212952085209389433939100102010000203000052095208111002110910100001010000101000001000010010000150100001000064031633520610000100001052095210520952105213
100245212390001851931441441522510010101000010100005023741604921285209520838933393810010201000020300005208520911100211091010000101000010100000100001001000016117100001000064031633520510000100001052095210520952105209
1002452083900018519314414416825100101010000101000050237464149212952085209389433939100102010000203000052095208111002110910100001010000101000001000084100002078100001000064031633520610000100001052145209521052095210
10024520939000185193144144152251001010100001010000502374161492129520852093894339391001020100002030000520952081110021109101000010100001010000010000841000000100001000064031633520610000100001052105209521052095214
100245209390001851931441441682510010101000010100005023746404921285209520838933393810010201000020300005208520911100211091010000101000010100000100001001000000100001000064031633520510000100001052115210520952105209
100245208390001851941441441682510010101000010100005023746404921295208520938943393910010201000020300005209520811100211091010000101000010100000100001001000000100001000064031633520510000100001052105210520952105209