Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (S)

Test 1: uops

Code:

  stp s0, s1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
9006116690011151082520001000100010001000108758000011451166116603242000100010002000200011661166118001100010001000151000001000005145164411631000100011671167116711671167
90041166900011518825200010001000100010001087580000114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
9004116680111151802520001000100010001000108758000111451166116603242000100010002000200011661166118001100010001000151000001000005144164411631000100011671167116711671167
90041166901011518025200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
90041166801011518825200010001000100010001087580000114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
90041166861111518825200010001000100010001087580000114511661166032420001000100020002000116611661180011000100010002510000010000155144164411631000100011671167116711671167
90041166901111518825200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010001510000310000155144164411631000100011671167116711671167
90041166961111518025200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
9004116696101151882520001000100010001000108758000111451166116603242000100010002000200011661166118001100010001000151000001000005144164411631000100011671167116711671167
9004116680411151882520001000100010001000108758000111451166116603242000100010002000200011661166118001100010001000010000010000155144164411631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020640043300003000231840027016025162426100823538000010080000800005001839832649539140021400434004919962032000716010020080000800002001600001600004004240042118020110099100100800008000010080000034080000058000203400511001161140039080000800001004004440043400434004440043
16020440048300003100325540028161602516303310082928800001008000080000500183971264566414002140049400431995903200011601002008000080000200160000160000400424004211802011009910010080000800001008000000080002028000203400511001161140040080000800001004004340043400444004940049
1602044004230000600022824002701602516242610082326800001008000080000500183971264566414002440043400431995903200001601002008000080000200160000160000400424004211802011009910010080000800001008000003408000008800022000511001161140039080000800001004004340050400444004940044
16020440049300000000148640028161602516198310082782800001008000080000500183971264879704002140042400421996203200011601002008000080000200160000160000400434004911802011009910010080000800001008000003408000202800020000511001161140039180000800001004004340050400434005040043
1602044004230010310023614003400025163028100829288000010080000800005001839712645664140021400434004320259232000616010020080000800002001600001600004004340043118020110099100100800008000010080000034080000028000203400511001161140039080000800001004004340043400444004340043
16020440042300003000390940028161602516158510082253800001008000080000500183971264604904002340042400421995903200071601002008000080000200160000160000400424004211802011009910010080000800001008000003408000202800022000511001161140045080000800001004004440044400434004340043
160204400423000031002326400271616025162422100823538000010080000800005001839712648813140021400424004219959032000616010020080000800002001600001600004004340042118020110099100100800008000010080000034080002038000023400511001161140046080000800001004004440043400504004340043
1602044004330000310023264003316160251630381008326480000100800008021650018478846523700400214004340049199620320000160100200800008000020016000016000040042400421180201100991001008000080000100800000008000200800002000511001161140039080000800001004004340050400434004940043
1602044004930200310029284002816160251624261008225380000100800008000050018398086444691400214004940048199620320001160100200800008000020016000016000040042400481180201100991001008000080000100800000340800021228000003400511001161140040080000800001004004440043400434004440043
16020440042300003000232240028161605116379110083746800001008011680000500183971264698714002140049400431995903200011601002008000080000200160000160000400424004211802011009910010080000800001008000000080002028000223400511001161140039080000800001004004340043400434004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600264004330000000013235400270160251630361083026800001080000800005018397126509584002140442402441998203200221600102080000800002016000016000040245404463180021109101080000800001080000000080002002800022005020051667400408000080000104004440050400434005040247
1600244004830000000613885400271600251630291082119800001080000800005018485806509484002140042400491998203200231600102080000800002016000016000040043400491180021109101080000800001080000034008000000418000203405020071665400398000080000104004340044400444004340043
1600244004230000000302563400281616025162129108211980000108000080000501839712651177400214004840043199820320023160010208000080000201600001600004004840042118002110910108000080000108000003400800000028000223405020061698400398000080000104004340043400434004340455
160024400493000000030166040034016025163029108301980000108000080000501839832651646400234004240042199820320022160010208000080000201600001600004004240043218002110910108000080000108000003400800020088000223405020081687400398000080000104004340044400444004340043
160024400483000000030211940028160025163666108381880000108011680000501839712649051400214004340043199820320028160010208000080000201600001600004004240042118002110910108000080000108000003400800000008000003405020081676400398000080000104004340043400444004440863
16002440042300000063125634003516002516303610830268000010800008000050183983265165740021400424004219982032002216001020800008000020160000160000400424004211800211091010800008000010800000000800000028000203405020071687400398000080000104004340043400444005040044
1600244004230000000906840028016025162573108365280000108000080000501839712650948400214004340042199820172002916001020800008000020160000160000400434004211800211091010800008000010800000340080000108800002005020071675400408000080000104004340251400434004440043
1600244004930000000303649400271616025163029108302680000108000080000501839808649584400214004240042199820320022160010208000080000201600001600004004340043118002110910108000080000108000003400800020008000023405020051667400458000080000104004340043400444004340050
1600244004330001000303649400271616025162513108238480000108000080000501839712649058400214004240042199820320022160010208000080000201600001600004004240043118002110910108000080000108000003400800000008000203405020071667400398000080000104004340049400444004340044
16002440049300000000136564002716160251619181083019800001080000800005018398086495844002140042400421998203200221600102080000800002016000016000040043400431180021109101080000800001080000234960800020028000223405038071677400398000080000104004340043400444004440049