Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STP (S)

Test 1: uops

Code:

  stp s0, s1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
9006116690011151082520001000100010001000108758000011451166116603242000100010002000200011661166118001100010001000151000001000005145164411631000100011671167116711671167
90041166900011518825200010001000100010001087580000114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
9004116680111151802520001000100010001000108758000111451166116603242000100010002000200011661166118001100010001000151000001000005144164411631000100011671167116711671167
90041166901011518025200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
90041166801011518825200010001000100010001087580000114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
90041166861111518825200010001000100010001087580000114511661166032420001000100020002000116611661180011000100010002510000010000155144164411631000100011671167116711671167
90041166901111518825200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010001510000310000155144164411631000100011671167116711671167
90041166961111518025200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010001510000010000155144164411631000100011671167116711671167
9004116696101151882520001000100010001000108758000111451166116603242000100010002000200011661166118001100010001000151000001000005144164411631000100011671167116711671167
9004116680411151882520001000100010001000108758000111451166116603242000100010002000200011661166118001100010001000010000010000155144164411631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  stp s0, s1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c2 | branch mispredict (cb) | cf | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020640043300003000231840027016025162426100823538000010080000800005001839832649539140021400434004919962032000716010020080000800002001600001600004004240042118020110099100100800008000010080000034080000058000203400511001161140039080000800001004004440043400434004440043
16020440048300003100325540028161602516303310082928800001008000080000500183971264566414002140049400431995903200011601002008000080000200160000160000400424004211802011009910010080000800001008000000080002028000203400511001161140040080000800001004004340043400444004940049
1602044004230000600022824002701602516242610082326800001008000080000500183971264566414002440043400431995903200001601002008000080000200160000160000400424004211802011009910010080000800001008000003408000008800022000511001161140039080000800001004004340050400444004940044
16020440049300000000148640028161602516198310082782800001008000080000500183971264879704002140042400421996203200011601002008000080000200160000160000400434004911802011009910010080000800001008000003408000202800020000511001161140039180000800001004004340050400434005040043
1602044004230010310023614003400025163028100829288000010080000800005001839712645664140021400434004320259232000616010020080000800002001600001600004004340043118020110099100100800008000010080000034080000028000203400511001161140039080000800001004004340043400444004340043
16020440042300003000390940028161602516158510082253800001008000080000500183971264604904002340042400421995903200071601002008000080000200160000160000400424004211802011009910010080000800001008000003408000202800022000511001161140045080000800001004004440044400434004340043
160204400423000031002326400271616025162422100823538000010080000800005001839712648813140021400424004219959032000616010020080000800002001600001600004004340042118020110099100100800008000010080000034080002038000023400511001161140046080000800001004004440043400504004340043
1602044004330000310023264003316160251630381008326480000100800008021650018478846523700400214004340049199620320000160100200800008000020016000016000040042400421180201100991001008000080000100800000008000200800002000511001161140039080000800001004004340050400434004940043
1602044004930200310029284002816160251624261008225380000100800008000050018398086444691400214004940048199620320001160100200800008000020016000016000040042400481180201100991001008000080000100800000340800021228000003400511001161140040080000800001004004440043400434004440043
16020440042300003000232240028161605116379110083746800001008011680000500183971264698714002140049400431995903200011601002008000080000200160000160000400424004211802011009910010080000800001008000000080002028000223400511001161140039080000800001004004340043400434004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600264004330000000013235400270160251630361083026800001080000800005018397126509584002140442402441998203200221600102080000800002016000016000040245404463180021109101080000800001080000000080002002800022005020051667400408000080000104004440050400434005040247
1600244004830000000613885400271600251630291082119800001080000800005018485806509484002140042400491998203200231600102080000800002016000016000040043400491180021109101080000800001080000034008000000418000203405020071665400398000080000104004340044400444004340043
1600244004230000000302563400281616025162129108211980000108000080000501839712651177400214004840043199820320023160010208000080000201600001600004004840042118002110910108000080000108000003400800000028000223405020061698400398000080000104004340043400434004340455
160024400493000000030166040034016025163029108301980000108000080000501839832651646400234004240042199820320022160010208000080000201600001600004004240043218002110910108000080000108000003400800020088000223405020081687400398000080000104004340044400444004340043
160024400483000000030211940028160025163666108381880000108011680000501839712649051400214004340043199820320028160010208000080000201600001600004004240042118002110910108000080000108000003400800000008000003405020081676400398000080000104004340043400444004440863
16002440042300000063125634003516002516303610830268000010800008000050183983265165740021400424004219982032002216001020800008000020160000160000400424004211800211091010800008000010800000000800000028000203405020071687400398000080000104004340043400444005040044
1600244004230000000906840028016025162573108365280000108000080000501839712650948400214004340042199820172002916001020800008000020160000160000400434004211800211091010800008000010800000340080000108800002005020071675400408000080000104004340251400434004440043
1600244004930000000303649400271616025163029108302680000108000080000501839808649584400214004240042199820320022160010208000080000201600001600004004340043118002110910108000080000108000003400800020008000023405020051667400458000080000104004340043400444004340050
1600244004330001000303649400271616025162513108238480000108000080000501839712649058400214004240042199820320022160010208000080000201600001600004004240043118002110910108000080000108000003400800000008000203405020071667400398000080000104004340049400444004340044
16002440049300000000136564002716160251619181083019800001080000800005018398086495844002140042400421998203200221600102080000800002016000016000040043400431180021109101080000800001080000234960800020028000223405038071677400398000080000104004340043400444004440049