Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (signed offset, S)

Test 1: uops

Code:

  stp s0, s1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1f | 22 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
900611669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910011110011195191216111111631000100011671167116711671167
900411669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910010310011195201216111211631000100011671167116711671167
900411669201151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910013310011195191216111111631000100011671167116711671167
900411669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910010110011195191216111111631000100011671167116711671167
900411669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910010310011195191116111211631000100011671167116711671167
90041166941115177252000100010001000100010875800011145116611663242000100010002000200011661166118001100010001000191001031001119521916111111631000100011671167116711671167
900411668411151772520001000100010001000108758000011451166116632520001000100020002000116611661180011000100010001910010310011195201016101111631000100011671167116711671167
900411669211151772520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001910010110011195211216111211631000100011671167116711671167
900411668411151772520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001910010110011195191216121111631000100011671167116711671167
900411668411151772520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001910010110011195211116111211631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602064005430000031428940028161625164389100842898000010080000800005001839712643982140021400424004319959320001160100200800008000020016000016000040043400431180201100991001008000080000100800004208000202800020005110116114004080000800001004004340044400444005340044
1602044004229900030183640027016251644821008438080000100800008000050018397126439821400214004340043199593200011601002008000080000200160000160000400424004311802011009910010080000800001008000042080000008000004205110116114004180000800001004004440043400444004340043
1602044004330000000222040028161625161424100813648000010080000800005001839712643968140021400434004319959320000160100200800008000020016000016000040043400421180201100991001008000080000100800004208000010800020005110116114003980000800001004004340043400444004440043
1602044004330000000428940027161625163625100843828000010080000800005001839712646680140021400434004319959320001160100200800008000020016000016000040043400431180201100991001008000080000100800004208000202800022005110116114003980000800001004004440043400434004340044
160204400433000000122224002801625164482100833078000010080000800005001839712649899140021400434004319959320000160100200800008000020016000016000040043400421180201100991001008000080000100800004208000000800002005110116114003980000800001004004440043400444004340044
1602044004330000031427340028016251629571008066680000100800008000050018397126455290400214004240042199593200001601002008000080000200160000160000400434004311802011009910010080000800001008000042080002028000224205110116114004080000800001004025040044400434004640044
160204400433000003111094002816025162289100843808000010080000800005001839712648009140021400424004219959320000160100200800008000020016000016000040042400421180201100991001008000080000100800000080002028000224205110116114003980000800001004004340044400444004340044
16020440042300000003329400281616251614241008136680000100800008000050018397126433390400214004340043199593200011601002008000080000200160000160000400434004211802011009910010080000800001008000042080000028000024205110116114003980000800001004004340043400444004640044
16020440042300000302222400281616251618041008134780000100800008000050018397126442650400214004340042199593200001601002008000080000200160000160000400434004211802011009910010080000800001008000042080002028000204205110116114004080000800001004004440043400434004440044
160204400423000000143824002801625163238100814688000010080000800005001839712650866040021400424004219959320001160100200800008000020016000016000040042400431180201100991001008000080000100800004208000202800002005110116114003980000800001004004440043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002640042300000030194002816160251638921083656800001080000800005018397126463591400214004240043199823200231602342080000800002016000016000040447404511180021109101080000800001080000348000016800002050201316129400398000080000104004340043400434004440043
1600244004230000003019400271616025163036108335280000108000080000501839712647154140021400424004219982320022160010208000080000201600001600004004940042118002110910108000080000108000034800020080002234502011161010400398000080000104004340043400434004340050
1600244004230000313567400271616025163029108211980000108000080108501839736650756040024400434004319982320023160010208000080000201600001600004004240042118002110910108000080000108000034800020580002034502012161711400468000080000104004440044400434004340050
16002440042299003030194002816160251630551082384800001080000800005018397126463640400214004340049199853200221600102080000800002016000016000040042400421180021109101080000800001080000080002088000020502013161316400398000080000104004340054400434005040043
160024400493000060364940028016025164180108249080000108000080000501839712649058040021400424004219982320022160010208000080000201600001600004004240042118002110910108000080000108000034800000280000234502011161311400408000080000104004340043400434004340043
160024400433000030364940028001251630291082119800001080000800005018397126477110400214004940048199843200281600102080000800002016000016000040042400481180021109101080000800001080000080002008000200502013161413400398000080000104004340043400444005040044
1600244004330000303656400271616025162401108216080000108000080000501839712650948140021400424004219982320022160010208000080000201600001600004004340042118002110910108000080000108000034800020880000234502012161212400398000080000104004440043400434004440043
1600244004830000011365400271616025163036108301980000108000080000501839832646364040021400434004319982320023160010208000080000201600001600004004240042118002110910108000080000108000034800020580000234502012161212400398000080000104004440043400434004340043
1600244004230000313603400271616025163029108211980000108000080000501839832649051040024400434004219982320023160010208000080000201600001600004004240042118002110910108000080000108000034800020880002034502013161013400458000080000104004940044400434004340050
1600244004229900303656400271616025163027108301980000108000080000501839712650958140021400424004319982320028160010208000080000201600001600004004240042118002110910108000080000108000034800000080002034502012161412400468000080000104004340044402474024340050