Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STP (signed offset, S)

Test 1: uops

Code:

  stp s0, s1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1f | 22 | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
900611669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910011110011195191216111111631000100011671167116711671167
900411669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910010310011195201216111211631000100011671167116711671167
900411669201151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910013310011195191216111111631000100011671167116711671167
900411669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910010110011195191216111111631000100011671167116711671167
900411669411151772520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001910010310011195191116111211631000100011671167116711671167
90041166941115177252000100010001000100010875800011145116611663242000100010002000200011661166118001100010001000191001031001119521916111111631000100011671167116711671167
900411668411151772520001000100010001000108758000011451166116632520001000100020002000116611661180011000100010001910010310011195201016101111631000100011671167116711671167
900411669211151772520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001910010110011195211216111211631000100011671167116711671167
900411668411151772520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001910010110011195191216121111631000100011671167116711671167
900411668411151772520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001910010110011195211116111211631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  stp s0, s1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 0b | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602064005430000031428940028161625164389100842898000010080000800005001839712643982140021400424004319959320001160100200800008000020016000016000040043400431180201100991001008000080000100800004208000202800020005110116114004080000800001004004340044400444005340044
1602044004229900030183640027016251644821008438080000100800008000050018397126439821400214004340043199593200011601002008000080000200160000160000400424004311802011009910010080000800001008000042080000008000004205110116114004180000800001004004440043400444004340043
1602044004330000000222040028161625161424100813648000010080000800005001839712643968140021400434004319959320000160100200800008000020016000016000040043400421180201100991001008000080000100800004208000010800020005110116114003980000800001004004340043400444004440043
1602044004330000000428940027161625163625100843828000010080000800005001839712646680140021400434004319959320001160100200800008000020016000016000040043400431180201100991001008000080000100800004208000202800022005110116114003980000800001004004440043400434004340044
160204400433000000122224002801625164482100833078000010080000800005001839712649899140021400434004319959320000160100200800008000020016000016000040043400421180201100991001008000080000100800004208000000800002005110116114003980000800001004004440043400444004340044
1602044004330000031427340028016251629571008066680000100800008000050018397126455290400214004240042199593200001601002008000080000200160000160000400434004311802011009910010080000800001008000042080002028000224205110116114004080000800001004025040044400434004640044
160204400433000003111094002816025162289100843808000010080000800005001839712648009140021400424004219959320000160100200800008000020016000016000040042400421180201100991001008000080000100800000080002028000224205110116114003980000800001004004340044400444004340044
16020440042300000003329400281616251614241008136680000100800008000050018397126433390400214004340043199593200011601002008000080000200160000160000400434004211802011009910010080000800001008000042080000028000024205110116114003980000800001004004340043400444004640044
16020440042300000302222400281616251618041008134780000100800008000050018397126442650400214004340042199593200001601002008000080000200160000160000400434004211802011009910010080000800001008000042080002028000204205110116114004080000800001004004440043400434004440044
160204400423000000143824002801625163238100814688000010080000800005001839712650866040021400424004219959320001160100200800008000020016000016000040042400431180201100991001008000080000100800004208000202800002005110116114003980000800001004004440043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002640042300000030194002816160251638921083656800001080000800005018397126463591400214004240043199823200231602342080000800002016000016000040447404511180021109101080000800001080000348000016800002050201316129400398000080000104004340043400434004440043
1600244004230000003019400271616025163036108335280000108000080000501839712647154140021400424004219982320022160010208000080000201600001600004004940042118002110910108000080000108000034800020080002234502011161010400398000080000104004340043400434004340050
1600244004230000313567400271616025163029108211980000108000080108501839736650756040024400434004319982320023160010208000080000201600001600004004240042118002110910108000080000108000034800020580002034502012161711400468000080000104004440044400434004340050
16002440042299003030194002816160251630551082384800001080000800005018397126463640400214004340049199853200221600102080000800002016000016000040042400421180021109101080000800001080000080002088000020502013161316400398000080000104004340054400434005040043
160024400493000060364940028016025164180108249080000108000080000501839712649058040021400424004219982320022160010208000080000201600001600004004240042118002110910108000080000108000034800000280000234502011161311400408000080000104004340043400434004340043
160024400433000030364940028001251630291082119800001080000800005018397126477110400214004940048199843200281600102080000800002016000016000040042400481180021109101080000800001080000080002008000200502013161413400398000080000104004340043400444005040044
1600244004330000303656400271616025162401108216080000108000080000501839712650948140021400424004219982320022160010208000080000201600001600004004340042118002110910108000080000108000034800020880000234502012161212400398000080000104004440043400434004440043
1600244004830000011365400271616025163036108301980000108000080000501839832646364040021400434004319982320023160010208000080000201600001600004004240042118002110910108000080000108000034800020580000234502012161212400398000080000104004440043400434004340043
1600244004230000313603400271616025163029108211980000108000080000501839832649051040024400434004219982320023160010208000080000201600001600004004240042118002110910108000080000108000034800020880002034502013161013400458000080000104004940044400434004340050
1600244004229900303656400271616025163027108301980000108000080000501839712650958140021400424004319982320028160010208000080000201600001600004004240042118002110910108000080000108000034800000080002034502012161412400468000080000104004340044402474024340050