Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (signed offset, Q)

Test 1: uops

Code:

  stp q0, q1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 24 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
900511659031011501414252000200020001001701140011651165323200020004000116511651180011000100020003802002020002380512116111162200011661166116611661166
900411659071011501414252000200020001001701140011651165323200020004000116511651180011000100020003812000920022382512116111162200011661166116611661166
900411658030011501414252000200020001000011140011651165323200020004000116511651180011000100020003802002220022380512116111162200011661166116611661166
900411659031011501414252000200020001001711140011651165323200020004000116511651180011000100020003802002220002380512116111162200011661166116611661166
900411658171111011501414252000200020001001601140011651165323200020004000116511651180011000100020003802002220022380512116111162200011661166116611661166
900411659001011501414252000200020001001711140011651165323200020004000116511651180011000100020003812000620022380512116111162200011661166116611661166
9004116581530011501414252000200020001001601140011651165323200020004000116511651180011000100020003802000220022380512116111162200011661166116611661166
9004116580310115001425200020002000100171114001165116532320002000400011651165118001100010002000012002620022380512116111162200011661166116611661166
90041165921101011501414252000200020001001611140011651165323200020004000116511651180011000100020003802000920022380512116111162200011661166116611661166
900411659001011501414252000200020001069901140011651165323200020004000116511651180011000100020003802002220022380512116111162200011661166116611661166

Test 2: throughput

Count: 8

Code:

  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  stp q0, q1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020580042599000030800271600251601001001600001001600075003679455080015800428005059961759992160107200160016200320032800428004211802011009910010080000800001001600003416000200516000020111511701600800471600001008004380043800418004180043
160204800406000000608002716160251601001001600001001600065003679455080015800498004059969759992160106200160016200320032800408004211802011009910010080000800001001600003416000200216000000111511701600800391600001008004380043800518004180052
16020480042599000031800270160251601001001600001001600075003679378080026800408004059970759994160107200160016200320032800428004021802011009910010080000800001001600000160000000160002234111511701600800391600001008004180051800528004380043
1602048004059900003180027161602516010010016000010016000750036794550800178004280051599597599921601072001600162003200328005080042118020110099100100800008000010016000034160002008160002234111511701600800371600001008004180041800438004180051
160204800405990000018002716002516010010016000010016000650036794550800158004080050599553600001601002001600002003200008004280040118020110099100100800008000010016000034160002000160002234000511011611800371600001008004380041800418004380043
16020480042599000030800250160251601001001600001001600005003679784080015800428004259963359998160100200160000200320000800498004211802011009910010080000800001001600003416000000016000200000511011611800371600001008004380043800418004180051
1602048004959900003180027016025160100100160000100160000500367942408001780040800405995335999816010020016000020032000080051800401180201100991001008000080000100160000016000200516000200000511011611800391600001008004380041800438004180043
1602048004059900003180027161622516010010016000010016000050036794240800258004080042599623600001601002001600002003200008004080040118020110099100100800008000010016000034160002002160000034000511011611800391600001008004380050800518004380043
160204800425990000308003416002516010010016000010016000050036793520800178005080042599553599981601002001600002003200008005080040118020110099100100800008000010016000034160002002160002034000511011611800371600001008018980043800438004380043
16020480040599100030800271616025160100100160000100160000500367942408002580040800425996435999816010020016000020032000080051800421180201100991001008000080000100160000016000200816000020000511011611800461600001008005080041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600258005959900000003100800271600251600101016000010160432503679832180025080040800425997536048116001020160000203200008004080051118002110910108000080000101600161436011600160117160000163614105020051607480047160000108004880052800508004880060
16002480058599110000324181018004301622516001010160000101600005036796921800220800528006059993360027160010201600002032000080059800601180021109101080000800001016000000001600022001600022340005020054305580037160000108004380043800418005280052
160024800516000000000000080025161629825160010101600001016000050367935218001708005080040599843600201600102016048020320000800408004211800211091010800008000010160000034001600021021600020340005020063406680037160000108004380043800518005280043
160024800425990000001231008002716165251600101016000010160000503679692180033080058806035998336003016001020160000203200008005880047118002110910108000080000101600161600116001400326316000216014205020061605580049160000108005180062800498006080060
1600248005960010010001400180038161602516001010160000101600005036793521800170800428005159975360213160010201600002032024080042800501180021109101080000800001016000000001600020021600022340005020051607780039160000108004380041800518004380043
160024800426000000003600008059416160251600101016000010160000503679352080015080042800515997751600221600102016000020320000800428004011800211091010800008000010160014153601160016011716000214014205020051606980044160000108004880048800488005480053
160024800475991110000170018060116165251600101016000010160000503679932180027080060800586035836002716001020160000203200008006080058118002110910108000080000101600000340016000200111600022340005020061604480039160000108019180600805978004380043
160024800405990001000310080027160225160190101600001016000050368024608002808004780047599823600381604422016000020320000800518061811800211091010800008000010160000034001600021221600022340005020061605580039160000108004380043800418005180051
1600248004959900000000100800271616025160010101600001016000050367942418001708004280040599853600221600102016000020320000800408004211800211091010800008000010160000034001600004221600002340005020051604580046160000108004880048800598004980051
160024806115991110000191018059316022516001010160000101600005036793521800170800428004059977360020160010201600002032000080042800421180021109101080000800001016000000001600000001600022340005020051606680037160000108004380043800438005180041