Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (Q)

Test 1: uops

Code:

  stp q0, q1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
900511658021011500152520002000200010184114011651165323200020004000116511651180011000100020000312001012001131511116001162200011661166116611661166
900411658021011501515252000200020001000011401165116532320002000400011651165118001100010002000031200100200010511016001162200011661166116611661166
900411659051011500025200020002000100001140116511653232000200040001165116511800110001000200000200000200110511016001162200011661166116611661166
900411659000011501502520002000200010000114011651165323200020004000116511651180011000100020000312000042001031511016001162200011661166116611661166
900411658050011501502520002000200010057114011651165323200020004000116511651180011000100020000312001002000131511016001162200011661166116611661166
90041165800001150151525200020002000100131140116511653232000200040001165116511800110001000200000200000200010512016111162200011661166116611661166
900411659001011501515252000200020001000011401165116532320002000400011651165118001100010002000002001002001131512016001162200011661166116611661166
9004116580510115015152520002000200010057114011651165323200020004000116511651180011000100020000312000012001131512016011162200011661166116611661166
90041165800001150015252000200020001011211401165116532320002000400011651165118001100010002000002001002001031512016001162200011661166116611661166
900411659021011500152520002000200010057114011651165323200020004000116511651180011000100020000312001002000131511016001162200011661166116611661166

Test 2: throughput

Count: 8

Code:

  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205800426000000631008002716160251601001001600001001602165003684236108001508004980042599533600081601000200160000200320000800408018321802011009910010080000800001001600000000160000051600022000005110016118003901600001008004380041800418004380043
1602048004259900000300080036160025160100100160000100160000500367983200800170800428004259955359998160100020016000020032000080042800401180201100991001008000080000100160000034001600020516000223400005110116118003901600001008004180041800528004380043
160204800425990000030008002701602516010010016000010016000050036794240080017080050800425995336000016010002001600002003200008004980042118020110099100100800008000010016000000001600020216000203400005110116118003701600001008004380043800438004380043
16020480042599000009000800351616025160100100160000100160000500367985600800260800428004059964360000160100020016012020032024080049803181180201100991001008000080000100160000034001600020016000223400005110116118003901600001008004180041800438005180052
1602048005160000000300080025016025160100100160000100160000500367942400800170800498004259963360000160100020016000020032000080051800401180201100991001008000080000100160000034001600000216000223400005110116118011801600001008004180050800518004380041
1602048004262600000310080027161602516010010016000010016000050036794240080017080051800425995336023216010002001600002003200008005180040118020110099100100800008000010016000000001600020016000223400005110116118003901600001008004380043800438005180051
160204800496000000031008002700025160100100160000100160000500367935200800150800428004059964359998160100020016000020032000080042800421180201100991001008000080000100160000034001600020216000003400005110116118003901600001008004380043800438005280052
16020480051599000066100800251616225160100100160000100160000500367942400800150800428004059953360000160100020016000020032000080042800421180201100991001008000080000100160000034001600020216000203400005110116118004801600001008004380041800438004380041
16020480042600000003000800250019381105161426102160000100160000500367942400800170800428004259955360000160100020016000020032000080042800491180201100991001008000080000100160000034001600020016000023400005110116118003901600001008004380043800438005180052
16020480130599000003100800251616025160100100160000100160000500367942400800170800518004259953360000160100020016000020032000080050800401180201100991001008000080000100160000034001600020816006223400005110116118003701600001008004180323823288074581295

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002580042599000000031008002516161251600101016000010160000503679352108001680042800435997836002216001020160000203200008004280042118002110910108000080000101600000420016000200216000244200502011601180040160000108004480041800448004480041
160024800436000000000300080028000251600101016000010160000503679352108002180043800405997536002316001020160000203200008004280042118002110910108000080000101600000440016000200616000024200502011601180040160000108004480044800418004480044
16002480042600000000031008002716160551600101016000010160000503679352108001880043800425997736002316001020160000203200008004380040118002110910108000080000101600000420016000000016000224200502011601180039160000108004480044800448004180043
1600248004059900000000000800271600251600101016000010160000503679448108001580040800405997736002016001020160000203200008004380043118002110910108000080000101600000420016000000216000224200502011601180039160000108004380043800438004380041
1600248004060000000003100800270160251600101016000010160000503679448108002180042800425997736002316001020160000203200008004080042118002110910108000080000101600000420016000210016000224200502011601180039160000108004180043800438004380043
160024800426000000000300080027000251600101016000010160000503679352108001980043800435997836002216001020160000203200008004280042118002110910108000080000101600000420016000200216000224200502011601180040160000108004480044800448004480044
1600248004260000000063100800271616025160010101600001016000050367944810800178004280042599753600221600102016000020320000800428004211800211091010800008000010160000042001600020051600022000502011601180039160000108004480044800418004480041
160024800436000000000300080027016025160010101600001016000050367944810800198004280043599753600201600102016000020320000800428004311800211091010800008000010160000000016000200016000024200502011601180039160000108004180043800438004180041
16002480040599000000663000800271616025160010101600001016000050367944810800198004280043599783600221600102016000020320000800408004311800211091010800008000010160000000016000043021600022000502011601180039160000108004180043800418004380043
1600248004359900000000100800251616125160010101600601016000050367944810800278004080042599773600201600102016000020320000800428004011800211091010800008000010160000000016000200216000224200502011601180037160000108032480602801808046180885