Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STP (Q)

Test 1: uops

Code:

  stp q0, q1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
900511658021011500152520002000200010184114011651165323200020004000116511651180011000100020000312001012001131511116001162200011661166116611661166
900411658021011501515252000200020001000011401165116532320002000400011651165118001100010002000031200100200010511016001162200011661166116611661166
900411659051011500025200020002000100001140116511653232000200040001165116511800110001000200000200000200110511016001162200011661166116611661166
900411659000011501502520002000200010000114011651165323200020004000116511651180011000100020000312000042001031511016001162200011661166116611661166
900411658050011501502520002000200010057114011651165323200020004000116511651180011000100020000312001002000131511016001162200011661166116611661166
90041165800001150151525200020002000100131140116511653232000200040001165116511800110001000200000200000200010512016111162200011661166116611661166
900411659001011501515252000200020001000011401165116532320002000400011651165118001100010002000002001002001131512016001162200011661166116611661166
9004116580510115015152520002000200010057114011651165323200020004000116511651180011000100020000312000012001131512016011162200011661166116611661166
90041165800001150015252000200020001011211401165116532320002000400011651165118001100010002000002001002001031512016001162200011661166116611661166
900411659021011500152520002000200010057114011651165323200020004000116511651180011000100020000312001002000131511016001162200011661166116611661166

Test 2: throughput

Count: 8

Code:

  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  stp q0, q1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | 79 | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160205800426000000631008002716160251601001001600001001602165003684236108001508004980042599533600081601000200160000200320000800408018321802011009910010080000800001001600000000160000051600022000005110016118003901600001008004380041800418004380043
1602048004259900000300080036160025160100100160000100160000500367983200800170800428004259955359998160100020016000020032000080042800401180201100991001008000080000100160000034001600020516000223400005110116118003901600001008004180041800528004380043
160204800425990000030008002701602516010010016000010016000050036794240080017080050800425995336000016010002001600002003200008004980042118020110099100100800008000010016000000001600020216000203400005110116118003701600001008004380043800438004380043
16020480042599000009000800351616025160100100160000100160000500367985600800260800428004059964360000160100020016012020032024080049803181180201100991001008000080000100160000034001600020016000223400005110116118003901600001008004180041800438005180052
1602048005160000000300080025016025160100100160000100160000500367942400800170800498004259963360000160100020016000020032000080051800401180201100991001008000080000100160000034001600000216000223400005110116118011801600001008004180050800518004380041
1602048004262600000310080027161602516010010016000010016000050036794240080017080051800425995336023216010002001600002003200008005180040118020110099100100800008000010016000000001600020016000223400005110116118003901600001008004380043800438005180051
160204800496000000031008002700025160100100160000100160000500367935200800150800428004059964359998160100020016000020032000080042800421180201100991001008000080000100160000034001600020216000003400005110116118003901600001008004380043800438005280052
16020480051599000066100800251616225160100100160000100160000500367942400800150800428004059953360000160100020016000020032000080042800421180201100991001008000080000100160000034001600020216000203400005110116118004801600001008004380041800438004380041
16020480042600000003000800250019381105161426102160000100160000500367942400800170800428004259955360000160100020016000020032000080042800491180201100991001008000080000100160000034001600020016000023400005110116118003901600001008004380043800438005180052
16020480130599000003100800251616025160100100160000100160000500367942400800170800518004259953360000160100020016000020032000080050800401180201100991001008000080000100160000034001600020816006223400005110116118003701600001008004180323823288074581295

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | c5 | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002580042599000000031008002516161251600101016000010160000503679352108001680042800435997836002216001020160000203200008004280042118002110910108000080000101600000420016000200216000244200502011601180040160000108004480041800448004480041
160024800436000000000300080028000251600101016000010160000503679352108002180043800405997536002316001020160000203200008004280042118002110910108000080000101600000440016000200616000024200502011601180040160000108004480044800418004480044
16002480042600000000031008002716160551600101016000010160000503679352108001880043800425997736002316001020160000203200008004380040118002110910108000080000101600000420016000000016000224200502011601180039160000108004480044800448004180043
1600248004059900000000000800271600251600101016000010160000503679448108001580040800405997736002016001020160000203200008004380043118002110910108000080000101600000420016000000216000224200502011601180039160000108004380043800438004380041
1600248004060000000003100800270160251600101016000010160000503679448108002180042800425997736002316001020160000203200008004080042118002110910108000080000101600000420016000210016000224200502011601180039160000108004180043800438004380043
160024800426000000000300080027000251600101016000010160000503679352108001980043800435997836002216001020160000203200008004280042118002110910108000080000101600000420016000200216000224200502011601180040160000108004480044800448004480044
1600248004260000000063100800271616025160010101600001016000050367944810800178004280042599753600221600102016000020320000800428004211800211091010800008000010160000042001600020051600022000502011601180039160000108004480044800418004480041
160024800436000000000300080027016025160010101600001016000050367944810800198004280043599753600201600102016000020320000800428004311800211091010800008000010160000000016000200016000024200502011601180039160000108004180043800438004180041
16002480040599000000663000800271616025160010101600001016000050367944810800198004280043599783600221600102016000020320000800408004311800211091010800008000010160000000016000043021600022000502011601180039160000108004180043800418004380043
1600248004359900000000100800251616125160010101600601016000050367944810800278004080042599773600201600102016000020320000800428004011800211091010800008000010160000000016000200216000224200502011601180037160000108032480602801808046180885