Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STP (D)

Test 1: uops

Code:

  stp d0, d1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
90061166900000010011151100252000100010001000100010875800011145116611663242000100010002000200011661166118001100010001008923010070071000823705125162111631000100011671167116711671167
900411669101100710011510025200010001000100010001087580001114511661166324200010001000200020001166116611800110001000100000010000001000015005131161111631000100011671167116711671167
90041166800000011011151101025200010001000100010001087880001114511661166324200010001000200020001166116611800110001000100000010000001000015005121161111631000100011671167116711671167
9004116690000001000115188252000100010001000100010875800001145116611663242000100010002000200011661166118001100010001000015010000001000015005132162211631000100011671167116711671167
9004116690000001100115188252000100010001000100010875800001145116611663242000100010002000200011661166118001100010001000015010001001000015005132161111631000100011671167116711671167
90041166910110091011151101025200010001000100010001087880001114511661166324200010001000200020001166116611800110001000100780110080081000823705122161111631000100011671167116711671167
9004116690000001000115188252000100010001000100010875800011145116611663242000100010002000200011661166118001100010001008723210070071001823725122162111631000100011671167116711671167
900411668000000110011518825200010001000100010001087580000114511661166324200010001000200020001166116611800110001000100001501000100100000005122161111631000100011671167116711671167
900411669000000100011518025200010001000100010001087580001114511661166324200010001000200020001166116611800110001000100001501000000100000005121161111631000100011671167116711671167
90041166800000011011151100252000100010001000100010878800001145116611663242000100010002000200011661166118001100010001008723010080071000723715121161211631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp d0, d1, [x6]
  stp d0, d1, [x6]
  stp d0, d1, [x6]
  stp d0, d1, [x6]
  stp d0, d1, [x6]
  stp d0, d1, [x6]
  stp d0, d1, [x6]
  stp d0, d1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5006

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020640061300100100015002810140036161632516137210081891800001008000080000500183978464848804003640050400591997103200161601002008000080000200160000160000400594004911802011009910010080000800001008001515030800161118800021436141051102161140046080000800001004005440061400534004940052
160204400503001100000190018921400341616025163415100821888000010080000800005001839904646749140035400514005819972032001816010020080000800002001600001600004005340050118020110099100100800008000010080014143600800160119800021636140051101161140047080000800001004005940061400594005140053
1602044049730011010001910218914003616165251618171008281780000100800008000050018402886467041400334004940050199630320017160100200800008000020016000016000040058400501180201100991001008000080000100800171600080016021480002160142051101161140047080000800001004005140051400514005240052
1602044006030010010001800241814004316161402516176610081238800001008023280000500183990464646204002540048400501996503200081601002008000080000200160000160000400614005011802011009910010080000800001008001615361180014101880000160140051101161140048080000800001004005340062400514006240053
1602044005830010010002100277714003416052516251010082280800001008000080000500183995264727304002540058400611997403200101601002008013680000200160000160000400584005011802011009910010080000800001008001415360080014001880002160140051101161140048080000800001004005140062400514006140053
16020440048299100100019002254140042016025162413100824198000010080000800005001840264648010040224400504005320118032001616010020080000800002001600001600004006040060118020110099100100800008000010080014143601800160018800001636141051101161140055080000800001004005340051400534006040050
1602044005930011010001400172914003201452516175910082280800001008000080000500183995264660404002840050400581997203200081601002008000080000200160000160000400584005711802011009910010080000800001008001515000800141116800021436142051101161140046080000800001004005140050400534006240051
16020440059300100100020002468140038160625161846100824198000010080000800005001839904646827040025400504005819972032000816010020080000800002001600001600004005140050118020110099100100800008000010080014153601800160118800021636142051101161140047080000800001004006040053400594005340059
160204400483001000000150023161400431614525162378100824108000010080000800005001839952644257040025400504006119961032001916010020080000800002001600001600004005340059118020110099100100800008000010080015153601800160114800001436140051101161140047080000800001004005340060400514005040052
160204400583001101006201021881400451616025161410100820548000010080000800005001840312643848040027400594006019971032001016010020080000800002001600001600004005840061118020110099100100800008000010080015150008001602225880060140140051101161140049080000800001004005940049400594005040052

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire (01) | cycle (02) | 03 | 04 | 05 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d0 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002640061300011010001900022531400441604311648341081655800001080000801165018397606446240040026400614005219985032003816001020800008024020160000160000400614005311800211091010800008000010800141536018001601188000216361420502000616654004808000080000104005140059400594005140060
16002440050300012010001400019731400450161251612651082429800001080000800005018400246497150040033400494005119986032002816001020800008000020160000160000400674005111800211091010800008000010800141436018001401188000214361410502000516654004808000080000104004940059400514005840053
16002440047300010010001400029321400461616325163194108206280000108000080000501839904645740104002540050400521998803200281600102080000800002016000016000040058400601180021109101080000800001080014143601800160018800001601410502000516554004708000080000104005940051400594005340051
1600244005830001100000140002474140036160025162334108450380000108000080000501839904650548104002640058400481998303200391600102080000800002016000016000040050400621180021109101080000800001080016160018001600208000216361410502000616654005508000080000104005140051400514005340059
16002440050300011000001410032041400350160251644741080783800001080000800005018399046471620040032400534005819986032002816001020800008000020160000160000400584005111800211091010800008000010800151535018001610248006016361400502000616664004708000080000104005140049400594005040053
1600244005830001101000140003970140046003251621611083054800001080000800005018403126495950040027400584006119996032003016001020800008000020160000160000400504005911800211091010800008000010800141436008001400148000016361420502000616654005508000080000104005140053400604005240060
160024400583000100000014100523140035160025162673108258780000108000080000501839784650979004002540058400501998603200391600102080000800002016000016000040052400621180021109101080000800001080014143600800160114800021401410502000616654004508000080000104006140060400524005940052
160024400583000110100017100249214003500225162476108344980000108000080000501840408650935004002840049400501998403200381600102080000800002016000016000040052400641180021109101080000800001080016150008001600148000216361400502000616554005608000080000104005140049400594005140059
16002440050300010010062000020151400430160251631601081385800001080000800005018403126489520040035400584005819986032003216001020800008000020160000160000400614005411800211091010800008000010800141436008001401148000216361420502000716554043208000080000104005340062400514004940059
1600244005030001101001021400037061400351616525163305108296880000108000080117501840384648910104003340052400591998603200271600102080000800002016000016000040059400591180021109101080000800001080016150008001410188000216361400502000416564004908000080000104005940051400584005240058