Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STUR (Q)

Test 1: uops

Code:

  stur q0, [x6, #1]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a4 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10055434003005280160251000100010002244805175425423563400100010002000543543111001100010001000420100202100224273216115391000543543543543555
10045424000005271600251000100010002244805185425423553401100010002000542540111001100010001000420100202100224273116115391000541543543543543
100454240030052516161251000100010002235205185435423553400100010002000542542111001100010001000420100202100224273116115391000544544544544541
100454340070052516160251000100010002244805175425423553400100010002000542554111001100010001000420100202100224273116115391000543543543543553
100454040000052516160251000100010002247205175425433563400100010002000543542111001100010001000420100000100224273116115391000544544544544544
100454340930052816161251000100010002247205175435433553400100010002000542542111001100010001000420100200100224273116115391000544544544543543
10045423003005281600251000100010002247205185435403533400100010002000540542111001100010001000420100000100224273116115391000541541543544544
1004543402100052716166251000100010002244805175425423553400100010002000542542111001100010001000420100202100224273116115391000543543544544544
100454340030052716160251000100010002244805155425423553400100010002000542542111001100010001000420100200100004273116115391000543543541543555
100454240030052716161251000100010002244805175425423553400100010002000542542111001100010001000420100212100224273116115391000543543541541543

Test 2: throughput

Count: 8

Code:

  stur q0, [x6, #1]
  stur q0, [x6, #1]
  stur q0, [x6, #1]
  stur q0, [x6, #1]
  stur q0, [x6, #1]
  stur q0, [x6, #1]
  stur q0, [x6, #1]
  stur q0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205400433000314002516160258010010080000100800005001839692400274005440054299603300128010020080000200160000400474005411802011009910010080000800001008001514440800160118800021644142051101161140044800001004004140041400434004340043
8020440042300031400281616025801001008000010080000500183944840017400434004329955330001801002008000020016000040123400401180201100991001008000080000100800000420800020028000204200051101161140039800001004059841017406384018440044
8020440042300123140027161626625801001008000010080000500183935240017400424004229955329998801002008000020016000040181400421180201100991001008000080000100800000420800020028000224200051101251140039800001004004340041400434004340044
8020440040300001400281616025802201008000010080000500183944840486401814004329955330000801002008000020016000040040400421180201100991001008000080000100800000420800020028000224200051101161140039800001004059640181400414004740044
8020440043300031400271616125801001008000010080000500183935240015400424004229953330000801002008000020016000040042400421180201100991001008000080000100800000420800021058000224200051101161140039800001004004340041400444004340044
802044004230063140027160025801001008000010080000500183935240018400434004229956330000801002008000020016000040042400421180201100991001008000080000100800000420800020088000224200051101161140039800001004004340043400434004340041
80204400403000314030816162558010010080000100801085001839448400184004340042299553299988010020080000200160000401814004011802011009910010080000800001008024204228800022228000224200051101161140040800001004004440043400434004440041
802044004030003140025160125801001008000010080000500183944840015400424004229956330000801002008000020016000040042400421180201100991001008000080000100800000420800020028000224200051101161140037800001004004140043400444004440044
8020440043300031400270161258010010080000100800005001839472400174004240042299563300008010020080000200160000400424004211802011009910010080000800001008000000080000100800022000051101161140039800001004004340043400434004140041
802044004230003040027161612580100100800001008000050018393524001740042400422995533000080100200800002001600004004040040118020110099100100800008000010080000000800020088000224200051101161140040800001004004440043400434004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 04 | 08 | 09 | 0a | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | cf | d0 | d2 | itlb miss (d4) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025400583001010000300040027161602580010108000010800005018393521400264004240042299773300208001020800002016000040042400421180021109101080000800001080000034008000200580002234050233111716141740039080000104004340041400434004340043
800244005030010100003010400280166258001010800001080000501839352040017400404004029977330022800102080000201600004004340043118002110910108000080000108000004200800021028000200050200001716171840039080000104004140043400434004340043
8002440040300000000030104002716161258001010800001080000501839472040017400424004229978330022800102080000201600004004240042118002110910108000080000108000004200800020028000224205020000141617940039080000104004540043400414004440043
800244004030000000063000400271616125800101080000108000050183947204001740042400402997533002280010208000020160000400404004211800211091010800008000010800000000800000028000004205020000171616840039080000104004140043400434004140041
8002440040300000000001004002701602580010108000010800005018394480400174004240042299773300228001020800002016000040054400421180021109101080000800001080000042008000200280002242050200001616141740040080000104004140043400434004140041
800244004230000000060100400271601258001010800001080000501839352140017400424004229977330034800102080000201600004004040040118002110910108000080000108000004210800020008000220050200001716171740039080000104004340044400414004340044
80024400423000000000300040025016025800101080000108011750183944804001540054400422997733002080010208000020160000400424004211800211091010800008000010800000420080002002800022005020000201617840039080000104004340043400414004340043
8002440042300000000000004002716161258001010800001080000501839448040015400424004029977330022800102080000201600004004240042118002110910108000080000108000004200800000028000204205020000816171440037080000104004140190401934004340043
8002440043299000000000004002716160258001010800001080000501839352040017400544004229977330022800102080000201600004004240042118002110910108000080000108000004200800000028000024205020000171691740037080000104004140041400434004140043
800244004330000000000000400291600258001010800001080000501839352040018400434004329975330023800102080000201600004004040040118002110910108000080000108000000008000200280002242050200001716171740037080000104004340043400414004340043