Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (signed offset, D)

Test 1: uops

Code:

  str d0, [x6, #0x10]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 04 | 05 | 08 | 0b | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10055594011101403538161652510001000100023244152254755936434051000100020005585521110011000100010141400010140018100214014077516555571000554553551562549
100455941110021135371615525100010001000232681533552559365340510001000200055254711100110001000101415002101401191002163614177516555551000560559559548554
1004552411000150354314162251000100010002269215265585523603405100010002000547558111001100010001015153610101400161002163614277516555441000560560553552548
100454741101018035371615525100010001000232681522547547365341610001000200055255911100110001000101416001101601151002163514177516555501000559548554553548
1004547401110180353600025100010001000229561522559552360341010001000200056055911100110001000101514360110160120100216014177516555441000548553554552551
1004560411100180353616155251000100010002322015345475583653405100010002000552561111001100010001016153601101600201000163614177516555491000560548559553553
1004552411100191354314092510001000100023340152255855237034091000100020005485471110011000100010151500110160022100016014277516555481000548553551548559
100455941111014035441616225100010001000226921522559549360341910001000200054755111100110001000101414360010141017100216014077516555441000553551562558560
1004559411116140354416168251000100010002278815235595503713405100010002000547552111001100010001015143601101601181000163614077516555491000548548553553548
10045474111101403542016025100010001000229081534552547365341710001000200055256011100110001000101615360110140020100216014077516555451000553551562560548

Test 2: throughput

Count: 8

Code:

  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
802054005430000000003000400271616125801001008000010080000500183944814001540042400402995633000080100200800002001600004004040040118020110099100100800008000010080000420800020068000220511011611400370800001004004340043400444004440053
8020440042299000000001004002716161258010010080000100800005001839448140017400424004229955329998801002008000020016000040042400421180201100991001008000080000100800004208000200118000000511011611400370800001004004140041400414004440043
8020440040300000000030004003916160258010010080000100800005001839448040015400424004029953329998801002008000020016000040042400421180201100991001008000080000100800004208000200128000020511011611400370800001004004340043400434005540041
802044004230000000003000400281616125801001008000010080432500183944814001740042400432995532999880100200800002001600004004040040118020110099100100800008000010080000420800004411680002042511011611400400800001004005540043400434004340043
8020440040299000000675700040028161602580100100800001008000050018394720400184004040042299533300008010020080000200160000400424004211802011009910010080000800001008000000800020013180000242511011611400510800001004004340043400524004340043
8020440042300000000000004002716160258010010080000100800005001839472140015400404004029953329998801002008000020016000040054400421180201100991001008000080000100800004208000270380002242511011611400390800001004004140043400414004340044
802044005430000000000100400250002580100100800001008000050018394721400154004340042299653300008010020080000200160000400434004211802011009910010080000800001008000000800020058000020511011611400390800001004004140041400434004340043
802044004230000000003100400361600258010010080000100800005001839448140015400424004329955330001801002008000020016000040040400401180201100991001008000080000100800004208000000580000242511011611400390800001004004340043400554004140043
802044004230000000003100400271600258010010080000100800005001839496140017400424004229953330012801002008000020016000040040400421180201100991001008000080000100800004208000200880000042511011611400370800001004004340043400414004340044
802044004230000000060000400251616125801001008000010080000500183935214001540042400422995333000080100200800002001600004004040042118020110099100100800008000010080000008000000380002244511011611400390800001004004340043400444004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 09 | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c2 | c5 | cf | d5 | d6 | db | dd | inst fetch restart (de) | e0 | eb | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002540042299000012300400281616125800101080000108000050183947204001740043400432997733002280010208000020160000400424004211800211091010800008000010800000420080002058000224200502091604640039080000104004340043400444004340041
800244004330000006310400271616125800101080000108000050183944804001840042400422997833002280010208000020160000400424004211800211091010800008000010800000420080002028000224200502081606440040480000104004340043400434004340043
8002440042300000003104002816161258001010800001080000501839472040491400404004329977330023800102080000201600004004040043118002110910108000080000108000004229080000008000224200502061606440037080000104004340043400434004340043
800244004230000000310400271616125800101080000108000050183944804001540042400422998933002080010208000020160000400424004211800211091010800008000010800000420080002008000024200502061604640037080000104004140043400414004340043
80024400403000000031040027160025800101080000108000050183947204001740042400432997733002080010208000020160000400434004311800211091010800008000010800000420080002028000024200502051608640039080000104004340041400434004340043
80024400403000000631040028161602580010108000010800005018393520400184004040043299773300228001020800002016000040043400421180021109101080000800001080000000080002028000004200502051606740040080000104004440044400434004340044
80024400403000000031040027160025800101080000108000050183944804001740042400422997733003480010208000020160000400424004211800211091010800008000010800000420080002028000224200502071605640039080000104004340044400444004140041
80024400423000000243104002716162825800101080000108000050183944804001740043400422997533002080010208000020160000400424004211800211091010800008000010800000420080002008000224200502041607540039080000104004140044400444004140044
80024400422990000030040027161602580010108000010800005018394480400184004240042299783300228001020800002016000040052400421180021109101080000800001080000042008000202800020000502061606740172080000104004140053400434004340043
800244004230000001230040027161605580070108000010800005018394480400194004240318300723300208001020800002016000040182400421180021109101080000800001080000000080002028000024200502051604640040080000104004340055400434004140043