Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (signed offset, D)

Test 1: uops

Code:

  str d0, [x6, #0x10]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10055594011101403538161652510001000100023244152254755936434051000100020005585521110011000100010141400010140018100214014077516555571000554553551562549
100455941110021135371615525100010001000232681533552559365340510001000200055254711100110001000101415002101401191002163614177516555551000560559559548554
1004552411000150354314162251000100010002269215265585523603405100010002000547558111001100010001015153610101400161002163614277516555441000560560553552548
100454741101018035371615525100010001000232681522547547365341610001000200055255911100110001000101416001101601151002163514177516555501000559548554553548
1004547401110180353600025100010001000229561522559552360341010001000200056055911100110001000101514360110160120100216014177516555441000548553554552551
1004560411100180353616155251000100010002322015345475583653405100010002000552561111001100010001016153601101600201000163614177516555491000560548559553553
1004552411100191354314092510001000100023340152255855237034091000100020005485471110011000100010151500110160022100016014277516555481000548553551548559
100455941111014035441616225100010001000226921522559549360341910001000200054755111100110001000101414360010141017100216014077516555441000553551562558560
1004559411116140354416168251000100010002278815235595503713405100010002000547552111001100010001015143601101601181000163614077516555491000548548553553548
10045474111101403542016025100010001000229081534552547365341710001000200055256011100110001000101615360110140020100216014077516555451000553551562560548

Test 2: throughput

Count: 8

Code:

  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  str d0, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054005430000000003000400271616125801001008000010080000500183944814001540042400402995633000080100200800002001600004004040040118020110099100100800008000010080000420800020068000220511011611400370800001004004340043400444004440053
8020440042299000000001004002716161258010010080000100800005001839448140017400424004229955329998801002008000020016000040042400421180201100991001008000080000100800004208000200118000000511011611400370800001004004140041400414004440043
8020440040300000000030004003916160258010010080000100800005001839448040015400424004029953329998801002008000020016000040042400421180201100991001008000080000100800004208000200128000020511011611400370800001004004340043400434005540041
802044004230000000003000400281616125801001008000010080432500183944814001740042400432995532999880100200800002001600004004040040118020110099100100800008000010080000420800004411680002042511011611400400800001004005540043400434004340043
8020440040299000000675700040028161602580100100800001008000050018394720400184004040042299533300008010020080000200160000400424004211802011009910010080000800001008000000800020013180000242511011611400510800001004004340043400524004340043
8020440042300000000000004002716160258010010080000100800005001839472140015400404004029953329998801002008000020016000040054400421180201100991001008000080000100800004208000270380002242511011611400390800001004004140043400414004340044
802044005430000000000100400250002580100100800001008000050018394721400154004340042299653300008010020080000200160000400434004211802011009910010080000800001008000000800020058000020511011611400390800001004004140041400434004340043
802044004230000000003100400361600258010010080000100800005001839448140015400424004329955330001801002008000020016000040040400401180201100991001008000080000100800004208000000580000242511011611400390800001004004340043400554004140043
802044004230000000003100400271600258010010080000100800005001839496140017400424004229953330012801002008000020016000040040400421180201100991001008000080000100800004208000200880000042511011611400370800001004004340043400414004340044
802044004230000000060000400251616125801001008000010080000500183935214001540042400422995333000080100200800002001600004004040042118020110099100100800008000010080000008000000380002244511011611400390800001004004340043400444004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540042299000012300400281616125800101080000108000050183947204001740043400432997733002280010208000020160000400424004211800211091010800008000010800000420080002058000224200502091604640039080000104004340043400444004340041
800244004330000006310400271616125800101080000108000050183944804001840042400422997833002280010208000020160000400424004211800211091010800008000010800000420080002028000224200502081606440040480000104004340043400434004340043
8002440042300000003104002816161258001010800001080000501839472040491400404004329977330023800102080000201600004004040043118002110910108000080000108000004229080000008000224200502061606440037080000104004340043400434004340043
800244004230000000310400271616125800101080000108000050183944804001540042400422998933002080010208000020160000400424004211800211091010800008000010800000420080002008000024200502061604640037080000104004140043400414004340043
80024400403000000031040027160025800101080000108000050183947204001740042400432997733002080010208000020160000400434004311800211091010800008000010800000420080002028000024200502051608640039080000104004340041400434004340043
80024400403000000631040028161602580010108000010800005018393520400184004040043299773300228001020800002016000040043400421180021109101080000800001080000000080002028000004200502051606740040080000104004440044400434004340044
80024400403000000031040027160025800101080000108000050183944804001740042400422997733003480010208000020160000400424004211800211091010800008000010800000420080002028000224200502071605640039080000104004340044400444004140041
80024400423000000243104002716162825800101080000108000050183944804001740043400422997533002080010208000020160000400424004211800211091010800008000010800000420080002008000224200502041607540039080000104004140044400444004140044
80024400422990000030040027161602580010108000010800005018394480400184004240042299783300228001020800002016000040052400421180021109101080000800001080000042008000202800020000502061606740172080000104004140053400434004340043
800244004230000001230040027161605580070108000010800005018394480400194004240318300723300208001020800002016000040182400421180021109101080000800001080000000080002028000024200502051604640040080000104004340055400434004140043