Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, sxtw, 64-bit)

Test 1: uops

Code:

  str x0, [x6, w7, sxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100555440705271616025100010001000223521540542355340110001000300054254211100110001000100000100002100224273116115401000541541543543541
1004543403152816160251000100010002235215405433563398100010003000542542111001100010001000420100200100224273116115391000543541541543543
1004542463052501612510001000100022472154354035334001000100030005405431110011000100010000010020210022073116115391000543543541555541
10045424001527016125100010001000224481542542355340010001000300054254211100110001000100000100010100024273116115391000543541541543543
10045404600525016025100010001000224481540554355340010001000300054254011100110001000100042010020210022073116115401000544541544544541
10045404030525000251000100010002244815425403533398100010003000542540111001100010001000420100005100224273116115391000543543543543543
1004542403052716160251000100010002244815425403553412100010003000542542111001100010001000420100200100004273116115391000543543541541544
10045404030528016025100010001000224481542542353340010001000300054254011100110001000100000100202100004273116115391000543541543544543
10045424030528161612510001000100022448054254235534001000100030005425401110011000100010000110020010020073116115391000541543543543541
100454240315251600251000100010002235215405423553401100010003000540542111001100010001000420100002100224273116115391000555543543543541

Test 2: throughput

Count: 8

Code:

  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  str x0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400523001110001910014003716022580100100800001008000650018399871493696740052400532997173000580106200800162002400484005432001118020110099100800001008000010080015144400800160117800021401401115118016040039800001004004340043400434004340043
8020440042300000000310004002716161258010010080000100800065001840031149369624004240054299617299948010620080016200240048400423199511802011009910080000100800001008000000008000200280000242001115118016040039800001004004440043400434004440043
80204400403000000003000040025161612580100100800001008000650018394741493696240042400402995972999480106200800162002400484004340179118020110099100800001008000010080000042008000200280002242001115118016040039800001004004440044400434004140043
802044004230000000031001400481616125801001008000010080007500184003514936983400544004729974729999801062008001620024004840053320071180201100991008000010080000100800141444018001601248000216441401115118016040060800001004005540054400554005540055
802044005230011100014100140039016525801001008000010080006500183995914936967400474005229982730005801072008001620024004840054320161180201100991008000010080000100800141544018001600188000214441411115118016040049800001004004840055400654005540055
8020440054300110008120100140039016325801001008000010080006500183993914936972400634005130022730006801062008001620024004840053320071180201100991008000010080000100800151444028001602178000016441421115118016040051800001004005540053400484005340055
80204400472991110069171001400481616125801001008000010080007500184003514936968400544005429974730015801072008001620024004840054320071180201100991008000010080000100800151444018001401198000216441411115118016040051800001004004840049400554005340055
80204400643001100063181001400381616025801001008000010080007500184005414936984400544005429966183001580107200800162002400484005432007118020110099100800001008000010080015154401800140119800021601421115118016040049800001004004840055400644005540055
8020440053300101008420100140032160525801001008000010080006500183996014936972400474004829983730004801062008001620024004840047320171180201100991008000010080000100800151544008001601178000216441411115118016040060800001004005540053400484005240055
80204400633001110011121100140039161662580100100800001008000750018400541493698440054400542997373000680106200800162002400484005432007118020110099100800001008000010080016150028001601178000216441421115118016040049800001004005340055400484004840064

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540059299110124310004003216165258001010800001080000501839908014936975400514004029975330031800102080000202400004005940047118002110910800001080000108001515360180002000800022014250203160444004980000104005240048400534004940051
8002440058300100101900014004216162258001010800001080000501840220014936979400594005929993330041800102080000202400004005940059118002110910800001080000108001414360080016011480002163614150204160444004780000104004840053400614004840059
8002440052300110001900014004416161125800101080000108000050183969201493696740052400582999433003980010208000020240000400524004711800211091080000108000010800151500080016001880002143514050203160344005780000104005940053400584005440060
80024400523001100811410014003216162258001010800001080000501839692014936960400424005429985330020800102080000202400004005840060118002110910800001080000108001515360180016111880000143614650204160434004780000104005940053400614006040048
80024400523001111018000140037161652580010108000010800005018399560149369694006140058299943300318001020800002024000040128400591180021109108000010800001080014140018007600218000214014250204160344004480000104005040048400524004840062
8002440058300100001810014004516166258001010800001080000501840221014936982400594005929993330030800102080000202400004005940059118002110910800001080000108001615360080016112280002143614050204160334004980000104006140059400504006040060
800244005830010016171001400321605258001010800001080000501840220014936980400504005229987330027800102080000202400004005140052118002110910800001080000108001515360180014012180000143614050203160334004580000104006040048400594005240053
800244004930010100010014004416169258001010800001080000501839719014936978400534005829994330038800102080000202400004004740047118002110910800001080000108001514360180016011480002163614050203160344005680000104004840061400604006140048
800244005830010013014100140034016525800101080000108000050184024401493698340601400522998733002780010208000020240000400524005211800211091080000108000010800141500280014111480062143614250204160434004480000104005940048400604004840060
800244005130011110180001400441615525800101080000108000050184026801493697540051400532998733003880010208000020240000400514005311800211091080000108000010800151600180016001880002163614050204160444004580000104004840053400594004840048