Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STRH (register, uxtw)

Test 1: uops

Code:

  strh w0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005552410110181153716165251000100010002293215585513723415100010003000559560111001100010001014153600101600181002163614273216115491000559559559553554
1004552410100201153616155251000100010002290815595523713410100010003000558552111001100010001015143601101600161002163614173116115491000559553553552551
1004560410110191153516165251000100010002293205525593653416100010003000552559111001100010001016153502101602181002163614173116115481000560553553551561
1004560411000191154616162251000100010002283605605583643417100010003000551559111001100010001016143601101601171002163614173116115491000559553553552551
1004560411110180153716165251000100010002334005595523733410100010003000560559111001100010001015143601101601201002163614273116115481000553554553551561
1004558410110191154516165251000100010002290805516383653417100010003000552561111001100010001014143600101602201002163614073116115571000554553551562560
1004559411100211153816165251000100010002281215525613723416100010003000559550111001100010001016163601101601181002163614073116115471000553553552561562
1004559410010181154316162251000100010002331615525593653417100010003000552560111001100010001015143601101601161002163614073116115571000553551561558559
1004552411100191153516165251000100010002290815595513713408100010003000558553111001100010001015153601101601181002163614073116115481000560553553551561
1004560410110191154616162251000100010002290815585503723418100010003000559552111001100010001014143600101601171002163614273116115491000559559559554554

Test 2: throughput

Count: 8

Code:

  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  strh w0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 09 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400423000498310400271616025801001008000010080337500183947449369630400424004229964729992801062008001620024004840042319961180201100991008000010080000100800004208000202800020420111511801600400390800001004004340043400414004440044
80204400433000870104003901602580100100800001008000650018400314936960040040400402995972999280107200800162002400484004031995118020110099100800001008000010080000420800020080000200111511801600400390800001004004440041400434004440043
802044004330006631040027161602580100100800001008000650018394744936962040042400422996472999480106200800162002400484004031995118020110099100800001008000010080000420800001280000000111511801600400370800001004004340043400414004340055
80204400403000630040027016025801001008000010080006500183937849369620400424004229962729992801062008001620024004840040319951180201100991008000010080000100800004208000202800022420111511801600400390800001004004340043400434004140043
802044004230003630040039161602580100100800001008000650018393784936962040040400542995972999680107200800162002400484004331995118020110099100800001008000010080000008000205800022420111511801600400390800001004004340044400414004340044
8020440040299133010400281616125801001008000010080006500183950349369600400404004029962729992801062008001620024004840042319951180201100991008000010080000100800004218000200800022420111511801600400400800001004004440044400414004140043
80204400423000031040027160125801001008000010080006500183947449369630400434004029962729994801062008001620024004840040319951180201100991008000010080000100800004208000005800020420111511801600400400800001004004440041400434004440041
8020440042300060104002701612580100100800001008000650018394744936962040040400422995972999480106200800162002400484004231993118020110099100800001008000010080000008000003800002420111511801600400390800001004004340043400434004340043
80204400403000631040027016025801001008000010080006500183947449369620400404004229961729994801062008001620024004840042319951180201100991008000010080000100800004208000202800022420111511801600400395800001004004140043400434004340041
80204400403000273010400271616025801001008000010080006500183947449369620400424004229962729992801062008001620024004840042319951180201100991008000010080000100800004208000200800020420111511801600400390800001004004340041400434004340041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540052300111002131000400251616125800101080000108000050183947200049369624004040042299903300318001020800002024000040055400631180021109108000010800001080015164402800160216800021644140502000141614104003780000104004140041400434004340043
80024400423000000012190001400321616225800101080000108000050184000400049369744005140054299773300208001020800002024000040040400541180021109108000010800001080000000080002002800000000502000161611154004480000104005540064400554005340054
80024400473001000163300014003916012580010108000010800005018399080004936974400544005429987330034800102080000202400004005140054118002110910800001080000108001414440080014001880002160142502000131615164005180000104004840055400554004840052
8002440055300101011819000140048161632580010108000010800005018404600004936974400524005429988330034800102080000202400004005240047118002110910800001080000108001514000800160117800021644141502000141615124004880000104004840055400644005540056
800244004730011101614100140039160425800101080000108000050183988400049369754005440054299903300348011820800002024000040047400541180021109108000010800001080014164400800160114800021644140502000151615134005180000104005440055400554006440052
8002440055300111001214100140039161612580010108000010800005018400040004936968400544005429989330043800102080000202400004005440054118002110910800001080000108001415440180016101680000160141502000121611114004480000104005540053400554005240055
800244005230010101453221001400391616025800101080000108000050183995600049369834005440047299893300328001020800002024000040054400541180021109108000010800001080014154401800160017800021444141502000131613164006080000104005640048400554017940055
800244005429911001267310024021716161258001010800001080000501839692000493696740054400542999020300348001020800002024000040051400541180021109108000010800001080014144400800140021800021644141502000141613104003980000104004140043400434004340043
800244004030000000301410014003901622580010108000010800005018400040004936974400514005429977330020800102080000202400004004040042118002110910800001080000108000004200800001028000004200502000121612144004480000104004840048400524005540064
800244005430010101519300004002716012580010108000010800005018393520004936962400424004029987330027800102080000202400004005240052118002110910800001080000108001415440180016001680002160140502000111611144003780000104004340041400414004340044