Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STLRB

Test 1: uops

Code:

  stlrb w0, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100510848610106920251000100010004534401032108481539051000100020001047108411100110001000100060141000001000607311611108101915100010851085108510851085
1004108480001032202510001000100045344110321084815394210001000200010841084111001100010001000601410000010006073116111044191915100010851085108510851048
1004108480111069202510001000100045344110321047815394210001000200010841084111001100010001000601410000010006073116111081191915100010851085108510851085
100410848001106920251000100010004534401032108481539421000100020001084108411100110001000100060141000001000607311611108119015100010851085108510851085
100410848010106902510001000100045344110321084815394210001000200010471084111001100010001000601410000010006073116111081191915100010851085108510851085
10041047801010692025100010001000435681103210848153942100010002000108410841110011000100010006014100021010006073116111081000100010851085108510851085
100410848010106920251000100010004534411032108481539421000100020001084108411100110001000100060141000001000073116111081191915100010851085108510481085
100410848001106920251000100010004534411032104781539051000100020001084108411100110001000100060010000010006073116111081191915100010851085108510851085
100410848011106920251000100010004534411032108481539421000100020001084108411100110001000100060141000001000607311611108119015100010851085108510851085
100410478011106920251000100010004356811032104781539421000100020001084108411100110001000100060141000001000607311611108101915100010851085108510851085

Test 2: throughput

Count: 8

Code:

  stlrb w0, [x6]
  stlrb w0, [x6]
  stlrb w0, [x6]
  stlrb w0, [x6]
  stlrb w0, [x6]
  stlrb w0, [x6]
  stlrb w0, [x6]
  stlrb w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0011

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802058008860011100011910800731625801001008000010080000500375858849770088003680088699203700478010020080000200160000800896392611802011009910080000100800001008001514607180015001580000146014005110116118008601506800001008009080089800568008980090
80204800896001100001810800431225801001008000010080000500375854049770098003780089699193700478010020080000200160000800556396011802011009910080000100800001008001615601208001410148000014601410511011611800850161612800001008008980089800908005680090
80204800556001010001510800730258010010080000100800005003758540497700980036800896991937004780100200800002001600008005563959118020110099100800001008000010080015146050800140214800001460140051101161180085015165800001008009080090800898008980090
80204800896001110001500800740258010010080000100800005003758588497700980036800896992037004680100200800002001600008008863959118020110099100800001008000010080015160928001412148000014601401511011611800860161610800001008005680090800908009080090
80204800886001100001900800741225801001008000010080000500375858849770098003680088698863700478010020080000200160000800886395911802011009910080000100800001008001515051800150217800001460140051101161180086015167800001008009080089800898009080089
802048008960011000021008007416258010010080000100800005003758588497697580037800556992037004780100200800002001600008008963960118020110099100800001008000010080015146071800140214800001460142051101161180085016167800001008009080090800908009080089
802048008960011100015008007312258010010080000100800005153758591497700880003800886991937001380100200800002001600008008963959118020110099100800001008000010080015150018007400158000015601410511011611800850161610800001008009080090800908008980090
8020480088600111000160080074122580100100800001008000050037585424976975800368008969886370047801002008000020016000080088639591180201100991008000010080000100800141560508001400148000014014005110116118008600168800001008008980056800568008980089
802048008959911000015018007311258010010080000100800005003758541497700880037800896992037001380100200800002001600008008963960118020110099100800001008000010080014156061800140014800001460140051101161180086015167800001008008980089800908005680090
80204800886001110001500800731225801001008000010080000500375854049770098003680089699203700478010020080000200160000800897521411802011009910080000100800001008001514060800150014800001401400511011611800520151511800001008009080090800908009080059

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0010

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580089600111100691500800651625800101080000108000050375815204976967080028800806993337002780010208000020160000800478008011800211091080000108000010800000080000000800006050200716046800771210080000108008180081800488008180081
8002480080599000000162000800320258001010800001080000503758152049739800800288004770013370110800102080605201602428018580212118002110910800001080000108000060108000000380000605020041604680077102151180000108008180081800818008180081
800248008060000000012100800321625800101080000108000050375815204977000080028800806993337006080010208000020160000800478008011800211091080000108000010800006010800000008000060502004160678007782151180000108004880081800818008180081
800248004760000000012100800650258001010800001080000503758152049770000800288008069933297014680010208024220160000800808014611800211091080000108000010801206027801200208000060502004160468007767151180000108008180048800818008180081
800248008060000000012100800651625800101080000108000050375656804976967080028800806993337006080010208000020160000800808008011800211091080000108000010800006010800001008000060502006160468007782151180000108008180147800818008180081
8002480080599000002121008003202580010108000010800005037565680497700007999580047699003700278001020800002016000080047800801180021109108000010800001080000601080000000800000502006160648007782151180000108008180081800488004880081
8002480080600000000011080065162580010108000010800005037581520497700008002880080699333700278001020800002016000080047800471180021109108000010800001080000601080000000800000502006160648007710215080000108008180081800818004880081
8002480080600000010610080065162580010108000010800005037581520497700008002880080699331670060800102080000201600008008080080118002110910800001080000108000060080000000800006050200616046800778201180000108008180081800818008180081
8002480080620000000010180069202580010108000010800005037583440497700408003280047699003700648001020800002016000080047800841180021109108000010800001080000600800000008000060502005160668004419191580000108008580048800488004880085
800248008459900000001008006920258001010800001080000503758344049770040800328008469937370064800102080000201600008008480084118002110910800001080000108000001480000000800006050200616064800811901580000108004880085800858008580048