Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STRB (register)

Test 1: uops

Code:

  strb w0, [x6, x7]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005552411011710153916161251000100010002302815545543763410100010003000554553111001100010001015144400101600181002164414073116115601000553564555555555
1004554410111710153916164251000100010002300415545523673410100010003000554551111001100010001015144400101612171002164414173116115511000555555555555555
1004553411101910153916164251000100010002290805645543653412100010003000552554111001100010001014154400101601181002164414073116115501000564555555555555
1004554411111710153916161251000100010002293315535543673412100010003000554563111001100010001015154402101601181002164414073116115511000555553553565565
1004554411102200153716162251000100010002302815545523673410100010003000554554111001100010001015154400101602161002164314073116115601000555555555553552
1004551411011810154816165251000100010002300415545523673410100010003000554552111001100010001016154400101601171002164314273116115511000555555555553553
1004552411001710153716161251000100010002302815545523673410100010003000554554111001100010001014154400101602161002164414273116115511000554553553565555
1004554410011910153716162251000100010002302815635543653412100010003000563554111001100010001015144402101601171002164414273116115511000555555555555555
1004553411011910153716165251000100010002348715525543663412100010003000553554111001100010001014154400101600161002164314073116115491000553553565555555
1004554410002000153916161251000100010002302815545543763411100010003000563554111001100010001014154401101600171002164414273116115511000554553553555555

Test 2: throughput

Count: 8

Code:

  strb w0, [x6, x7]
  strb w0, [x6, x7]
  strb w0, [x6, x7]
  strb w0, [x6, x7]
  strb w0, [x6, x7]
  strb w0, [x6, x7]
  strb w0, [x6, x7]
  strb w0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 1f | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054004330000704002816012580100100800001008000650018394741493696240042400422996172999580106200800162002400484004231996118020110099100800001008000010080000420800020028000204211151180160400390800001004004340043400414004140041
802044004230000304002801602580100100800001008000650018394741493697040042400402996172999480106200800162002400484004232026118020110099100800001008000010080000420800020028000224211151180160400394800001004004340043400414004440041
80204400433000000400290160258010010080000100800065001839378149369634004240043299597299958010620080016200240048400403199311802011009910080000100800001008000000800000028000024211151180160400390800001004004340043400414004340044
802044004230000004002516161258010010080000100800065001839378149369634004240042299627299958010620080016200240048400423199511802011009910080000100800001008000042080000000800022011151180160400370800001004004140043400434004340043
802044004230000304003101602580100100800001008000750018395031493696040043400432996172999280106200800162002400484004231995118020210099100800001008000010080000421800020028000224211151180160400390800001004004140041400434004340041
80204400422990030400290160258010010080000100800065001839378149369624004040042299627299958010620080016200240048400403199311802011009910080000100800001008000042080000002800020011151180160400370800001004004440043400434004140041
802044004029900314003001602580100100800001008000650018394741493696240040400422995972999480106200800162002400484004231993118020110099100800001008000010080000420800021028000224211151180170400390800001004004140043400434004440041
8020440042300000040028160125801001008000010080006500184003114936962400404005429959729992801062008001620024004840042319951180201100991008000010080000100800000080002002800022011151180160400370800001004004440044400414004340043
80204400423000031400250012580100100800001008000650018394741493696040042400422997372999480106200800162002400484004031993118020110099100800001008000010080000420800020008000004211151180160400390800001004004340041400434004340043
80204400423000001400270002580100100800001008000650018393781493696240042400422996172999480106200800162002400484004031995118020110099100800001008000010080000420800020058000204211151180160400400800001004004340043400414004340041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000000003000140044161632580010108000010800005018398361493697940059400592999333003080010208000020240000400504005311800211091080000108000010800000340080002008800001636142502001616121240049080000104006040052400594005340059
80024400523001011010001710014003916168258001010800001080000501839836149369794005940059299933300278001020800002024000040053400581180021109108000010800001080016153621800160120800021636141502001116131340055080000104005140059400524006040052
800244006130010000100017100140025161602580010108000010800005018393521493696240040400422997733003080010208000020240000400504004211800211091080000108000010800151436008001603178000216341425020013161312400552080000104004840048400594005340059
800244005929910100000019100140041161662580010108000010800005018399321493697940058400592999433003280010208000020240000400524005911800211091080000108000010800161636018001601208000216361405020012161212400482080000104006140059400514006040051
8002440059300101100000170001400451616125800101080000108000050184022014936972400494005229988330037800102080000202400004005740050118002110910800001080000108001515342080016002080002160140502001416121340049080000104005940053400584005340058
80024400583001001010001810014004216165258001010800001080000501840220149369724005940050299853300398001020800002024000040059400591180021109108000010800001080015143601800160018800001436141502001216131340058080000104005340058400534006140048
80024400502991011010001810014004616165258001010800001080000501840220149369724005140052299873300418001020800002024000040061400521180021109108000010800001080016153622800160120800021636141502011016121240055080000104006140048400514006040051
80024400593001000000001410014004316165258001010800001080000501840268149369724005140052299873300388001020800002024000040058400511180021109108000010800001080075143601800169217800021636140502001116111140044080000104005440060400534006140053
80024400472991011010001410014004616165258001010800001080000501840246149369784005340058299943300278001020800002024000040049400521180021109108000010800001080014153600800160117800021634142502001216131240055080000104006040053400524006040061
8002440199311100001000210001400461616111580070108000010801085018402201493739140049400522998833003780010208000020240000400574046711800211091080000108000010800151436018001601835800621636143511501389201341125080000104124241163413074117041305