Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (pre-index, D)

Test 1: uops

Code:

  str d0, [x6, #0x10]!

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10051040811101232113010257135252000100010001000100050762458241015104010408243898200010002000104010401110011000100010409353221007025016710167277073216221037100001000100010411041104110411041
1004104081110916119010258344252000100010001000100050762458241015104010408243898200010002000104010401110011000100010171915226100800120710007237173216221037100001000100010411041104110411041
1004104081110916119010258314252000100010001000100050754458241015104010408243898200010002000104010401110011000100010209271171007012481010247157273216221037100001000100010411041104110411041
100410407100090012010258225252000100010001000100050754458241015104010408243898200010002000104010401110011000100010349395101010114618710167197273216221037100001000100010411041104110411041
1004104081110916270102583152520001000100010001000507544582410151040104082438982000100020001040104011100110001000102473932610070224161010167237273216221037100001000100010411041104110411041
10041040811109161140102551332520001000100010001000507544582410151040104082438982000100020001040104011100110001000101971102510070126161010167237373216221037100001000100010411041104110411041
100410408110090014010256105252000100010001000100050754458241015104010408243898200010002000104010401110011000100010348233371011011716710007237173216221037100001000100010411041104110411041
100410408111091611301025830325200010001000100010005075445824101510401040824389820001000200010401040111001100010001025823291007012822710187317173216221037100001000100010411041104110411041
1004104071100916114010257334252000100010001000100050754458241015104010408243898200010002000104010401110011000100010238193361007022618710167357273216221037100001000100010411041104110411041
10041040811101016114010258224252000100010001000100050754458241015104010408243898200010002000104010401110011000100010248193171010002418710187437073216221037100001000100010411041104110411041

Test 2: Latency 3->3

Code:

  str d0, [x6, #0x10]!

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 44 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102141004075000000105098322561163230732100252244023821525252010010100100001010010000522165468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124770169714820148210994149102461504587125133069007101171110037100003431110000101001004110041100411004110041
102041004075000000104286123081148810944100252240017925140252010010100100001010010000522151468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124690159214910155611008149802505504597125042968107101171110037100002268110000101001004110041100411004110041
102041004075010000102937623011163221944100252284024320628252010010100100001010010000522173468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124730162514870152810977148002497504565125192878707101171110037100001414110000101001004110041100411004110041
102041004075000000104435622701149630840100252240021425227252010010100100001010010000522157468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124730161414630146710996149802485504558125123466707101171110037100003914110000101001004110041100411004110041
102041004075000000103804423011171220780100252267024023932252010010100100001010010000522165468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124850164115210151111017149202497504530125182765407101171110037100001492110000101001004110041100411004110041
102041004075000000102934322631166430944100252227018320514252010010100100001010010000522141468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124850158514970147911052150602481504505125123369337261171110037100001746110000101001004110041100411004110041
102041004075100100106086222681170468724100252243021117937252010010100100001010010000522125468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124644165014960150510960149202473504509125063268917101171110037100002463110000101001004110041100411004110041
102041004075101000104465122611169660712100252229019718623252010010100100001010010000522157468824110017100401004086743874720100200100002002000010040100401110201100991001001000010000100124814160414940151910980150402497504561125022268317101171110037100002797110000101001004110041100411004110041
102041004075110000104105122751172040716100252229020723629252010010100100001010010000522157468824010017100401004086743874720100200100002002000010040100401110201100991001001000010000100124774168114940154011023150702493504638125093161607101171110037100003855110000101001004110041100411004110041
1020410040751001001047640228211712427241002522430215204272520100101001000010100100005221334688240100171004010040867438747201002001000020020000100401004011102011009910010010000100001001248541521147501531110181510024815045981251130648071011711100371000031210110000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 44 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10034100407622021024570226411488150936100252267027225637252001010010100001001010000521097468824110022100401004086963877020010201000020200001004010040111002110910101000010000101247718160415464157610964154622481504541124904477800640516551003710000421310000100101004110041100411004110041
10024100407512021020070226711616110720100252239025724248252001010010100001001010000521073468824110022100401004086963877020010201000020200001004010040111002110910101000010000101249718170915294157210936152202501504675124953074173640516451003710000293310000100101004110041100411004110041
1002410040753330100868123021149660720100252216025923435252001010010100001001010000521137468824110022100401004086963877020010201000020200001004010040111002110910101000010000101247915162715399156510945150202465504536124873579002640616351003710000285310000100101004110041100411004110041
1002410040752200101559522951154480760100252218022920863252001010010100001001010000521065468824110022100401004086963877020010201000020200001004010040111002110910101000010000101249316166915380156510924150102485504626124963374000640516551003710000299210000100101004110041100411004110041
10024100407510011023068230511504110880100252208025023130252001010010100001001010000520809468824110022100401004086963877020010201000020200001004010040111002110910101000010000101249624153615170154010938152902466504529124944674801640416441003710000195110000100101004110041100411004110041
100241004075201010338982275117121107241002522360244263582520010100101000010010100005211054688241100221004010040869638770200102010000202000010040100401110021109101010000100001012480161510155731567109681536424655045991250045698140640316551003710000352210000100101004110041100411004110041
1002410040753303102756722681169620716100252208021320652252001010010100001001010000521001468824010022100401004086963877020010201000020200001004010040111002110910101000010000101250421159515310153210944151262478504564125165170502640516431003710000272910000100101004110041100411004110041
10024100407520001020976224311704807161002522130187188352520010100101000010010100005210654688241100221004010040869638770200102010000202000010040100401110021109101010000100001012475111540155901560109571520024815045761249334754143640416541003710000206310000100101004110041100411004110041
10024100407533021018253226111712120724100252229024021132252001010010100001001010000521041468824110022100401004086963877020010201000020200001004010040111002110910101000010000101249520155115682160810971154902470504652125003779570640416441003710000221310000100101004110041100411004110041
10024100407522001019468226511704100724100252235023925540252001010010100001001010000521089468824110022100401004086963877020010201000020200001004010040111002110910101000010000101248119158115400158910947154302481504600125053567770640416341003710000251610000100101004110041100411004110041

Test 3: throughput

Count: 8

Code:

  str d0, [x6, #0x10]!
  str d0, [x7, #0x10]!
  str d0, [x8, #0x10]!
  str d0, [x9, #0x10]!
  str d0, [x10, #0x10]!
  str d0, [x11, #0x10]!
  str d0, [x12, #0x10]!
  str d0, [x13, #0x10]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5018

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8021440155300300001048540224611960426440126220535048994251601028010280000801008000040053118440120240177401444015930102330116160100200800002001600004014640125118020110099100100800008000010082506321615246972500800381528025082544592825104323441400511021622401098000280000801004019940140401304011040172
8020440112301505001040113522311196810264401512219807724492516010280102800008010080000400531184362802401274015040118300823301081601002008000020016000040135401751180201100991001008000080000100825113617792449182457800541508024842544636825115918001450511021622402018000280000801004023440185402104021240175
8020440176300500001027296224611952926440132221263377357251601028010280000801008000040053118450441240140401604018330082330158160100200800002001600004017840195118020110099100100800008000010082511411851244352477800621510524842524607825097919611400511021622401518000280000801004014040138401384010840174
80204401643016000010431572260119521526440176221987799028226116202381233807108110881080405642189188812412034145441372309724431137162096202812182001616924141541420101802011009910010080000800001008285326251624052612461806011544024982541224983052271684000528429622410138112280000801004135341474415044064941408
8020441467311300101011532969224111696952041566223792610258042191615018123080660812608118840337618661241240503415034145131075443129616230720281218200161452414444157311180201100991001008000080000100831532416912441310245680329155102472921293682758711860000530421622401628000280000801004021540161401434013940143
80204401423013330010383432284116721356840100224986169485251601028010280000801008000040053518463601240147401384017130072330113160100200800002001600004012240161118020110099100100800008000010082487181676246314247880038150302484254469982512531655060511021622401478000280000801004011140184401204014640096
802044011930133300103924722701171295204009822515609175025160102801028000080100800004005351843888124010240161401023003833007616010020080000200160000401634013811802011009910010080000800001008247130211824266248380043152302476254457782513411621000511021622401398000280000801004015740132401914013340133
802044010930130000104973622841171265004017222445737216125160102801028000080100800004005351845472124015840133401523003833008516010020080000200160000400874013611802011009910010080000800001008249235250124348250880042152702507254463982503432291000511021622401548000280000801004017140148401064015940149
802044010130050000105517022391195213264401242212633660532516010280102800008010080000400535184525612401444015240122301913300901601002008000020016000040181401441180201100991001008000080000100824973518292421102485800401516524682524570825054818671400511021622401148000280000801004016940161401904016240135
8020440129300500001045261223911968142644010922188258554825160102801028000080100800004005351843384124016840167402783005733008516010020080000200160000401314010911802011009910010080000800001008250028220724315246080049152242492254461582512461613000511021622401288000280000801004018240123401554014240134

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5024

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8003440162300101100100474522971001448510040137225940554333251600128001280000800108000040008718441040124011240096401323012133008716001020800002016000040133401191180021109101080000800001082480013002462524948003515180250498645478250321109300502081742401718000280000800104009140104401244013840109
8002440103301000000100083822891001248722440075227241655257461600128001280000800108010840064718469000124009240148401013009433009016001020800002016000040120401091180021109101080000800001082486019952472424858002715180249884845218252823118200502041747401178000280000800104016240106400794013440150
8002440131300000000972355232920015043220400902276390549372516001280012800008001080000400087184307201240093401094014730057330120160010208000020160000401474010511800211091010800008000010825060101124161524778003115210249492245868250826220501502041724401338000280000800104015040129401294013340077
800244011230001000099661922991001264526040098226853555851251600128001280000800108000040008718470200124010140126401543006733006916001020800002016000040111401501180021109101080000800001083146013062479424808004015160247059868318310228152500502041742401348000280000800104017640208402494021940191
80024403003011100001022718222920101280222040240225656554842251600128001280000800108000040008718500320124028040237403583019533012816001020800002016000040285402151180021109101080000800001082468922642425824708008615360246049447098255367175001502051745402448000280000800104014940163401634010940167
80024401023011101001005053231201012729336402302252469641942516001280012800008001080000400087184813601240151402904022830140330241160010208000020160000401714024711800211091010800008000010824760154424288242780119150502488976474282552119192800502041742402448000280000800104028640185401824023440161
80024402123021110009942187229201015441317640115230859974435251600128001280000800108000040008718760000024012040199401623009533010216001020800002016000040186401502180021109101080000800001082490416962393824828013315170248497646728255164161100502021724402248000280000800104025040188402174014440168
8002440197302020000102212592300010150415260401632273525614542516001280012800008001080000400087184698411240171402254026930069330221160010208000020160000402784025511800211091010800008000010824788199324146246180108152302472478474982548116102400502021624402648000280000800104024640241401644021140131
800244017731311100099759022670011496202684017522745498103225160012800128000080010800004000871847824012402214026640213301913301831600102080000201600004016140091118002110910108000080000108248616238224270247980100151302474756476182590102181004502031653402758000280000800104024540139401984020740261
800244019830120000097231122309001150413212402302241539695562516001280012800008001080000400087185270801240264402484022030174330219160010208000020160000402354017411800211091010800008000010824782024542428525078011315014248897646358255460205900502041742401518000280000800104025640201405174022040219