Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (post-index, Q)

Test 1: uops

Code:

  str q0, [x6], #0x10

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)1e1f20223a3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd store (99)inst ldst (9b)l1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)ea? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1005104080000128421301025220022520001000100010001000507704582410151040104082438982000100020001040104011100110001000103204046100310426410284320073116111037100001000100010411041104110411041
1004104071011613262160102580610252000100010001000100050746458241015104010408243898200010002000104010401110011000100010393248001011410010100411367173116111037100001000100010411041104110411041
10041040811010153412121025005525200010001000100010005073845824101510401040824389820001000200010401040111001100010001019154801101111421617104312447173116111037100001000100010411041104110411041
1004104081101301536150102514706252000100010001000100050738458241015104010408243898200010002000104010401110011000100010429614111012120014103012407173116111037100001000100010411041104110411041
1004104081001015321160102512037252000100010001000100050738458241015104010408243898200010002000104010401110011000100010172036001011000011100411407073116111037100001000100010411041104110411041
100410407110001526219010252464625200010001000100010005073845824101510401040824389820001000200010401040111001100010001008848535101411503214104811367273116111037100001000100010411041104110411041
10041040711000150018010253412825200010001000100010005075445824101510401040824389820001000200010401040111001100010001050744436101101401611103111447073116111037100001000100010411041104110411041
10041040800000800200102520333252000100010001000100050770458241015104010408243898200010002000104010401110011000100010420414121001004032710444360073116111037100011000100010411041104110411041
10041040801016732120102534623252000100010001000100050762458241015104010408243898200010002000104010401110011000100010340441241004203224710474160073116111037100001000100010411041104110411041
100410407000008381210102534453252000100010001000100050762458241015104010408243898200010002000104010401110011000100010400520610040000010044280073116111037100001000100010411041104110411041

Test 2: Latency 3->3

Code:

  str q0, [x6], #0x10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)18191e1f2022293a3c3e3f404446494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1021410040750000001048562230611728201188100252265019222623252010010100100001010010000522195468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001247701647145001441110441509024765045081250926630007101171110037100001631110000101001004110041100411004110041
102041004075000000103115922631148000968100252224123920225252010010100100001010010000522203468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001249041607149401542110111524024895045321250928735007101171110037100003330110000101001004110041100411004110041
102041004075000000105484722621140010712100252217022519321252010010100100001010010000522147468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001248501602149701488110331503024615045551250817616007101171110037100002082110000101001004110041100411004110041
102041004075000000105215022631143222776100252224018418627252010010100100001010010000522179468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001246501772152201444109901524024935045411249627625007101171110037100002020110000101001004110041100411004110041
102041004075100100103804522631144021752100252239023021828252010010100100001010010000522165468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001247701633154501522110101494024855045971251519662007101171110037100002830110000101001004110041100411004110041
1020410040751000001052160228211672111028100252273018518915252010010100100001010010000522149468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001246901737146701528109571512024735045841250925691007101171110037100001455110000101001004110041100411004110041
102041004075000000103296122631172051960100252247020023228252010010100100001010010000522187468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001247301612150501456109871513024935045531251628629007101171110037100001461110000101001004110041100411004110041
102041007775110000104346923011170481724100252229020521720252010010100100001010010000522155468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001249701545149201534109901520024935045631248521665107101171110037100003193110000101001004110041100411004110041
1020410040750000001047656227111592111020100252270021320520252010010100100001010010000522171468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001248141578150801536110021504024615046621250525654007101171110037100002987110000101001004110041100411004110041
102041004075000000103926422791170450964100252232019524320252010010100100001010010000522195468824100161004010040867438747201002001000020020000100401004011102011009910010010000100001001248101703141401477110531491024775045481249926709007101171110037100002406110000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)181e1f20222324293a3c3e3f404446494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
100341004075220209960582297100196830956100252261421726825252001010010100001001010000521041468824100211004010040869638770200102010000202000010040100401110021109101010000100001012457817161556015541094915240248150460412502329334640416441003710000389110000100101004110041100411004110041
10024100407520220101767522911001432507561002522300254227332520010100101000010010100005210334688241002110040100408696387702001020100002020000100401004011100211091010100001000010124731616511504015331095615180247750465712501307464640316341007810000355310000100101004110041100411004110041
1002410040751000010347712294100149640776100252244019821632252001010010100001001010000520745468824100211004010040869638770200102010000202000010040100401110021109101010000100001012477919701502015741094315130248550450712500327740640416431003710000298410000100101004110041100411004110041
1002410040751101010161732312100141660952100252271022123828252001010010100001001010000521073468824100211004010040869638770200102010000202000010040100401110021109101010000100001012481915891532015771094315131249350455712507347670640416441003710000352310000100101004110041100411004110041
1002410040761001010173692260100144020924101272242025924940252001010010100001001010000521105468824100211004010040869638770200102010000202000010040100401110021109101010000100001012471816221566015481094115210248450465512491298490640316441003710000190610000100101004110041100411004110041
1002410040751111010245722297100168050880100252274121020934252001010010100001001010000520953468824100211004010040869638770200102010000202000010040100401110021109101010000100001012491816461548015721096015150248450461012518347050640416441003710000193110000100101004110041100411004110041
10024100407510000101046522911001512909121002522380226232362520010100101000010010100005210814688241002110040100408696387702001020100002020000100401004011100211091010100001000010124932316711530015771096415140247750452812484268300640416441003710000191110000100101004110041100411004110041
10024100407530300101405723021001512809121002522490220238322520010100101000010010100005210494688241002110040100408696387702001020100002020000100401004011100211091010100001000010125002418021532015721095314956247750462112501336030640416441003710000405810000100101004110041100411004110041
10024100407520200103627322911001496709641002522660266230252520010100101000010010100005210974688241002110040100408696387702001020100002020000100401004011100211091010100001000010124951816591551015871094015060250050452412492277730640416331003710000269110000100101004110041100411004110041
10024100407522220100265822851001480509361002522630237173382520010100101000010010100005210734688241002110040100408696387702001020100002020000100401004011100211091010100001000010124871816441521015851093115362248450463612505336910640416331003710000319410000100101004110041100411004110041

Test 3: throughput

Count: 8

Code:

  str q0, [x6], #0x10
  str q0, [x7], #0x10
  str q0, [x8], #0x10
  str q0, [x9], #0x10
  str q0, [x10], #0x10
  str q0, [x11], #0x10
  str q0, [x12], #0x10
  str q0, [x13], #0x10
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5021

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)18191e1f202224293a3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606167696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)c2c3cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
8021440102301100001019111822931017041220040109229373455750251601028010280000801008000040053518469360024017740206401703007933012616010020080000200160000402374016611802011009910010080000800001008249381101245211247180041155002476762469182541741247005110021622401498000280000801004016140199401184013740101
802044018030210000988515723001012569208402022306737553502516010280102800008010080000400535184496800240158401754019730123330083160100200800002001600004023640171118020110099100100800008000010082507161851245518247880089151802453316467182540721488205110021622401438000280000801004021340251401864020440151
8020440153300100001000817622191012641212440146231064367430251601028010280000801008000040053518425680024013240187401982999433024016032020080000200160000402414019011802011009910010080000800001008250181457245432494800921508024969864678825461141068005110021622402148000280000801004016840185401884017340184
80204401523002000010194722288101288914440178231447164640251601028010280000801008000040053518459760024018840160401923008233013516010020080000200160000402254019511802011009910010080000800001008250216194424725249880043154322479884482682541651438205110021622401168000280000801004016640187402154021440156
8020440189302100001023913322841012161310440118229153978949251601028010280000801008000040053518420400024019740172402183017633015416010020080000200160000401224022011802011009910010080000800001008248381884241352495800921501024921002465582541961221105110021622401618000280000801004023140159401084018340168
802044018930010000103237822911013848204401602264531568592516010280102800008010080000400535184506400240112401624016630020330149160325200800002001600004024541377111802011009910010080000800001008290016304423992872435806701555025008061204183076721212005289039632413638112980000801004123841490413094135241071
8020441612312101111011082100122721014081478041511220280581786226016183080558806008089281080406778189161200241341415494127731019443087416231620081212200162184415334158511180201100991001008000080000100831141018032437295245880733154202484476123058316211516110053530312134412278158480000801004195341741419054190842057
80204402353011110010149592309101296960040206223645868941251601028010280000801008000040053518449681024020040232402313009533016116010020080000200160000401874016511802011009910010080000800001008248891931245732500800941493024989864730825161071090005110021622401668000280000801004014340129401914029240126
802044022330111000103471172308101264112084016323067577302125160102801028000080100800004005351848040002401404022640194301073301791601002008000020016000040254402141180201100991001008000080000100824799189324615246480107153702488976473082539551020005110021622402638000280000801004024140266401934014340188
80204402073012220010206702342101264174364011822836343566625160102801028000080100800004005351847032002401654011340165301583301661601002008000020016000040204402041180201100991001008000080000100824999143924695246980070151012476740474082539832015105110021622401368000280000801004017240193402634020440116

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5019

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)1e1f202224293a3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd0d5map dispatch bubble (d6)d9dbddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
800344016330120029543432280101488528440246222537347755251600128001280000800108000040008718434560240094401474013330114330091160010208000020160000401354012111800211091010800008000010824761218882421224888003115040248474045928251030195002502007170067401588000280000800104012340125401254011440125
800244014130120009993452291101440521640127231680959721187160356803488018080234803244017671853464024038940514406713039223303721606702080240201607204067540966318002110910108000080000108267414173624269324708022614990247276468498274633180600505605330055402138000280000800104014940133401254014940209
80024401583002220997852233310142481084011922616045866525160012800128000080010800004000871847032024014940141401973009133011016001020800002016000040142401151180021109101080000800001082496815802477524768003815231250875644428251441139400502003170056401228000280000800104015840146401374013740129
8002440133301100010044302345101472525640164225276368588251600128001280000800108000040008718434320240107401704017830089330080160010208000020160000401464014011800211091010800008000010824858170724434248380029154212462103446308250634199700502005160053401148000280000800104015040191401744010640143
80024401753011010972936234810144861764009222715357805425160012800128000080010800004000871845424024010540152401243010533010916001020800002016000040158401261180021109101080000800001082486917712431424848003615390248075045978249743160300502005170065401368000280000800104017140167401494017540138
80024401023001111974757231710124084364013922687066344725160012800128000080010800004000871846048024006940189401483009233015016001020800002016000040171401701180021109101080000800001082468825492451424878004415571248897646148250142193400502005160035401788000280000800104014340165401324016640156
80024401773011110101794922961012721029640108230372555737251600128001280000800108000040008718444640240121401774010330099330112160010208000020160000401714015111800211091010800008000010824831021212430124788004815190249275645448250441188900502006170035401758000280000800104013540140402134017740144
8002440175300100099845223001014161130440130228154149567251600128001280000800108000040008718436720240144401344014630077330112160010208000020160000401304013311800211091010800008000010824748172724611024568004015452247688446698251125174901502006170035401468000280000800104012840108401424018440134
80024401773011011987339230010124812324401572343506559312516001280012800008001080000400087184374402400734013040194301123301151600102080000201600004011840197118002110910108000080000108248610139424601524878003715400248474845818251543165801502005170045401178000280000800104018840166401934017940257
80024401603001011986156232210140091564013122944323833525160012800128000080010800004000871843000024008440126402943009933011216001020800002016000040159401051180021109101080000800001082506915022468224738004215131248810844512825063198000502003160055400828000280000800104012240118401384010940142