Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (post-index, 32-bit)

Test 1: uops

Code:

  stp w0, w1, [x6], #8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss data (0b)1e1f2022293a3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int store (96)inst ldst (9b)l1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)aaabacafbcl1d cache miss st nonspec (c0)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
10051040700095341019010252099425200010001000100010005074645824110401040824389820001000300010401241110011000100010000109001005000510056887331633103710001000100010411041104110411041
10041040800009000130102525332842520001000100010001000507624582411040104082438982000100030001040124111001100010001030010941810051302011103571087331633103710001000100010411041104110411041
1004104080000162800121210257582252000100010001000100050738458240104010408243898200010003000104012411100110001000100009300100513203110325767331733103710001000100010411041104110411041
1004104080000152610170102520268425200010001000100010005076245824010401040824389820001000300010401241110011000100010280868231008018122610255887331633103710001000100010411041104110411041
100410408000080001501025153631325200010001000100010005074645824110401040824389820001000300010401241110011000100010280109318100530201110375967331633103710001000100010411041104110411041
1004104080006110001201025235353252000100010001000100050754458241104010408243898200010003000104012411100110001000102207310221007122141610315897341633103710001000100010411041104110411041
100410409000692100112010251594442520001000100010001000507544582411040104082438982000100030001040124111001100010001024010582210081300810055967331633103710001000100010411041104110411041
10041040700009000140102519364132520001000100010001000507624582401040104082438982000100030001040124111001100010001000069001005000510055807331633103710001000100010411041104110411041
10041040700033100001112102527245725200010001000100010005076245824010401040824389820001000300010401241110011000100010000109001005000510055727331733103710001000100010411041104110411041
10041040700001027124124102505332520001000100010001000507624582401040104082438982000100030001040124111001100010001031010130101150148100551047331633103710001000100010411041104110411041

Test 2: Latency 3->3

Code:

  stp w0, w1, [x6], #8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)191e1f2022293a3c3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)ea? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1020910040751010224488823177683012010025768117106122520100101001000010106100005221834688240496960100401004086816874320106200100082003002410040122111020110099100100001001000010010905711953370693102622601904368051093812107600111717016001003710000010000101001004110041100411004110041
102041004075110021639881316806941161002577514912334252010010100100001010010000522101468824049696010040100408674387472010020010000200300001004012211102011009910010000100100001001092201303357065810262257091632789109478117400000710117111003710000010000101001004110041100411004110041
102041004075000023078182316729001561002579496104232520100101001000010100100005221014688240496960100401004086753874720100200100002003000010040122111020110099100100001001000010010954814443670679102362590918408411090816120670000710117111003710000010000101001004110041100411004110041
1020410040751100181585868176075011610025807105107212520100101001000010100100005221634688240496960100401004086743874720100200100002003000010040122111020110099100100001001000010010929813833780694102562801906327691091617118970000710117111003710000010000101001004110041100411004110041
102041004076111021008585117448001681002579489114332520100101001000010100100005221654688240496960100401004086743874720100200100002003000010040122111020110099100100001001000010010886014063720673102622890886468501093210103100000710117111003710000110000101001004110041100411004110041
1020410040751000215790802176079015610025790109117262520100101001000010100100005221414688240496960100401004086743874720100200100002003000010040122111020110099100100001001000010010890013593700682102422860898468411091110122200000710117111003710000010000101001004110041100411004110041
10204100407511102250102805176087015610025779969923252010010100100001010010000522187468824049696010040100408674387472010020010000200300001004012211102011009910010000100100001001093001354376068610249267090642861109117106900000710117111003710000210000101001004110041100411004110041
1020410040751100205887841173673010410025797999628252010010100100001010010000522117468824049696010040100408674387472010020010000200300001004012211102011009910010000100100001001091201293387066310264263094038853108939114100000710117111003710000110000101001004110041100411004110041
10204100407510102049837891752670156100258021109022252010010100100001010010000522187468824049696010040100408674387472010020010000200300001004012211102011009910010000100100001001092001313370068610233281088636825109368115300000710117111003710000010000101001004110041100411004110041
102041004075111020709982717288001201002581412010640252010010100100001010010000522133468824049696010040100408674387472010020010000200300001004012211102011009910010000100100001001090801340382067910248259092436877109006119900000737117111003710000010000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)1e1f2022293a3c3e3f404446494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
10029100407511102073968182512890276100257760147171362520010100101000010010100005210974688241496960100401004086963877020010201000020300001004012411100211091010000101000010109217145239206741022826129489487110919291320726403163310037100001010000100101004110041100411004110041
100241004075110021871038792488810260100257970173160522520010100101000010010100005210414688241496960100401004086963877020010201000020300001004012411100211091010000101000010109377130938506741026928319307086710976241280706403163310037100001010000100101004110041100411004110041
10024100407510002103848322544871188100257880141157332520010100101000010010100005210894688241496960100401004086963877020010201000020300001004012411100211091010000101000010109167144237506721024027218997885310860211264706403163310037100000010000100101004110041100411004110041
10024100407510002016818092584840252100258430193190432520010100101000010010100005210174688241496960100401004086963877020010201000020300001004012411100211091010000101000010108757142740906721026827308827691910945261308706403163310037100003010000100101004110041100411004110041
100241004075101021091018452560820312100257830171117442520010100101000010010100005210814688241496960100401004086963877020010201000020300001004012411100211091010000101000010109137148535707181024527908867482410955201242706403162210037100003010000100101004110041100411004110041
100241004075100122021018252568850292100257940147197522520010100101000010010100005210654688241496960100401009986963877020010201000020300001004012411100211091010000101000010109008134637207141025527109309085110908301323706403162210037100001010000100101004110041100411004110041
10024100407511112316898332528890244100257830161180442520010100101000010010100005210974688241496960100401004086963877020010201000020300001004012411100211091010000101000010109258136035006861024729109069687510922221330706403163310037100001010000100101004110041100411004110041
100241004075110020439782725048512361002582601681574525200101001010000100101000052100946882414969601004010040869638770200102010000203000010040124111002110910100001010000101089171405393069610252282090812287710892201195706403162210037100007010000100101004110041100411004110041
10024100407510102022848662600960252100258060184163472520010100101000010010100005210734688241496960100401004086963877020010201000020300001004012411100211091010000101000010109148138140206661023825609188285010916271365706403163210037100000010000100101004110041100411004110041
10024100407510002091938492552893240100258260209179382520010100101000010010100005211054688241496960100401004086963877020010201008920300001004012411100211091010000101000010109437146140507121023225418948684310910181273726403163310037100001010000100101004110041100411004110041

Test 3: throughput

Count: 8

Code:

  stp w0, w1, [x6], #8
  stp w0, w1, [x7], #8
  stp w0, w1, [x8], #8
  stp w0, w1, [x9], #8
  stp w0, w1, [x10], #8
  stp w0, w1, [x11], #8
  stp w0, w1, [x12], #8
  stp w0, w1, [x13], #8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5098

retire uop (01)cycle (02)03l2 tlb miss data (0b)191e1f2022293a3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)aaabacafbcl1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
8020940648305001680841768171211817240746768156417201732516075983737800008011480008403183187303602784937666408294079330676630750160122200800162002400484079275118020110099100800001008000010080874042605151490280507236880361572815055253603111511801600409298076380000801004072340736407274081140844
8020440727306001809842790168010911640776783154519902006716041582920800008011480008402619187217201765493763840741407833059463082816012320280016200240048408088111802011009910080000100800001008088903838470986180516257899423228813614914457111511801600407848335880000801004067240836407804076940764
8020440821306001782887794172812810040790778173617871382516523380672800048011680008406305189001607194938053407444081030801630704160123200800162002400484086875118020110099100800001008000010080847036664561489080524246883262997813975193701111511801600407218823580000801004082340801408114074840720
802044087330500204989772616801151444082477018021690140251607318064580000801148000540318018748420125649377074074040867309206307861601232008000020024000040779751180201100991008000010080000100809070414146111867805222458561081618813754794025000511011611407588252080000801004088840738407294081140803
80204407143052017918317891720115116407767791504171914525161658811728007380100800004025481874008023794937668407734064530703330796160100200800002002400004075482118020110099100800001008000010080862039484691083980514231834381619814464933589000511011611407818050980000801004077640763407594073440756
80204407173040017289387701712137128407767911930196290251607148090880000801008000040235918751360237493773340750407573072733081316010020080000200240000407357511802011009910080000100800001008085504036462288080524264888441431814505253940000511011611407288038380000801004077340748408234080540717
80204408253050018997937811680132124406567692060176113225160565808198000080100800004023111875448013904937762407354081030760330670160100200800002002400004078375118020110099100800001008000010080868042564742183180560235866521497814155353593000511001711408738657780000801004079340872407814075540834
802044072830600152476078516961161324072576617531758732516103082916800008010080000403392187436801452493755940782407183074133073716010020080000200240000407097511802011009910080000100800001008089404198430386780505245887481565814475074330000511011711409198034980000801004074240730407464071040875
802044077930500168985780315921151324078676218461597158661609648409980000801008000040315218715600282493777740842407103075733075516010020080000200240000408428111802011009910080000100800001008089404488461489580498243885841720813655633954000511011611408368057480000801004078240766408304076940733
802044078030700185177080416961141244076976116781812146251611658048580060801008000040986318728320358493765240796407973074033068416010020080000200240000408267511802011009910080000100800001008087504207443485180503248843521659814074944478000511011711408298038180000801004088640738407884078240778

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5101

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)1e1f2022293a3e3f4046494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)67696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int store (96)inst int alu (97)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfl1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
8002940742306000193883484516961129640816787175616411332516076580631800008001080000400785187871223149377044076440822307953308211600102080000202400004082075118002110910800001080000108096312427445268618051625908243616618148253039271425020031611407778043180000800104076640842407234075740835
800244089930610119328497891656122148407418231650172818925165377835058000080010800004017491875544738493775140748407913072433083916001020800002024000040902821180021109108000010800001080933045185098894805372730835461721814234703804005020011713407708043380000800104075640740407984081340861
80024408183070001878850816271211213240725784176816441582516049280674800008001080000401214187684017564937812407094083430798330906160010208000020240000408617511800211091080000108000010808810430451210860805612840894781638813785253540005020011611408668045480000800104076440856408454082340787
80024407743060001920886821169612384408207651518179815525169639806728000080010800004013501875352273493783540800407943073933074816001020800002024000040754751180021109108000010800001080925043605128881805532770905321675814885573555005020011611408538049880000800104081840828408184075940844
80024407823060001866870791169611096408927841627153412325160448864018000080010800004007381874488125493772140796407923078533084016001020800002024000040743751180021109108000010800001080873040404867846804792680881381552814505363918005020011811407198386380000800104087940765407754080240869
800244078830500020648417881672101100408278061859178814225160380807988010080010800004025471873048264493768740690408283072933083516001020800002024000040823821180021109108000010800001080877037434709835805742650895681651815075303479005020011611408678028980000800104080640778408414075640769
800244085530900017588278271704115924081174819311768157251604078038080032800108000040215318720882634937652407384070730750330741160010208012020240000409147511800211091080000108000010811950405752272899805752694890681660814495693900005020011611408328047280000800104073640730407724081140831
8002440752304001195373478616961181484078977918961638147251606298024580000800108000040195118724782307493772440798408013067833076116001020800002024000040809751180021109108000010800001080919041724406885805402610856361615814885064026005020021611408038328980000800104080240815407464077040836
800244082730500017978487841712137136407967951577169513525160634870398000580010800004021021874574300493770840777408053072733089116001020800002024000040892751180021109108000010800001080886037315057858804932780846361507813555293946005020011711407928014980000800104077240807408804083340851
8002440848305000188487277817281351204076879916511685132251679598614980000800108000041690618775367574937669408174081030739330700160010208000020240000408067511800211091080000108000010808710367948648788050327009371261721814025203527005020031711408428059480000800104082040771408934075040808