Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (pre-index, S)

Test 1: uops

Code:

  ldr s0, [x6, #8]!
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 29 | 2b | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10051052700001193010040102516311152325200010001000100010005073045824110151040104082439132000100010001040104011100110001000102602751026141120103010385192379000732162210371000332101000100010411041104110411041
10041040800010072361005010256579124520001000100010001000492104582401015104010408243898200010001000104010401110011000100010250251102851260221310234031563000732162210381000213301000100010411041104110411041
100410408010001572610020102565141419252000100010001000100050754458241101510401040824389820001000100010401040111001100010001043724810409112003310283041739740732162210371000242501000100010411041104110411041
100410407111100716100220102593129182520001000100010001000507544582411015104010408243898200010001000104010401110011000100010291216810477110002910203042571000732162210371000212801000100010411041104110411041
100410408000000526100301025037111925200010001000100010005074645824110151040104082438982000100010001055104011100110001000100000551035910002210193321847000732162210371000242201000100010541056104110411041
1004104071001005801003010258411522252000100010001000100050738458251101510401040824391020001000100010401040111001100010001017003110333113062410152941740000732162210371000211901000100010411041104110411041
1004104070000005201002010402048521252000100010001000100050738458240101510401040824389820001000100010401040111001100010001026190551037911002210233432539710732162210381000242901000100010411041104110411041
100410408110100582610040102510556192520001000100010001000507144582411021104010558243910200010001000104010401110011000100010227248102921180122110303652947703732162210371000242201000100010411041104110411041
1004104081011005521100301025011072325200010001000100010005074645825110151040104082439132000100010001040104011100110001000100680471034001002710202832347610732162210371000242201000100010411041104110411041
100410408111101731510030102511291013252000100010001000100050706458240101510401040824389820001000100010401040111001100010001030838010444117082910404142655600732162210371000292701000100010411041104110411041

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, #8]!
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1888

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9e | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502091218839120000000060308421007842108121689792211213652570469503501011710000401001000010000107684146090724642614012163312168812163211496531153756010030200100001000060200100001000012182412169211502011009910040100100001000000100109040132553106812171391172291091513721290350321377698121362502608629087761000050100121640122121121840121763121728
502041217839120000000159908531007361100121692820211214622570457503441012310000401001000010046107793945987044652878012150412175712174811489731153256010030200100001000060200100001000012184612176911502011009910040100100001000000100109161131527107242141493872301091513031281150321377668121490502769269407631000050100121711121580122110121744121678
502041217829101100000057008231007362140121699776211215162570514503761013510000401001000010000107667046027534654528012162612153012173711500531153896010030200100001000060200100001000012139812169711502011009910040100100001000000100109191138534107072081290072301091412521131190321387698121481502408588907751000050100121742121796121793121944121769
5020412184291101000000606082610070421121216257942112183025704635032610139100004010010000100001077012460238846521800121683121719121790114929311542560100302001000010000602001000010000121860121675115020110099100401001000010000001001091011425121068721410929503610929125212111250321367668121252502649508608421000050100121687121703121828121841121659
502041218019110000000057208221007363108121623807111215932570472503641013310000401001000010000107644546047184644276012180712154412168211511431151776010030200100001000060200100001000012172512164911502011009910040100100001000000100109100145573107232121189972401092212111310340321587688121362502649209407941000050100121836121532121758121908121779
5020412177991100000010581081710072011441215087991112143625704725036210114100004010010160100001075482459983646493010121864121754121747114840311526960100302001000010000602001000010000121741121641115020110099100401001000010000101001093301375171069621715927343210947134112903110321387687121334502569549008391000050100121630121743121870121647121887
5020412197491100000000563082310069611281217397992112150725705025036810132100004010010000100001078452460371646513090121554121742121684115216214116840660993354011097111606679811098111551241311241312815020110099100401001000010000001001092901335401069421412955307935910889122112203110321387678121498505049609328081000050100123881124028124300124361124436
5020412415393120000028293459264079610076831361219747921112161039070872506061017710062445181122711174114714446633954713884012160712176812176811489631154266010030325100001000060200100001000012159212174521502011009910040100100001000000100109493131566107532151192232331092313021241670321367679121457502401093116410021000050100121994122097121997121900121941
502041219309140110000047108240017042160121940801111214352570526503581013210000401001000010000107901046021484657458112195712185112185711501331157206010030200100001000060200100001000012195112200111502011009910040100100001000000100109251137521106882191192234421096313231181911032137768812164850260109510569361000050100121888121914121958121994121926
50204121828913010000004130832001776314812199982321121572257048450374101361000040100100001000010788484609828465660711217581220271220231152413115729601003020010000100006020010000100001219901218682150201100991004010010000100001010010908112453110725211149523830109471193131115032135767912166450278101310629381000050100121852121993122064121867121754

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1921

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50029122068914000000421084517520140121906785101214842570421502721013210000400101000010000107990546150424658281012183612192212186411515231156536001030020100001000060020100001000012190512179011500211091040010100001000001010894110450810782204129264811109451352114125031400482002412156950260115310669811000050010121789121956121751121937121925
5002412199991310000039708291728012812199980920121647357043050274101261000040010100001000010786634606121466261701218821218361219151151983115617600103002010000100006002010000100001219231219111150021109104001010000100000101091301145561076621614920442610902128213801510031400482004212166050266104911739761000050010122095122123121789121974121936
500241218239120000004240809173601441220807912012168425703795028210135100004001010000100001078240461032746563600121783121866122084115013311562960010300201000010000600201000010000121963121780115002110910400101000010000010108730104529107531942095948151094212511370390314002820042121568502561125113110661000050010121935122110121826122113121940
5002412208491300000041808291704096121869816201217072570394502901012910000400101000010000108032846104914661611012183012170812199411520131156316001030020100001000060020100001000012191212190711500211091040010100001000001010919110553510759213109313817109691332137007031400282005312162250272110612129261000050010122020121897121871121965122346
500241218059140000004150819172001281219647891112180125704035030610126100004001010000100001078681461111446663290121751121946121648115164311533860010300201000010000600201000010000121954121942115002110910400101000010000010108921109523107592121294042211092412521281310031400682005312167250304117011999151000050010121769121958122132121882122115
50024121772914000000400080526400124121803828111215222570412502861013410000400101000010000107927546092694661706012199812188212201511518031155666001030020100001000060020100001000012199912192811500211091040010100001000001010904011353210745203108733025109371351144008031400282014612173950272106011199651000050010122013122016121816121951121940
50024121820913000000433085217520112122036807201215022570388502701013910000400101000010000107907746054374662516012199012207012188111529431155546001030020100001000060020100001000012182412185411500211091040010100001000001010928010553710741216109437216109271312132008031400382005212189850268115711169231000050010122191122000121985121867122055
50024121838912000000453083717200104122062844201215542570451502621013010000400101000010000107858246103574665641012186112179012204511511631156276001030020100001000060020100001000012187412221811500211091040010100001000011010973111454010773218238854825109041202130037031400482003512173150266117098910401000050010121909122055121902121839122032
5002412194991400000041008481760084122012811101216712570415503061013210000400101000010000107898746078414660443012190512198212191111525331156246001030020100001000060020100001000012196812202211500211091040010100001000001010915012453510798208118863219108961422119031103140048200631216215024210909829151000050010121763121997121904121916122387
50024122024913000000447080817121120122056811101215522570418502681012710000400101000010000108058046091964651543012168912195812189611515231156036001030020100001000060020100001000012192812188411500211091040010100001000001010893011153310759204138967425109281272132035031400282002412163250244102110828931000050010121936121976121971122018122160

Test 3: throughput

Count: 8

Code:

  ldr s0, [x6, #8]!
  ldr s0, [x7, #8]!
  ldr s0, [x8, #8]!
  ldr s0, [x9, #8]!
  ldr s0, [x10, #8]!
  ldr s0, [x11, #8]!
  ldr s0, [x12, #8]!
  ldr s0, [x13, #8]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3676

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802092968622020000007043078817041112042937677353019681838219525160147801518000080100800004007421298926482950029279293321933431950516010020080000200800002921529494118020110099331001008000080000010080885193875537085206643188695650078601369311054985450192651101171129423228004351552411680000801002934029293291832920229407
802042936521910000007128076716961101882942182443921302125227625160142801498000080100800004007491300728422928529589294211940131939416010020080000200800002940929544118020110099251001008000080000010080889194116067085169652128703852298601479411450244519190551101161129594288004760452810480000801002940929181293402942129315
8020429341220101000069150777162410810829475770462175115912225251604758015580000801008000040076412881683729720294012952419348319444160100200800002008000029411294931180201100993010010080000800000100809271937658940856416351583410248368573776613353994988190351101161129419278005051050911680000801002947229239293432938129256
8020429448221101100068950791155210510029249798495171217592002251601458015380000801008000040079012920214829523292302929819307319403160100200800002008000029603293381180201100991810010080000800000100809101937156610850676411485320450268553973012052505460190951101161129563228004855054111580000801002946229288294782932829579
802042939421810000007176076417281143082936078944717811742216125160150801428000080100800004007401302765372933429154295611911531943316010020080000200800002940429305118020110099241001008000080000010080879203775162085639646148543856328629581512351764829194551101171129473318005348158011780000801002957629262293432919729544
802042940722010000117074077016001051122939680245717861867206825160143801578000080100800004008181295049582952529412294421927131950216010020080000200800002963729338118020110099281001008000080000010080902373965654085276655138515257028594277313952085603370451101161129147198005455052311080000801002948629600296362943529177
80204296432202222100690407501752104962944482351319841975209025160156801528000080100800004007651304121452951729332293441933531922416010020080000200800002939129375118020110099351001008000080000010080900373465745085534620138515452838597269311951904420372551101171129447358005551954512780000801002919729490294672932829331
802042956322120000006834078717281051162942077559117831734195825160161801528000080100800004008341303531462947229243294211951531931216010020080000200800002943229189118020110099281001008000080000010080876373865636085303680158633654758565772612257245047373051101171129444348004651758411280000801002941829371295362951729410
8020429304219202000071230869169610022829450803426175819002076251601548015280000801008000040076312924294729579293182943519146319272160100200800002008000029377295731180201100993210010080000800000100809283834854890852336391388113449878592373012651265326353951101161129320298005057360412380000801002944229223294592946829338
802042948622121000006936077916001001162942775845616601840201725160140801428000080100800004007641296804552949129531294191932331925416010020080000200800002935229352118020110099261001008000080000010080877363535642085335661158804050858596284612546654733360351101171129476298005460849912080000801002948029430292522938829437

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3674

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 18 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c3 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800292987822000007145084417121131362953382435518481759217425160053800628000080010800004002401297695104629450294692949819530319596160010208000020800002954029350118002110950101080000800001108096340359538524168411968624998861117251354893524003502000416242959223800495815768980000800102948529461293782947229372
80024293682210000677208341680120116296288063571835172421992516005080053800008001080000400264130449810542965729405294511920031947416001020800002080000292782936711800211095210108000080000010809573766060851036639943725028863007841364720552900502000216442948638800365845769880000800102926629353293282911329387
8002429637221000070360830172813596294597873281810173721642516005480061800008001080000400275129943215502948429423295441949131934516001020800002080000294832957411800211095710108000080000010808833855555855156729935705289863227771304929587102502050416242947526800436315479380000800102944429326294122951329338
80024295972210000687908381728971482921282734918891986232025160060800498000080010800004002891286429155529464295382945919250319567160010208000020800002958129512118002110958101080000800000108090636953128506268611947444575858868351254778538532502050416442986631800466375769480000800102937729586293342949529674
80024294712210000655788831181612012829544840360192215772283251600548004480000800108017840033513029720542293942937829592194243191041600102080384208000029337296311180021109571010800008000001080955386564685071692109417253968574580612646875981005020012162429519438005056954710280000800102952629679293142952429635
8002429307220000267500829168811112429486801347182718061946251600508004880000800108000040027212983951042292412921229465194573194411600102080000208000029404293151180021109471010800008000001080924428528785309684149156054938625474113144625681315020044167329523448004365160710480000800102953229535297012958129338
80024296262210000748508541720115128295208313421732162619912516006980057800008001080000400286129692911042292792928429508193933193021600102080000208000029447294251180021109341010800008000001080943407571585431713119276251648611384013547975556065020104216442939324800406245038880000800102947629397293982937829399
800242935822100006577083117201211402938181031218301779209725160049800518000080010800004002901299505105129372293552917619282319485160010208000020800002930329307118002110960101080000800000108092740456408566068113945445073863457471275158531703502000216242912022800435545489480000800102936129322294932935729246
80024294572190000694808591776128116294067883031976182921482516005980052800008001080000400246128573911040296222964729155194283194061600102080000208000029589293841180021109581010800008000001080954385531885207719199284251198591485014349185541005043057165629603348004154257611180000800102947529385293712950329443
80024293072200010630708611720118140294798373371635186821676016006080061802608019480000400285130199600352936029334295001950531946116001020800002080192293322947711800211094410108000080000010809293735625847816729937445136860448191284710540200502000416422921031800355935499780000800102940829527294642938429465