Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (post-index, Q)

Test 1: uops

Code:

  ldr q0, [x6], #8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | ldst x64 uop (b1) | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1005105981000970000312102562561437252000100010001000100050746458231101510411040824389720001000100010401040111001100010000103628510548135083410551256783885007321622103701000473501000100010411041104110411041
1004104070000962410050102543441025252000100010001000100050738458241101510401040824389820001000100010401040111001100010000102408010476025005710351253993278007321622103701000352901000100010411041104110411041
1004104080000880100524102542566242520001000100010001000507384582301015104010408243898200010001000104010401110011000100001008096106210019084110491255894387007321622103701000162901000100010411041104110411041
1004104070000751200030102516512192625200010001000100010005072245816010151040104082438982000100010001040104011100110001000010216103104719029062510461254454752007321622103701000293901000100010411041104110411041
10041040800009125100301025323411302520001000100010001000507224582301015104010408243898200010001000104010401110011000100001023275105491230103510831254183181007321622103701000402101000100010411041104110411041
1004104070000872710042410252245523252000100010001000100050746458241101510401040824389820001000100010401040111001100010000101905810366019083710421254773069007321622103701000443401000100010411041104110411041
100410408000092000020102523461212520001000100010001000507544582301015104010408243897200010001000104010401110011000100001028357105361230143610541254353159007321622103711000433001000100010411041104110411041
100410407000093000040102537267262520001000100010001000507144582301015104010408243897200010001000104010401110011000100001031076105670360164110531254262452007321622103701000373001000100010411041104110411041
1004104070000770000401025246862125200010001000100010005067445815110151040104082438982000100010001040104011100110001000010000701027000003210321254783769007321622103711000483901000100010411041104110411041
100410408000076191003410251861171925200010001000100010005073045824010151040104082438982000100010001040104011100110001000010200991038000004310841255783169007321722103701000262301000100010411041104110411041

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6], #8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 10.0801

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 1e | 1f | 20 | 22 | 24 | 3a | 3f | 40 | 43 | 46 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | bb | be | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c3 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5020913434999900102140710542130656105808129565257189151304106001000040100100001000011573614935505501227901299550129557130069123699312460860100302001000010000602001000010000129330129617115020110099100401001000010000010010015054810071136691007412504640684528000321017611128999511205450566052441000050100129653129656129687129470129380
5020412912598600001950710549130462759016129584257192451304105951000040100100001000011594944951753498137701295800130136130960123966312344260100302001000010000602001000010000129859129603115020110099100401001000010000010010015054210074337831007812504494674502000321017611129391512125887614554871000050100129561129745129828130171130005
5020412990698900002060710574129779758012130099257190651322106191000040100100001000011535224899631493624611291890129243129755122992312334060100302001000010000602001000010000129834129457115020110099100401001000010000010010014054510073296701007412504530684531000321017611130095511825974598152831000050100129809129281129649130273130144
50204129547987000019809105081298141155015129502257189751471107011005644235111141113512188144984465504313411316710130919132074124294227125689665143376211177112376740411218111961326321323453115020110099100401001000010000010010047257710109169609710074125045366545660003874129413131423514955932626956621000050100132760131956133297132669132551
50204132993100600013394413271056812955512600161294604772014513881061610000401001000010000115802748971634999258013029501296731233631237413123571601003020010000100006048610000100001293201289802150201100991004010010000100000100100170524100231773001002412504535154531000321017611128964511046043596152141000050100129300129229129057129085128787
50204130976972000016707015531299361010091290662571717513501063310004401001008210000115127248824654944157112907001296941294921229073122817601003020010000100006045010080100821295561292521150201100991004010010000100000100100140522100222072951002212504534164486000321017611128994509925222554251131000050100128876128792128722128661128643
50204130552967000017717690154712951578061292182571795512281058710000401001000010000114815448784984939162012877401294821294021224823123091601003020010000100006020010000100001291851290491150201100991004010010000100000100100170573100301691031002712504518184512000321017611128908510405657552649051000050100128761129132128773128678128840
5020413102897101101510701521129445119161284532571813511581052010000401001000010000114245348875424932721112982201296531291571232843122862601003020010000100006020010000100001290601287291150201100991004010010000100000100100160516100261272841002712504517174521000321017611128763509865547575050471000050100129199128541129018130055129108
5020413166997001001650701530129297108051288202571804512541054310000401001000010000114192948945534951974112998901299661293991227103122403601003020010000100006020010000100001290561284451150201100991004010010000100000100100170542100291091881002612504473164536100321017611129252510386102564851141000050100129061129923129190129098129132
502041309539680110175012015061292676706128818257161851180106021000040100100001000011506784901320495064511288480130043129861122460312355560100302001000010000602001000010000129110128612115020110099100401001000010000010010015055110029169471002612504511164570000321017611129320510445689587556441000050100128561128280128958128894128928

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.9254

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 3a | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | bb | be | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50029133181974100000001450710522128963744128848257157351080105431000040010100001000011545644911809496701111298380128897129175122519312280560010300201000010000600201000010000128571128983115002110910400101000010000101001705501002118101810021125046341345250000314048233128675511326319596256531000050010129318129236129566129592129029
500241298339710000000014501010492129371101412884425716815109810561100004001010000100001146774488264149507621129476012936112956912235631233386001030020100001000060020100001004913018512940711500211091040010100001000010100150563100181771510021125045121245560000314038233129190510705884599153401000050010129246129556129430129065129509
50024129133962000000001390910505129221104213025925715915113010508100004001010000100001146064491124349539361129342012941012892712232531231116001030020100001000060020100001000012932712897911500211091040010100001000010100170537100191571810019125045191345130000314038223128654510645698640957371000050010130818129944129008129104130079
5002412806896810010000217091051412933171312989725717745105810568100004001010000100001151926493154049683160128889012913412966712267031224356001030020100001000060020100001000012915512968311500211091040010100001000010100212567100201291910018125045321245040000314028233128620510166096598053351000050010128844129463129063128962131068
5002412892496700000000157071054112826174212908425716485106210516100004001010000100001150999491139549839880129626012927112897812239531222586001030020100001000060020100001000012891612889011500211091040010100001000010100140526100211561710018125045331145730000314038224129307511385882563561721000050010128469128632128887128538129389
50024129148966000000001810610525129191104312874325715615105810539100004001010000100001146405488060049417351129641012893412922912218331228536001030020100001000060020100001000012903912912411500211091040010100001000010100170576100211572310020125045291245701100314038234128618510965846636058561000050010129165129257131115129448128716
50024128358963000000001630111051912934293412889425714835100610499100004001010000100001143804486882149513501129694012885312894412231331225576001030020100001000060020100001000012853512932211500211091040010100001000010100170571100181692510018125045211045480000314038233128933511545601612955221000050010128332128805129077129105129763
50024128728972000000001710710513129171124212903725716455110610542100004001010000100001144434490339049386211129466012874412944312225031230596001030020100001000060020100001000012963012927911500211091040010100001000010100201563100201782110021125045761145600000314038233129653511165721583151811000050010128687129092129192129650128979
500241296909651110000016101110557128945104412903025716065109410533100004001010000100001158635498754549611171129420012934313015712248631225436001030020100001000060020100001000012960712933011500211091040010100001000010100191514100261672210020125045171145261100314038232128643510885806577552881000050010129627129140129343128841129850
50024128625961111000001410710482129075743128460257157951066105421000040010100001000011426254877998493344811293980129497129124122932312295560010300201000010000600201000010000129321129111115002110910400101000010000101001705161001923111810021125045631145090014103140312532128744510805939629853531000050010129625130422129216129679129203

Test 3: throughput

Count: 8

Code:

  ldr q0, [x6], #8
  ldr q0, [x7], #8
  ldr q0, [x8], #8
  ldr q0, [x9], #8
  ldr q0, [x10], #8
  ldr q0, [x11], #8
  ldr q0, [x12], #8
  ldr q0, [x13], #8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3790

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | b6 | bb | be | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802093083122810010007637085216244163083036180257113801681295825160132801478000080100800004007411341381139302703043330294201253203921601002008000020080000303553021311802011009947100100800008000001008100336385556660686315786158726854298593610000326421325556325656160451101171130426179800444064097480000801003018530022300993034830203
80204302102251000000754207741568448408302717765411477126329232516014580130800008010080000400771134906514330046303593048620249320236160100200800002008000030361302601180201100995510010080000800000100810023540856436318581578898927659738700310000326601256142324705340351101171130409153800413243697580000801002995630309304653035330273
80204305072262202000785408041552419100302897995431526144629932516014180131800008010080000400759132794714630101304533041420147320173160100200800002008000030361301661180201100993810010080000800000100809853542555286148621976088988058808682210000326061135852324994330351101161130112161800383604028580000801003041230308303403052230196
80204302442241001000714108121568392160301308215391225134928162516015080137800008010080000400808132778913830255302323026620171320306160100200800002008000030361304861180201100994610010080000800000100810404141355806048592670379486458748670310000326501255734324884372351101171130429182800463434018580000801003049530416300783035630150
802043031922711010007247078414484342003002075954813931431299425160140801428000080100800004007321330854149303273050330490203233200711601002008000020080000302073030511802011009954100100800008000001008098736396501962186171738118713657658657510000326321325734325132371551101161130523164800373804009680000801003025430402305463003730698
80204302522262020000750708631608426236301517856481534182730492516014680140800008010080000400745132797014330284301343037620469320214160100200800002008000030421303371180201100993410010080000800000100811803643153426718600673898705457628622310000327121125722324827150251101171130245152800553724287480000801003017430179303183024130378
802043034022733000007752084616324362003040977854115921540295925160156801438000080100800004007131344572138303253047030217203933202201601002008000020080000300923025611802011009937100100800008000001008094817423552960585710759118723458098629810000326181205704325082181351101171130228180800363913127380000801003039630075305803016630353
802043042722711000007089082315684382403040776960716081375282625160135801308000080100800004008251326148140302963012330326203213201481601002008000020080000303093046311802011009944100100800008000001008100936420534260586313756108884061358641110000325551275836325467353651101171130212181800624443947980000801003008829923301783037330550
80204302942261000000689607851560463104301668076051699146630282516014580136800008010080000400767134126514030320302713026520273320607160100200800002008000030387302731180201100995010010080000800000100809583541750386278563174279004263988632810000325971275741324952310151101171130392189800442853677980000801003014830328301173022630198
802043010822720000207855083616404372123068377060714421495294425160135801428000080100800004007451351006043304363021430289202483203841601002008000020080000300873019911802011009943100100800008000001008096517405507358385689730138663052938646510000326311266031325327170951101171130480165800453244318280000801003043830258301223033330248

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3765

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0f | 19 | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | b6 | bb | be | l1d cache miss ld nonspec (bf) | c2 | c3 | cf | d0 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002931009228000702379915924189630077827474181317132820251600518005480000800108000040024813288740443016330276301602038132041916001020800002080000300543031511800211093010108000080000010809334604899643857708561493024527786290100003259812346853249120950201531603230032169800422952925280000800103024830230301263011930359
800242999422600077437911624461168299617665111656154129232516005280054800008001080000400270133748714129928299762994120072320207160010208000020800003023430185118002110931010800008000001080969472519762185728833158514653748624310000326051315020325355355020031603330087149800433042586480000800102995030399303723015830135
800242998222600071728421760470212301347845141722149029002516004880054800008001080000400259134388104629839300323020220061320191160010208000020800003012529833118002110971010800008000001080934442506664485212807118894255328611310000325611174729325492365020031613430260168800443263036980000800102996529986302113013530156
8002430334225000772285012804552683027683145616741762282425160039800518000080010800004002991326969041301783025730093201483204191600102080000208000030274302131180021109161010800008000001080968468506562985063902149325055528629010000325621285571324691035020031603330132152800623363287680000800103014930268300333021630226
80024300322250007788843175235936829816758442186918133002251600498005080000800108000040025213426800402989429838300872033232041316037220800002080000302323039011800211093410108000080000010809884825587653849278921593513257008640510000325711214854324918045031331603329898155800392962737080000800103007430345302333011630101
8002429971226000677882317524552443012481551318171806261025160054800558000080010800004002251324737038303233054530162200713202071600102080000208000030228301711180021109171010800008000001081012460484167386402805119574057958700810000326961284820324983375020031603330292180800322873335180000800103027530113303473032430300
8002429872224000666877016243945483020377947718522013301725160055800568000080010800004003691345279034303453033230242202823200541600102080000208000030035301361180021109201010800008000001080948436533962585172847118785059158624410000326131155064324232035020031603330172146800332832725780000800102996830075302413026130421
800243009422500066657931736502128302748205231673176226562516005180048800008001080000400253132329804530214302473021720415320314160010208000020800003000330217118002110915101080000800000108093445253626158521582798987058088617610000326421225280324686655020031603330112160800453113376180000800103039730385302993027430162
800243022622600080048201416371208297287995371563175330392516004880048800008001080000400272132681904430167301503005520221319894160010208000020800003034430296118002110921010800008000001080984487548063085872851139248860138644710000325781205439324582605020031603330050169800433533315880000800103012930022301743003730276
800243014222500073308261752399200302377746022016187929852516004980066800008001080000400258133764603630161301313004520115320167160010208000020800003022830206118002110971010800008000001080990438541662385481832138864454348662510000325621275461325221025020331603330075175800443073055780000800103020730104303163008930105