Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (pre-index, Q)

Test 1: uops

Code:

  ldr q0, [x6, #8]!
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 23 | 29 | 2b | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | ldst x64 uop (b1) | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10051040811100093281003412102913457252520001000100010001000507544582311015104010408243898200010001000104010401110011000100011034627410511055123810691257311579270073116111037100025221000100010411041104110411041
1004104081110009919000050102512216232520001000100010001000507704582411015104010408243898200010001000104010401110011000100001008805010524130623103312556103054727073116111037100025241000100010411041104110411041
10041040811000187191000401025712226252000100010001000100050754458241101510401040824389820001000100010401040111001100010000103070861049603363310571253511286961073116111037100037221000100010411041104110411041
1004104081110007224000050102515292302520001000100010001000507304582411015104010408243898200010001000104010401110011000100001037707810731412403310441258312296060073116111037100023161000100010411041104110411041
100410408111000641410001010251112429252000100010001000100050746458251101510401040824389820001000100010401040111001100010000102282911058401722331034125726348560373116111037100031251000100010411041104110411041
100410407111000800000044010253222220252000100010001000100050754458241101510401040824389820001000100010401040111001100010000102382471054112462510361256593869713073116111037100039471000100010411041104110411041
1004104071110006216000040102520037292520001000100010001000507624582411015104010408243898200010001000104010401110011000100001043909910572134105010561255583950704673116111037100037461000100010411041104110411041
100410407111000972810001010258523212520001000100010001000507384582411015104010408243898200010001000104010401110011000100001018140115106361321841106312588752108711173116111036100025441000100010411045104110411041
100410408110010531810002010250222232520001000100010001000507464582411015104010408243898200010001000104010401110011000100001029808310611311012411046125598345970073116111037100049381000100010411041104110411041
1004104071000008122200040102510303212520001000100010001000507544582401015104010408243898200010001000104010401110011000100001049811051046611305010691257483742624473116111037100025161000100010411041104110411041

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, #8]!
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.8889

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 3a | 3f | 40 | 43 | 46 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | bb | be | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50209133470987410004000017801110583129285712091284992571711511341052510000401001000010000114764448874304939164012878101305361293711224593124598601003020010000100006020010000100001294721292281150201100991004010010000100000100100198571100371511331002912504635214522480321157654129268513446284616151581000050100129514129626129795129419129679
5020412938996821200001001430910542129996101006128693257190051250105421000040100100001000011479504901354494760601293120129471129222122994312345960100302001000010000602001000010000129749129211115020110099100401001000010000010010023357710031916221002712504526244533400321147644128960511245881578950621000050100128995129543129046128696132023
50204131678990280070002728394625487105301316947110101306097047217751439105861000040244100001000011464294906064493434211300440130628130977123203233125527641473392611090109486758211219110671322091316332415020110099100401001000010000010010062254210073129929501006912544511224527300440241241414130858510986233594351721000050100129638129245128849130794131161
502041306191020312200302003430101051712909261206129417257174751212105511000040100100001000011378524846617491132701290820129728128841121734312263960100302001000010000602001000010000128458129478115020110099100401001000010000010010018456810032811231002412504549214511200321149144128848510905485525453651000050100129223129563129426128932129279
502041290019663003031000315070151612812091206128006257170551094104841000040100100001000011426464868387492840901287530128379128695121779312280860100302001000010000602001000010000128732128725115020110099100401001000010000110010017453710029117341002912504547234507330321147644128160511345582558448051000050100129151128817128745129392129334
5020412875496430030000003200701536129251711091280892571840511721055410000401001000010000113920548693994951167012864001287861293001219593122721601003020010000100006020010000100001291041279941150201100991004010010000100000100100173553100341412181002412504517204498330321147644127983510265012474143331000050100128270128402128545128463128546
50204128773965310010000031809016651297727806129098257185551266105201000040100100001000011371444882238491822401289170128139129036122387312257760100302001000010000602001000010000128913128437115020110099100401001000010000010010024257810028127231002612504553184518200321147644128611510865248510045471000050100128186128448128766128423128336
5020412825596230030300003490100150612888971317128718257170551186104981000040100100001000011384584859603491176001284540129086128704122420312250060100302001000010000602001000010000129448128962115020110099100401001000010000010010017251510030159241002712504628244548200321147644128507511785676518049651000050100128988128828129728128716129258
502041295479632000020000339090155112931878051282292571693511401055910000401001000010000114392148936554932131112896801294431289341215443121729601003020010000100006020010000100001291641292961150201100991004010010000100000100100256591100321514291002512504487234505300321147654129318511405381584350731000050100127801128148128979128546130791
502041291029693030000000346090151112889971315128161257162151106105111000240100100001000011405014857401493266201286160128527128562122392312280060100302001000010000602001000010000128507128566115020210099100401001000010000010010017951910028117251002912504503244535330321147644128145510985195595847891000050100128599128738128664129160128183

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.9504

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 24 | 2b | 3a | 3f | 40 | 43 | 46 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | bb | be | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002913348497620001037707100530129885611107312862525721045133210601100004001010000100001144894489612149473760129443012923312897512328331228386001030020100001000060020100001000012940412906111500211091040010100001000001010017461010130147120101261250463912744852000003140482111128362512605031502846011000050010129236129623129213128999129158
50024129163965202000392071005021293141111506012917625719575124410688100004001010000100001145374488234549522570129252012990012987612257631235366001030020100001000060020100001000013022412983011500211091040010100001000001010016664910131319143101311250451211945332000003140185011129469512985480550751511000050010129472129372129621129316129638
5002412923597020020043408100607129685712706313044725719365144210651100004001010000100001149694491347649681720130063012917712893512237431234046001030020100001000060020100001000012939313063811500211091040010100001000001010019257610132209136101281250452012445002000003140197011129564512765295500646591000050010129487130248131072129787129307
50024129545967202201395071005331302389118178130466257195151296106431000040010100001000011473874923710499004101302160130226129910122368312282960010300201000010000600201000010000130949130925115002110910400101000010000010100192676101301511127101261250451311945532400003140182012129177513665618589449111000050010129259129790129708129786129784
50024129744969200000406010100577129511610806912920025720205126410616100004001010000100001148629491290049667600129650012963012941712266231228266001030020100001000060020100001000012960012947411500211091040010100001000001010021259010143169126101251250450011746572001003140182011129317512025062490245031000050010129273129416129214128900129097
5002412943696722000038307300541129930711707712996625718615125410613100004001010000100001145314491883749543990129880013039412976412289831223976001030020100001000060020100001000012919912896611500211091040010100001000001010019258510127168127101301250452612945012000003140182011130171512585364545649251000050010129850129105129013129362130129
5002412950497422200040107100550130170911306712933325719065143410655100004001010000100001146430493350549670570129329012920812965512249331229476001030020100001000060020100001000012987812970911500211091040010100001000001010024262810130139122101251250448712245242200003140182122129477512464985471349991000050010129672130325130504130225130085
50024129235971220200399071005271300289112077129390257202351294106141000040010100001000011529554907717495534101295080129694129963122860312327460010300201004810000600201000010000129366129252115002110910400101000010000010100214620101351212127101281250452811945682480003140182033129467512745891546549071000050010129368129712129638130451130591
50024129603971200200398010300511129846811506912868325718885128810608100004001010000100001152292491308349600790129880012937512966412335531234576001030020100001000060020100001000012951912923511500211091040010100001000001010019255510130149121101351250451511944522100003140182011129786513445149557148011000050010129220129782129783130140129438
5002412917996920200038809100504129496711707412965825721225140410691100004001010000100001144522491511849488470130364012947513010012327631227456001030020100001000060020100001000012952612967011500211091040010100001000001010031659310151128123101341250451512145442000003140182011129957512584988516349931000050010130221129058130235129736129468

Test 3: throughput

Count: 8

Code:

  ldr q0, [x6, #8]!
  ldr q0, [x7, #8]!
  ldr q0, [x8, #8]!
  ldr q0, [x9, #8]!
  ldr q0, [x10, #8]!
  ldr q0, [x11, #8]!
  ldr q0, [x12, #8]!
  ldr q0, [x13, #8]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3781

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | b6 | bb | be | l1d cache miss ld nonspec (bf) | c2 | c3 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020930949227000000007569085716243961003005077160514641396291696160461803258000080100800004007661351748144309193018930467200863201301601002008038820080000302523030011802011009929100100800008000001008122343450786308608772911900705956863181000032707123527432511103052871721130771161813254083957380000801003082731452316433143531374
8020431504235010000777587264791162439640831343807514149414033768269161680812468102681388812464071941369513043312573131331233210411502113916296320281344204813423147031422818020110099521001008000080000010081831458497593986687715129218611083872461011832589118537432453504052651811131068166815113673557680000801003134331696312003153131544
802043174623400000088778770477116004501803127079956714501182369738116139982160811708206581602409910136893415230891317863168421037181213701637202088188620881890317103183511180201100994910010080000800000100817404355572887872677216911134139128706510144326071195459324818070526911052131282168818823764147380000801003209831807315253224131854
802043202423701100091082667047971712437116319437895631563159035704151631458213480388819248191141094413915031483154931893315372085718821384161151202813442048114831344310108180201100994110010080000800000100809284605739634866757947912525859868101000032639136570532500103051101171130255190800524443876780000801003014130204300253018230406
8020430307228000000007031080815683912203010379053313091389278925160143801468000080100800004007421347219145302943045130316201283200291601002008000020080000302823031011802011009932100100800008000001008094744749466448569675810953705807866421000032622107606232567203051101171130354163800444003489280000801003012229985303623020330242
8020430036226000010006467084115844121843026878256513531471282945160151801508000080100800004007251343391042303423026930541199853204141601002008000020080000303363045311802011009924100100800008000001008094841054836238519977113923765325866651000032639120530232468903051101161130029150800313773507980000801003014730266300553010630077
802043019222600000000700308151520453188301217785471382122427592516014580144800008010080000400711133794104530071299873007820243320383160100200800002008000030292303131180201100994810010080000800000100809674225268649860537788912486123864161000032681127495432470833051101171130238163800463653618380000801003025429967302273028230331
802043033422600000000665508181616415124304018425081345150130282516014280144800008010080000400688134125304330463306103021020208320183160100200800002008000030166301641180201100992310010080000800000100809504134845618853567867875465447864651000032638114556932491003051101161130242154800484223197580000801003024230437302373003330443
8020430222227000001006868081912884381523036883549113321646303625160151801368000080100800004007561328661139302283026530498202593201721601002008000020080000302943047711802011009924100100800008000001008093549651166538520580810900525641866261000032621117565132455605051101171130254165800444004347880000801003047030310303303025830236
8020430218227000000007596083916244932243071783548713391528292125160142801418000080100800004007111335537049300973044430222201463204021601002008000020080000301013041311802011009956100100800008000001008093642656196578590472210917745505866641000032661118583932520903051101171130279171800493774077180000801003033330184303133032630305

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3787

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | ldst x64 uop (b1) | ldst xpg uop (b2) | b5 | b6 | bb | be | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80029307722281000171057911656450100305157904621366151131942516005280071800008001080000400240133869201353003830304301752004732031116001020800002080000305903068011800211098510108000080000108094504525068616854777511492844522286599100003258513460083244561815502400031600023430504166802444204118680000800103033230280302673055730355
800243034222820010755678616323902003020678855116501459303025160058800538000080010800004003151335874013630224301853029620145320382160010208000020800003021230291118002110944101080000800001080998314765102590855657329902245419864341000032597117564832460937155502430031600033530392161800363293868380000800103011030251305543018530347
800243039722510110727384517764082242994080152114241473284625160053800448000080010800004002551337277004330299303653051220386320290160010208000020800003030530387118002110934101080000800001080971040157206288608074798763657918669810000326581136119325320005502400041600023430364182800493793508880000800103025230288300533022730285
8002430160227000107392819178444019230205839543142913932819251600548005380000800108000040030613455811135302603031130152200663202041600102080000208000030245303501180021109211010800008000010809280388497164785570766139133664508700310000326311335745325380009502400031600023430363170800443453388480000800103003930350301523031630201
800243016622800000744082016803791123044281162815511463306825160058800578013080010800004002451329377114030254303893019720313320117160010208000020800003064630352118002110957101080000800001080938040251666228613674298794856508671810000326171165961325079003502400031600023430515150800373783778280000800103029230231302403029030594
800243035122900000740878714884311163014578151415961590296825160047800468000080010800004002931328970013830070301583037720113320158160010208000020800003055030108118002110965101080000800001080969043053436298483373478854252138603110000326991265230325209007502400031600033530334160800344583929480000800103010030183303723006130178
8002430049226000007658813162439922830291815562132615433052251600548004980000800108000040027613446110142300133012630437205883201741600102080000208000030464303891180021109401010800008000010809641343851046088523070214882765213865131000032599109518932488716010502500031600033430180176800373013397580000800103007130300305133027930192
8002430286227100007047851162440520429872794463151116113012251600518005580000800108000040023813292170149304253060330332201613202841600102080000208000030290303511180021109441010800008000010809931544752686028574578459052860528582810000326031165661325079066502400021600023430387200800343833547980000800103011030290298773035030242
80024301732271100076008431264439963026880553013211462290025160058800448000080010800004002741318465014330388303123033320281320548160010208000020800003045830132118002110973101080000800001080966184255311653855397589885385217866881000032658130603032475116055024000316000324301531808004836734310480000800103023330388300273022430420
800243019622510100697381316164561723016279652114331480289425160048800588013080010800004002901342102014230078302733020820329320132160010208000020800003035430246118002110940101080000800001081009184215271632854757571086040552386084100003264711952023247451810502430021600032430152159800373883576680000800103030230504303033016330379