Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (post-index, S)

Test 1: uops

Code:

  ldr s0, [x6], #8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f1e20223a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb5b6bbl1d cache miss ld nonspec (bf)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
100510408010056121101025111671425200010001000100010005074645824110151040104082438982000100010001040104011100110001000010160062102600130831102340628480073216111037100027271000100010411044104110411041
1004104070000512003010250147152520001000100010001000507624582411015104010408243898200010001000104010401110011000100001000003710250000060104122424480073116111037100027281000100010411041104110411041
1004104380000431301010257135162520001000100010001000507624582411015104010408243898200010001000104010401110011000100001020120461029201601829103727516320073116111037100028231000100010411041104110411041
10041040810004420010102571512125200010001000100010005077045824110151040104082438982000100010001040104011100110001000010070037103500001225103921318320073116111037100027341000100010411041104110411041
100410458010056100401025171121125200010001000100010005077845824110151040104082438982000100010001040104011100110001000010190069103122150022103129220560073116111037100030321000100010411041104110411041
100410558000079001010430111222520001000100010001000507464582411015104010408243897200010001000104010401110011000100001000006110340000027102336616640073117111037100035351000100010411041104110411041
100410518000061001010257144182520001000100010001000507064582211016104010408243898200010001000104010401110011000100001000004210280000035102728525240973116111037100020301000100010411041104110411041
1004104080000631215010250244152520001000100010001000507464582811015104010408243898200010001000104010401110011000100001000026710341320321238103528622560073116111037100020421000100010411041104110411041
100410407000065241101025727419252000100010001000100050762458241101510401040824389820001000100010401040111001100010000103400451026401202224104521621320073116111037100027351000100010411041104110411041
10041044700005513012010250131112520001000100010001000506984582411015104010408243898200010001000104010401110011000100001000005810200146014461043557174002373116111037100025331000100010411041104110411057

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6], #8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1914

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f4043494d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
5020912201891310001000596083910079201481217567923712145525704965035810127100004010010000100001083586460405046467911121913121733121766115201311555760100302001000010000611941000010168121733121764215020110099100401001000010000010010915111752810711208139548645109611393134101203210176111215035034010049168561000050100121783121656121762121644122255
50204121680912100001005761768261007522108121707809211213952570543503881013510002401001000010000107771446065744653825112217812174412183411507331154266010030200100001000060200100001000012189812184911502011009910040100100001000001001089621285521071721810930843410947132111212040321017611121738502908589327941000050100121705121783121553121689121630
50204121782914100000005060852100736111612180381112121433257050550336101441000040100100001000010767424605004465603611217291217751217691151313115369601003020010000100006020010000100001217281216021150201100991004010010000100000100109331116551107172061194484331091212531191030321011711121594502629148668291000050100121786121813121639121879121840
50204121788911210000005310860100752216812181682022121473257050850370101281000040100100001000010771804605653465527811218051220511219531149733115381601003020010000100006020010000100001218731217271150201100991004010010000100000100109343115532107302091092782411097612641362031321017611121496502819429187241000050100121852122160121603121745121696
502041220249131000000054608291005521108121653810321213762570478503681012310000401001000010000107651745978754650869112171212168612158211491531152906010030200100001000060200100001000012167312179211502011009910040100100001000011001095631155221073622013924683010905133411810703210176111214845025296210569651000050100121906121810121809121970121625
5020412187891320000000546084210073611081219298293312135425704785035010130100004010010000100001086161460155046481831121772121860121815114889311551460100302001000010000602001000010000121802121783115020110099100401001000010000010010948111556510707209994632271089913421212050321017611121641502389048567731000050100121704121790121882121757121767
5020412174991111000000486085710073621041217608114112125025704905037610142100004010010000100001077030460006046463411121692121824121758114940311552460100302001000010000602001000010000121687121727115020110099100401001000010000010010919199539107302161994186132109421265124205960321017611121605502589229987871000050100121727121839121773121793121744
502041217629131001000050308361007202136121865802131214602570484503581014010000401001063511024114900146520154703168112370812416012430411613121111696166095335421113911149671281117611154124796124337301502011009910040100100001000001001097811215441078121911906468416310937124212213303495430723123513505449809028151000050100124041124654124892124220124999
5020412442093500000033316020817100568110012430581031121435257047850364101291000040100100001000010776844604604465045011219271217001216341148223115369601003020010000100006020010000100001216361217711150201100991004010010000100000100109004115529107262311212407443109741202127235032101761212158950264111911649221000050100121898121936121851121987121996
50204121951913100000003920810010744210012192984022121688257051450370101311000040100100001000010791474608233465557611218301218561218081152173115527601003020010000100006020010000100001221171219741150201100991004010010000100000100109593121586107392031398272161095212341412614032101761112181550262111710409071000050100121881122026121841121978122011

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1893

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f181e1f2022293a3e3f4043494d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfl1i cache miss demand (d3)d5map dispatch bubble (d6)daddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
500291220269155000010415083717603120122050763411214382570379502901012110000400101000010000107982446206854653114012177801217001217281151600311559660010300201000010000600201000010000121992122058115002110910400101000010000010108832111535106961991187944301087512451362350031400982057121717502601128107310051000050010121832121847121836121828121778
50024122054913210000039308081712225212196183240121698257040050272101351000040010100001000010789694602654466147601217820121877121930115300031155836001030020100001000060020100001000012198712196311500211091040010100001000001010880129853510722209995468341090411781314330131400782067121674502549829889551000050010121980121824121772121818122019
5002412182891444000004510804178441081218017874012169525704215026210133100004001010000100001078267460841246605750121829012192112180311524003115512600103002010000100006002010000100001217411219751150021109104001010000100000101090881165291072020399107833109311238122443003140068207612162950274115010268651000050010121808121900121758121899121933
50024122057914300000048008051720312012184579731121340257041850272101321000040010100001000010784294600013465915401218960121852122103115254031154266001030020100001000060020100001000012191812179411500211091040010100001000001010932310953310703208109134631109591177128309003140068206101216875029298011149291000050010121967121685122064122086121973
500241220179143003000455079117524140121877807511215772570412502821014310000400101000010000107914046159184658089012181801217561219211152180311565360010300201000010000600201000010000121761121840115002110910400101000010000010109183108538107082091597850371092112151203930031400108201010121654502449729958761000050010121846121900121964121868121999
5002412202791250000104740778171241081217358085012136345704365027810139100004001010000100001079428460947646575520122049012193912187611532303115449600103002010000100006002010000100001219991219361150021109104001010000100000101089641205301073521279144829109191267130498003140068209712162150282105911339811000050010121833121835121989121795122092
5002412179991330000004430821170439212191278550121554257038850270101241000040010100001000010772594604969465488801217450121808121799115009031154526001030020100001000060020100001000012204612186511500211091040010100001000001010878610152710691194138996431109211166134303003140088208912158550246106810989581000050010121742121887121960121938122122
50024121957912400000050708001672410412191077750121650257041850252101331000040010100001000010792394606699466577501218760121810121971115540031154816001030020100001000060020100001000012182212188411500211091040010100001000001010901410153410712193119147230108931358129401200314007820761215275025010519929341000050010121872121872121815121919121800
50024121921912400400042208301720313612194976830121691257043050300101301000040010100001000010778444605650465857901217870121914121652115073031154006001030020100481000060020100001000012193212194711500211091040010100001000001010875411152210694211118954624109221217123331100314001182010712169350256101411159661000050010121693121854121861122028122061
5002412185391330000004640822171231001218747914012156825703825028410140100004001010000100001078159460801646600840121719012187812211911527303115570600103002010000100006002010000100001218121217701150021109104001010000100000101087931155251069121312903683210876123711430300314007820101012154650260107410219251000050010121907122055121707122039121948

Test 3: throughput

Count: 8

Code:

  ldr s0, [x6], #8
  ldr s0, [x7], #8
  ldr s0, [x8], #8
  ldr s0, [x9], #8
  ldr s0, [x10], #8
  ldr s0, [x11], #8
  ldr s0, [x12], #8
  ldr s0, [x13], #8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3683

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0f181e2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
80209301742211101069177761760115116295118206591923192322212516015880152800008010080000400849130136814629355295722943519066319370160100200800002008000029191294261180201100992710010080000800001100808891734957840854396611687242581986350803133541451921703051101171129352258005952650612080000801002944429667294292946829474
8020429477221100007329815178498152295697926831945202821092516014980146800008010080000400759130148115029646294672960219326319350160100200800002008000029474295581180201100994610010080000800001100809121839359020856716041687436542486005703127505452981803051101171129579208004658359011880000801002947129506293922937529612
802042932422200000725576817129814429466777545186419142077251601528015680000801008000040084113002361542947129301292041912731941416010020080000200800002930029351118020110099261001008000080000110080896039253280862036961887786559786128716116555646030010051101171129515248003869158112480000801002983129447295102940729448
80204294062200000069107781656118132294007725602075196622302516013780152800008010080000400806129590615229728293532924619495319377160100200800002008000029461294621180201100991810010080000800001100808780366535908587264614903645420862657941225600508900305110117112937621800525064579680000801002939529478293462927329277
80204293982210000070877961680991002947274462218841950217625160153801558000080100800004007501306662161295852954529569193483196051601002008000020080000294722925411802011009930100100800008000011008088003875541085368655148503046978643875013751995273093051101161129471178004456348111080000801002958029750295912935229662
8020429501222000107012835167210311629291779735195621432187165160140801458000080100800004007771289060152292402974929559194203194651601002008000020080000294962929011802011009921100100800008000011008091403375701085870598148945656108658774712348464649033051101161129337318004954860613280000801002939929595296632959729520
80204294972220000076688461712931122951378169518921855205025160154801418000080100800004008091287518143293682933129297191583194591601002008000020080000292332933911802011009924100100800008000011008085603895913085623653159123058228648676912654975086006051101161129469248005351950511080000801002950529285293202930829321
802042947322100000726282517441212482940081385019121911230225160153801548000080100800004007571301731148296632926429388195583192371601002008000020080000294952957911802011009926100100800008000011008087404155318085747641158686651008610573012351655025003051101161129403278004755658011380000801002934929467294882950729356
802042946622100000764680816161121442951075079618122112215925160152801518000080100800004007841301274140295932931929509194133194031601002008000020080000292722935811802011009932100100800008000011008090003715186085610707119132856948626778011456025391003051101171129280308005355553412080000801002957829718293112962729487
802042932922100000702378717121011522944879552319021856229225160155801448000080100800004008141302743149295982966229262192173195981601002008000020080000292502955611802011009944100100800008000011008091403455890085191681189061005811861387421305393522600005110117112941621800526285559180000801002948729403294842956229419

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3641

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f181e1f202224293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
800292941921820001106232082110720971682900378542019512019199725160050800638000080010800004002851283383038290632919729077191933190861600102080000208000029027288961180021109241010800008000011080887043954050848517631089742490985627614142546455670035020916542926528800583833765980000800102917329209292022925429373
8002429331218000000070250788107201211082905081247519341956202725160059800608000080010800004002811281646043290652903829256191293190111600102080000208000029098290541180021109261010800008000011080905043645620845637781291580467385430780126406444330035020316242892331800483543116980000800102910929056292242914229208
800242914821900010006555078810752901162907479338318462024181425160050800588000080010800004002631286383047291142908829109191553189601600102080000208000029025292131180021109191010800008000011080897043444410844477321190280472585476763117475346190035020416442901841800394112736980000800102918129015291712916029017
8002429017216110000061310799107201189629129791382213620631746251600488005780000800108000040033812785240302896029115289381903431904516001020800002080000290032909311800211091610108000080000110808901842351590847077328904444350853847651355054499218035020416452903727800304143555380000800102919129173292422918828949
8002429230218000000063130816107041091442920479438919672002188225160364800528000080010800004002691274437045291222907229261192063190471600102080000208000029147289371180021109171010800008000011080912043348370845387696881122461685032762117487643610035020416432897819800403233585480000800102905729061291292897429150
800242912521710000006672082410720108922893780638018092160175025160054800488000080010800004002551285421139291552912229025190763192761600102080000208000029170290711180021109151010800008000011080918047550780849597648909324950854317491274292481513395020216242909438800363133496580000800102910428932292582897028896
800242885221700000006749080710728103112289557813691890214017892516006080054800008001080000400228127909313529106290852913319350319057160010208000020800002910129142118002110925101080000800001108091204365243084816775689736469785743712127483248670035020316462905526800454033536480000800102919629234291292887629157
800242920121700000005978081810712101144290117943891889191118902516005380053800008001080000400318128035103629281291692898019130319018160010208000020800002909929199118002110920101080000800001108091818428482608468278210936324484861827401214609495317045020416442909927800353763735380000800102921029144291332919629238
80024291042180000000638908041074496112289417954201659180217812516004880050800008001080000400293128569713928851291832912419203319096160010208000020800002912129135118002110925101080000800001108091104615536084986748790348445985857769122420251100035020216662895032800363853635880000800102910929092291172917329135
800242915421800000006221080510768951122904877040921022067199125160054800528000080194800004002771282842143289852931329089190743191291600102080000208000029189291641180021109211010800008000011080882048950690847547388890384706852976741234628545000155020616662898131800503503546080000800102900929056290172928429162