Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (pre-index, D)

Test 1: uops

Code:

  ldr d0, [x6, #8]!
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 23 | 24 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100510568111101105371001010252333241925200010001000100010005073045826110151040104082438992000100010001040104011100110001000010160184107322281449106646653790175516661037100053441000100010411041104110411041
10041040800000195161001410252037051525200010001000100010005071445824010151040104082438972000100010001040104011100110001000010106038107420311472107136652580075516561037100039371000100010411041104110411041
10041040800010112000002010251037012825200010001000100010005069045824110151040104082438982000100010001040104011100110001000010200048105920201447105830651550175516551037100042401000100010411041109010411041
10041040800000184000010102503200232520001000100010001000507224582311015104010408243898200010001000104010401110011000100001020004110733030854105836554550075616551037100031301000100010411041104110411041
1004104080000019714100181025932062025200010001000100010005075445824010151040104082438972000100010001040104011100110001000010140263107150201062106236447470075516551037100028241000100010411041104110411041
1004104070000011021610030102514372122252000100010001000100050746458241101510401040824389720001000100010401040111001100010000102800521050000048106230752520177617661037100038301000100010411041104110411041
10041040800000110200002410251033473125200010001000100010005076245823110151040104082438972000100010001040104011100110001000010400049107412291848108336551610075616551037100032231000100010411041104110411041
10041040800000110221100201025173335242520001000100010001000507064582501015104010408243898200010001000104010401110011000100001038016010540020647105636556390077517661037100038421000100010411041104110411041
1004104080000019318100112102553722222520001000100010001000507384581911015104010408243897200010001000104010401110011000100001000007610693112046104830757800175617661037100044431000100010411041104110411041
10041040800000179190002010251737282125200010001000100010005073845824110151040104082438972000100010001040104011100110001000010240063106330221449107836553340175616651037100051391000100010411041104110411041

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, #8]!
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1857

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502091223739120020000447081517040112122008812331217922570520503501012410000401001000010000107757946124464655921012168701218381218541151620311554360100302001000010000602001000010000121778121777115020110099100401001000010000110010936011853110709213993736201095711721340155032101761112155750242102011478961000050100121780121726121849121905121902
50204121805913000000044408301736111612192778622121576257049950374101291000240100100001000010784794606477465187201217570121925121774114997031154846010030200100001000060200100001000012175212181111502011009910040100100001000001001095801125311071122412928322010947120212200303210176111218325026298210328761000050100121736121706121847121757122152
50204121759912000000043708251720092121923808331216302570478503621012510000401001000010000107961346073954655303012170601218481220521152940311538660100302001000010000602001000010000121811121910115020110099100401001000010000010010891011051910742207891832181094713331250911032101761112150950236108510829451000050100121893122071121971121926121731
50204121877914100000041308111696110812163580023121495257048750346101311000040100100001000010773634610332465355601217950121852122032114877031153156010030200100001000060200100001000012164712183811502011009910040100100001000001001094211215251070721469123425109401114116105032101761112159050250110710109891000050100121903121922121933121905121913
502041217019121111000458078817281108121860782221215272570475503661012410000401001000010000107814646045894651950012189901219271218801150940311554160100302001000010000602001000010000121758122001115020110099100401001000010000010010899111353410728207993452181091012531221311032101761112165050296108111799741000050100121922121771121727121863122063
502041219559141000000437081217121124121905788231215562570487503501013010000401001000010000107976646078404656832012183001219691217771150400311565160100302001000010000602001000010000121933121784115020110099100401001000010000010010911111351510792211119313621109401304133133032101761112160650254110711559441000050100121793121913122063122005121720
50204121806912100000046008191728110412175979833122635638708735063310162100564400111239111821138695467131747195890123903012428312446311620102261168876619833708101211115868612111331118812426812451130150201100991004010010000100000100109261127529107122168937783610925118312906180389512914212423550584114110599181000050100124940124436124939125201124929
5020412436593611000021430082717360144121982801341214092570460503441013210000401001000010000107872246025924656028012191801219101220361151210311541360100302001000010000602001000010000122011121575115020210099100401001000010000010010947011853810720208994572151097012821230310032101761112149250252107810428851000050100121784121626121783122024121924
502041218139120000000439083117680108121820821321215242570493503321012810000401001000010000107748046011704655681012192801218921218881151600311549660100302001000010000602001000010000121770121774115020110099100401001000010000010010908012053710725213109147621109481304120109032101761112160850254108110589331000050100121912121815121819121926121885
502041219419131000000455086718001108121877794321214662570523503801013410000401001000010000107974846064144657596012199701217671219161151310311551960100302001000010000602001000010000121918122065115020110099100401001000010000010010932010052010742206891436111091712421260012032101761112171250266115410309771000050100121933121680121867121860121802

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1879

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cd | cf | d0 | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002912194691200000000042808411696212012177180432121419257041850260101291000040010100001000010807064611101465435612195801218141216901151280311538560010300201000010000600201000010000121760121841115002110910400101000010000010109231109530107132081392072361091411631161191703140017820012912163550266106511729621000050010121815121870121947122076121848
5002412191991320000200045608251736111212198478722121741257041850252101281000040010100001000010784834612968465500412186701218881219351153210311553860010300201000010000600201000010000121978121816115002110910400101000010000010109191112530107082091189074301089012631301313031400108200121112174950274105010949921000050010121837122031121818122004121894
5002412180891100000000043908401728110412195281222121595257039750274101281000040010100001000010791674612449465677012193401219531218701151830311564460010300201000010000600201000010000121751121831115002110910400101000010000010109191885501071821111945722710932139312203120314009820112912176150272101110799671000050010121916121991122033122000121922
500241218029130000000004530838171208012193680922121630257042150260101361000040010100001000010793294611519466153912186001218211218141151440311567260010300201000010000600201000010000121984121817115002110910400101000010000010108911115536107031851295380391090613431261690314008820010812156450252102510479461000050010121783122012121805122034121892
500241218419131000010004560829169611361218638012212154225704185026810135100004016110000100001079284460925446522371218280121817122003115204031155096001030020100001000060020100001000012192112183511500211091040010100001000001010936111453510712204149407045109251504119009031400108200151112165250262108910239231000050010121750121862121768121841121826
500241217609120000001004420845161611321220028082212154225703825025410128100004001010000100001078807460222546558331218330121895121880115106031155066001030020100001000060020100001000012194912188611500211091040010100001000001010900011053610689206810197235109241222124038031400782001115121508502509929649471000050010121934121966121936121934122021
50024121895913200000010440082517842108121929790221215972570382502721012710000400101000010000107881646092294662530121991012187212190111513503115625600103002010000100006002010000100001221721219705150021109104001010000100001101086921115471071721489177226109121264125269031400108200111012163650254102211489001000050010121900122002121850121834121901
500241217389132012020004400826170429612189379822121729257045150288101301000040010100001000010787174610374466048012203701219411222001152580201155426001030266100801008260750100001004012206612266331500211091040010100001000011010859411553010717211119281182810921129513023303140098200131212183450290105111749681000050010121867121842121854121937121908
50024122051914200000000501088217122112121914813341215952570385502541013710000400101000010000107795246081794662537122007012181412190411514003115668600103002010000100006002010000100001219131220311150021109104001010000100000101093221095301071320110934864010940136512720160314009820015912164850244106210439681000050010121915121883121927121987121899
500241219939132001000003990813172801161217527843212159225704095027010117100004001010000100001077169460545646649881218250121934121849115248031155476001030020100001000060020100001000012190012182311500211091040010100001000001010910010754510699206109124623109421272124031403140088200141012162750262106410879041000050010122048121901121842122028121876

Test 3: throughput

Count: 8

Code:

  ldr d0, [x6, #8]!
  ldr d0, [x7, #8]!
  ldr d0, [x8, #8]!
  ldr d0, [x9, #8]!
  ldr d0, [x10, #8]!
  ldr d0, [x11, #8]!
  ldr d0, [x12, #8]!
  ldr d0, [x13, #8]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3667

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | 0e | 0f | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80209294582191010068310829174413011229336791334163716442038251601408014480000801008000040070712872781532938329154293491910531930316010020080000200800002946129416118020110099411001008000080000010080914041147190852216391591074535785329754119476146950005110117112935639800455835469080000801002936629480292312941129229
80204294232190000069250812179212814029388795400158816342285251601528013380000801008000040072112863931472936329380294511932931942916010020080000200800002928129374118020110099501001008000080000010080909040953280851856341192978460685556753135444649000035110117112928437800345515529180000801002931629266294962936229294
8020429297219000006330082317121176829140815299153915912083251601368013880000801008000040073312911771432928629445292221912831927516010020080766200800002960529378118020110099351001008000080000010080926037153220851506931690544524385284876139446952720005110117112936138800355575628780000801002923829391292772930429383
80204294092190000165960817171213511229220837377161318222140251601398015180000801008000040074912912201402937729215293751916631950216010020080000200800002948629411118020110099301001008000080000010080936042153990847277011493432447385849832123457252290035110117112910826800515306259080000801002931929372292852931829237
80204293202200001067100856166412310029280819324167618871952251601408014480000801008000040076312987281462926729347295191937831917716010020080000200800002937229380118020110099391001008000080000010080934041156200850116681296176517685906896126447847840035110117112938423800445625339080000801002935129239292232945729389
80204294012190000066600849173695104292618173591524188821302516015480140800008010080000400749129557615229341293462953219222319227160100200800002008000029538293781180201100995410010080000800000100809510414531108532465410952805679855917381204833538903351101171129489338004259262310480000801002937629333294612936929195
802042940221800011656608581744109116295068383401687169521742516014380143800008010080000400728129880114329359292562926319362319293160100200800002008000029383292251180201100992310010080000800000100808970392530108487369999361344738856508681224614482100951101171129131318005658354710880000801002928929274292982940229290
802042926921900010667508231712124184293158183731741180621582516014580136800008010080000400733128570814629432291902939119078319262160100200800002008000029434293201180201100994910010080000800000100809450391450208460867610895544877855118791334843487500751101171129389418004561255410280000801002938029454292482928829384
80204292642190001066590833171211311629390805340185716021988251601408014680000801008000040072712908701552925529328293491918231940716010020080000200800002937129350118020110099451001008000080000010080934041951690849816901390578509686038809141467751350075110117112960924800365946178780000801002958529404294972931129109
80204294182180001070210829176813612029295799356195216541935251601538015480000801008000040079713029681462961329363292881917531923016010020080000200800002936029217118020110099441001008000080000010080930040557680849316331194448522385948746118494353050005110116112948425800485225999180000801002944529216294882957929478

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3762

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80029304852243000108525853170413514030060780199216622024245025160071800688000080010800004004301323035016930037300493007719884320166160010208000020800003021130144118002110928101080000800001080980513125803867895721188242663687903757126679760395303502000011601129953298007978171615380000800102998930191300453007930225
8002430202225300010868580616641231203006279822761906205425352516006780067800008001080000400351132445400662971229826300401984031996416001020800002080000300683011211800211094110108000080000108096136387547987316621890748729487061811131637850833505502000011611129783318006570467214480000800103025229978302783011229859
80024301442262000108783836174412314430403802211316281972281325160062800748000080010800004003071329203005630218302223022520121319957160010208000020800003001130295118002110939101080000800001080955173835829867966401595052648488464748128676158931820502000011601130378358005968868012780000800102997930317301663035930080
80024300532261100108692819171211710830143791229814662169276825160081800858000080010800004004111325491016730120302612988320160320131160010208000020800002995829793118002110939101080000800001080929153196496873446231091146671188225800139709358921803502000011601230029368006968768913480000800103017730059302713022230106
8002429985225200010895180517041211483006280620471769205725982516006680071800008001080000400415132802701662997629889300402008021198561600102080000208000030277301151180021109581010800008000010809233534454628723762012974766994871847341197188532033010502000011601129564248005670065814080000800102997029979299843022330132
80024300322262000108974790169612814030254806213218171821232325160070800768000080010800004003241321228016930153303453016320133320043160010208000020800002982430147118002110931101080000800001080956323425601868116581388346689487624765126702456303734502000011601129983338005970570313080000800103003230053303603029829996
80024301522242000107966788163210514030079824213818011924280325160078800798000080010800004004181313565016530324301612983120159320156160010208000020800002994029784118002110941101080000800001080980303416495880206031393642710188038779132738054773163502000011601129973338005465566913380000800103021230101301263011130345
8002430026224202010832277117361211002993981519931718212825182516007280073800008001080000400381132146601533026630383303732004732003916001020800002080000302833020211800211094810108000080000108098351330616187572678996098707487701767122639146195438502000011601130133408005760663214580000800103002729980301492992030122
80024300252233000109223858171213012830017791221915792087276125160068800808000080010800004003251330631007030146301433018020015320085160010208000020800002993330010118002110935101080000800001081128573226110871106741291682727787450802132730652945103502000011601130115348006068167614380000800103025930328302923044630069
8002430286226320010861080817041431043013182320761945205025862516007280069800008001080000400421133707500622997730608301122002232031816001020800002080000301102976611800211094210108000080000108095054363608287447646788846683387561796129694956435203502000011601130168288007464369712080000800103019829967299083016330259