Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (post-index, D)

Test 1: uops

Code:

  ldr d0, [x6], #8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 2b | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10051055800007417103121025161557192520001000100010001000507624582410151040104082438982000100010001040104011100110001000210200069103821178271040486244800731161110371000413601000100010411041104110411041
100410408000074251022810251113020192520001000100010001000507624582410151040104082438982000100010001040104011100110001000010140054102201012401053244317200731161110371000353401000100010411041104110411041
100410407000052190010102501411424252000100010001000100050762458241015104010408243898200010001000104010401110011000100021024074510240700241024366317200731161110371000353301000100010411041104110411041
100410407000062120024102512141241225200010001000100010005076245824101510401040824389820001000100010401040111001100010002101600451027201712381044246273200731161110371000333001000100010411041104110411041
1004104080000731200101025131737122520001000100010001000507784582410151040104082438982000100010001040104011100110001000210220034103933150241067366295600731161110371000363601000100010411041104110411041
1004104080000671600101025013111202520001000100010001000507464582410151040104082438982000100010001040104011100110001000210201207210429100321025415274800731161110371000333301000100010411041104110411041
10041040800006915101010259183142025200010001000100010005077045824101510401040824389820001000100010401040111001100010002101900681039211710301051367284800731161110371000423201000100010411041104110411041
100410408000060131020102510143212125200010001000100010005075445824101510401040824389820001000100010401040111001100010002102000541045101312271047476255600731161110371000333201000100010411041104110411041
10041040800008228002010250151182125200010001000100010005076245824101510401040824389820001000100010401040111001100010002100000791044311312241040306295600731161110371000333901000100010411041104110411041
1004104080000611510041025121205212520001000100010001000507704582410151040104082438982000100010001040104011100110001000210160073103831164291040365264800731161110371000383601000100010411041104110411041

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6], #8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1890

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502091219449140000000062208211077601081215288215412145225704815039810151100004010010000100001076184460382746497881217481217901216941150193115353601003020010000100006020010000100001216731217711150201100991004010010000100001100110220115550107052011292748411093314451262030321017611121561502609308528681000050100121979121794121915121732121635
502041218149110000000461608441068801801218828118312154925705295035410140100004010010000100001077819460554546549991219361218271217941150593115456601003020010000100006020010000100001217431217631150201100991004010010000100000100109230118551107132071190866401091113131181060321017611121438502748928649021000050100121655121641121625121893121733
502041216539120000000057308141072011161217778185012138225704935039010133100004010010000100001078071460714746503661216741218331217691148863115510601003020010000100006020010000100001218481218191150201100991004010010000100000100109080117536106852221590848321091112951231050321017611121456502728268607811000050100121647121902121760121831121939
502051218109130000000059208251071201441218317966312166325705055036210130100004010010000100001076905460395246524081217451218021216321150023115283601003020010000100006020010000100001217001215911150201100991004010010000100000100109802119543107201959923424310958130712510120321017611121565502688628667011000050100121664121740121624121824121782
5020412186091211000000570080110744111612178981850121477257047850352101161000040100100001000010776394602212464669512180512191812161711518331153286010030200100001000060200100001000012174812171311502011009910040100100001000001001091221235451071421312910504710911128511924703210176111215325026091810307551000050100121772121693121887121714121835
502041218309130000000053708061070401121217248337212141325704905033610113100004010010000100001079011459813846482461218091216441217791149913115440601003020010000100006049610000100001217801216881150201100991004010010000100000100109140120549107182141492772381092511051291080321017610121340502548488727811000050100121777121658121716121745121889
5020412171591200000004609083910688012812168480640121501257049050382101381000040100100001000010770184607842464825712177612170712182311496331154206010030200100001000060200100001000012173012190911502011009910040100100001000001001090501205631072220712892504110919118512710120321017611121349502549048488301000050100121783121771121747121799121808
502041218909140000000056008291070401361216448084212286563870934506701021110052440971103311060114023646622494703773123665124495124531116354231116997658733368111136111586709011128111591243141244503015020110099100401001000010000010010907011854110714217109064819699109181194122101013872129322123722505628919207631000050100124401124577124414123672124607
5020412388293200011000234832568051071201361240287845212163021070481503421015710050429561099510975112250846557494701686121940121786122012114978311534560100303251000010000602001000010000121681121980115020110099100401001000010000010010932011952210718203129817437109311275134004032101761112176750262107910489701000050100122048121775122041121890121988
502041218479130000000044808020174401001219628143012166925705085034810152100004010010000100001079561460927746565131219111220681220731151363115591601003020010000100006020010000100001218381218051150202100991004010010000100000100109050104555106842101094056371093112441260154032101761112161950278102211469571000050100121951121885122043122006121907

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1840

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002912207691320000100508264833160821041217637851121559257040950264101311000040010100461000010775474606272464975211218380121848121581115040031155886001030020100001000060020100001000012183812188511500211091040010100001000001010892312151410665194129252622109251433123237314048200491214805026897010029611000050010121924121890121803121984122022
500241219539112000200047708031696210812177179311215122570403502761012810000400101000010000107681846070094658281112189701218091217541150470311546460010300201000010000600201000010000121834121799115002110910400101000010000110109202112549106982171290078151089113031142073140782009412147150232102410448931000050010121814121970121861121888121762
50024121936912200000005070775172821001216757791121489257036750240101261000040010100001000010770794601788465221811219970121781121786115099031154436001030020100001004960020100001000012198312178111500211091040010100001000011010911212054010696188988676241088812131252043140382003412155050252100011348811000050010122056121629121926121807121796
5002412174391320000000502077117122108121785793112147425704005026810127100004001010000100001076791460280646525331122018012159812182311494303115425600103002010000100006002010000100001217071217051150021109104001010000100000101095531045231069820689617216109721294120369314041700741215045025291110749001000050010121802121886121873121749121862
5002412184091220000000433080317043100121613744112135125703405025010123100004001010000100001078402460709246503180121802012204412184911526603115491600103002010000100006002010000100001217501216971150021109104001010000100000101091641175221073620110899803210917118512740331404820044121448502489949359451000050010121855121766121806121601121883
500241217779144010000042708091712310012179178911215472570412502381012710000400101000010000108188046171994659027112183801220351218721149720311545060010300201000010000600201000010000121890121882115002110910400101000010000010109062115516107222031191676211090412831272053140382006412138950250100910598761000050010121837121837121895121824121815
50024121897912200001004150811172021081215597761121223257039450266101551000040010100001000010789154605103465344411218140121895121891115116031155916001030020100001000060020100001000012174612207811500211091040010100001000001010904611251010730196887674161092913441173043140482004712157150230100210839071000050010121759121842121728121874121730
5002412175991320020000423081516882108121828776212138625703675024810112100004001010000100001076485460809146576121121887012199012192411503503115539600103002010000100006002010000100001216141218741150021109104001010000100000101091321085001068721010872681210913131311420133140482004312145450248108510658281000050010121799121885121814121691121809
50024121973913200000004720808168831081218187811121331257037050256101201000040010100001000010792394605913465537511217850121997121781115033031156656001030020100001000060020100001000012183412189111500211091040010100001000001010927311254010688198890370251088513151124013314038200431214625024896510018551000050010121894121902121814121953121721
500241218529134000101044508711744368122013800112152925704185025410136100004001010000100001077745460680446480881121827012228712183111517703115480600103002010000100006002010000100001218381219431150021109104001010000100000101087061105251069420689147415108831213117203314038200431216485024492010179671000050010121830121945121764121812121830

Test 3: throughput

Count: 8

Code:

  ldr d0, [x6], #8
  ldr d0, [x7], #8
  ldr d0, [x8], #8
  ldr d0, [x9], #8
  ldr d0, [x10], #8
  ldr d0, [x11], #8
  ldr d0, [x12], #8
  ldr d0, [x13], #8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3664

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802092998122000000100711082817601211442916487946320872109218925160145801398000080100800004008101297851156291242911129242190553192271601002008000020080000292512948711802011009921100100800008000001008091404384923328496777416942305416857907341474641517400005110117112922632800445414317580000801002920629180292452933529082
8020429334218101000006566846179211429629227886501210020432186251601528013680000801008000040080912898101462944229153293051937531942416010020080000200800002939229128118020110099111001008000080000010080984042054370852517628940365333859127421385000504100305110116112925236800394684387480000801002929429357292692930429295
80204293502180000000064248381736107104292358714082077210818392516014880143800008010080000400745128805913829196292882917819333319142160100200800002008000029047291301180201100991110010080000800000100809080399507008486679710960244762856407661305140504300305110117112939736800435004406680000801002917629314292272932529172
80204291572200000000069618171728107100293448404622114223820912516014680155800008010080000400742129693315329296291832927819267319302160100200800002008000029341291191180201100992110010080000800000100808930440567108527071813950384917862588021294515551000905110117112934432800424424486680000801002923329250293002946029355
80204292802200000100068838431696113124298528564522079201621222516015480146800008010080000400798129503904129196291472952719310319308160100200800002008000029274292881180201100991510010080000800000100809380484576030850517929930804781852687641524974486900605110125112918633800505014339080000801002933229600293252931029292
8020429434218000000006791820173610810829231854438216719882089601604608013780000801008017840164812877020562920329052293661931031939916010020080000202801922933529306118020110099231001008000080000010080928041250720851197739916764974861568441364862518003305110117112938134800384034495180000801002923029268293332923229265
8020429304218000000006801837175210615229314856404208222102100251601418015580000801008000040073312908230412925229391294001923231915216010020080000200800002921529407118020110099191001008000080000010080956194325102085447780139437857168596782013252825503190605110117112949032800454684166480000801002917229267291732937929462
8020429393221101000007049847174412113629240855431191622752024251601468014780000801008000040074712849450392921129389291721923731919416010020080000200800002951729088118020110099321001008000080000010080958214855434085671767109538047738596183414152495071190505110116112934823800405044836980000801002946329321292242922129419
8020429357219100100006611822173610315629286842447195419161954251601428014880000801008000040071812934990422917529168292701953231906816010020080000200800002938129378118020110099221001008000080000010080963194185278085517764148683050408582879411653115560191805110116112923349800434364628080000801002915529445293712937129531
8020429449220111101117143855168013692293378504151998219521822516015580341800008010080356401711128330204729300291102929619113181920916010020080000200800002931729361118020110099301001008000080000010080962194645057085142721139203454518599984414946605564194405110117112943547800514894507780000801002927529333291982924529131

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3679

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002930008220100016706818172012414429603809579184917752109251600588005980000800108000040029512944115129402294932942019449319500160010208000020800002936429492118002110935101080000800000108094920409587485301666139084847968602790413753525502341350209160432929940800436326288480000800102932829403295052964229698
80024295972192000064648121720119112295687965331926189521712516004680060800008001080000400267129645938293432950329492191643193261600102080000208000029504294971180021109411010800008000001080935373515062854076151090548464286249790127542253163637502031604329474248005158462810580000800102954829610295442952229512
80024293712202221066547941736122352293967986141760163621052516005080055800008001080000400319129725453294962955529396194393193961600102080000208000029420295381180021109451010800008000001080953384045266854456481090342522286178779123519855143404502041606429473258004065759612480000800102960529325294402944029580
8002429607220210016363828168010814029549762523174019692263251600518006380000800108000040026813102904429431294872948019366319552160010208000020800002959129548118002110940101080000800000108091140413489585259668891413846848578377713755935189373316502061603429385328004258465810380000800102947829739294322954529466
80024294792212100067678071768119112294517965601543190822752516004380061800008001080000400330129280249294302942529513194713192451600102080000208000029338292271180021109371010800008000001080943364064958852636121388274532585449818125503954003208502041603429539278004760957110480000800102946029540293952929629423
8002429343222200006160822167212512429517782534156819042191251600488005480000800108000040032612954854629250293682949219392319197160010208000020800002938529444118002110920101080000800001108097037376550685333695138527453268595174713754675464383750207160352941130800435805739980000800102947529440294762942629542
80024295182202000070798111664115112294107735161709178522272516006580061800008001080000400253130430138294702923429262195763193751600102080000208000029327294761180021109431010800008000001080916403916093859636961292848516186020738118500451483403502071606629480408004556860810880000800102934129478295592963729416
8002429560220201006867832174412110429273794522150519282390251600608005680000800108000040031013060474929310294472935519569319420160010208000020800002943929390118002110937101080000800000108093635369526785439689895642518086060721127566652083744502061607429503248004759665210580000800102931129444293752980029369
8002429376220200006853814170411413229379766464166819612106251600498005980000800108000040034012951214629394293822940519478319461160010208000020800002940529398118002110952101080000800000108097333436576384562679894880454485825747126477652243706502071603429467338004859659611980000800102938229501293382973229502
800242939522020100684979816961259229431748543153018342139251600538005780000800108000040027513010032929402294662956519432319482160010208000020800002949329317118002110939101080000800000108094337364582085545629988774575086176730139538754353637502061606929579358004067560210880000800102944329436293992944629442