Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 1 reg, 16B)

Test 1: uops

Code:

  ld1 { v0.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
61005290102343001001100160100478328395000239912000100010001000100050005000501597428511291093102000100020002892728904116100110001000010030110000003100011100131209438689531240622140132623820175960283991000157081313714680100010002888229060289072905128924
6100428961232100001000000000464528659000239742000100010001000100050055000601598228353290593102000100020002891028853116100110001000110000010000002100011100133759335695831281682129332373815255458284061000160391345314534100010002900828952289822904329126
61004289772331000000000150000470328581000238712000100010001000100050005000101599528382288633102000100020002883428821116100110001000010020110000000100001100130299256695531221592136232523806165963283201000158081322314489100010002904129037289252905029020
61004289932331000000000150000473428487000238712000100010001000100050005000901608428386290003102000100020002878128835116100110001000010000210000000100022200133249428688031400602131532623818215559283881000157861324414582100010002909928952289332898928599
61004289762332100000010000004704286040002402320001000100010001000500050001001608428439289783102000100020002881328871116100110001000010000110010001100021300130609222691931510572131832673816246167282581000158081331214619100010002889728800288752895628966
610042900323310000000001888100465428519011240192000100010001000100050005000501592128445289513102000100020002879328953116100110001000010000010010000100100100130859311697131270632128032393808235867284951000156031319114583100010002919529057289882908429112
61004286422411010001010101004784284100102371120001000100010001000500050051001599728309287153102000100120002872428739116100110001000010000210040000100000000131479213693831390612116332303816165771282151000153851311414693100010002868728842288272883128859
61004289132322100000000101004800283570112367320001000100010001000500050001901594528245288483102000100020002860228735116100110001000010000110000001100010300132439258693331630582110132063815196353282171000154801304014398100010002885828743287502868128737
61004288192321100000010310000459728376000237522000100010001000100050005000301612028348288803102000100020002872728721116100110001000010020110000000100002200133089393697531721662115031293806125959282091000155531314714505100010002873828934288442892628841
610042878923110100000004000048412848310023678200210001000100010005000500050160012828028882310200010002000286602861411610011000100001001221000000386100010200132169302696030991632117832283813156060281421000159721298914723100010002891628754288842888028747

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.16b }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502051200479310100000130001200201197372570103501021000110000401001000010000106210845380184584945120026012003512003611328631136986010030200100001000060200200001000012006012004711502011009910040100100001000001001000001100001031000011000321017611119756500029681000050100120051120051120036120051120051
50204120047930000110010001200351197532570103501001000110000401001000010000106207245379794584945120026012005012003611330131136986010030200100001000060200200001000012040212003911502011009910040100100001000001001000001100000031000011000321017611119759500000981000050100120051120051120051120036120051
50204120052931000000010001200201197532570103501021000210000401001000010000106210845378654584945120026012005712005011330431136986010030200100001000060200200001000012008412005411502011009910040100100001000001001000001100000001000011000321017611119760500000981000050100120036120051120054120051120051
50204123253931000000010001200351197532570103501021000110000402421000010000106210845378654584473120026012005212005011330131136986010030200100001000060200200001000012009412005311502011009910040100100001000001001000001100001001000001000321017611119759500009601000050100120036120036120051120036120051
502041201349300000000100101200201197502570103501021000110000401291000010000106210845378654584945120026012005012005011330131136986032330200100001000060200200001000012008412003711502011009910040100100001000001001000001100000031000011400321017611119761500029681000050100120051120051120051120040120148
50204120143931000000010001200431197532570119501021000110000401001000010000106210845374124584945120026012005012005011330131137356010030200100001000060200200001000012013212005411502011009910040100100001000001001000000100010031000011000321017611119760500009981000050100120051120051120051120051120051
50204120050930000000010001200351196802570103501021000110000401001000010000106210845379794584945120026012005012004711330131136986010030200100001000060200200781000012041112004711502011009910040100100001000001001000021100000001000001000321017611119759500020601000050100120051120051120051120051120051
50204120035930000000000001200351197372570103501021000110000401001000010000106210845379794584945120026012005012005011330131136986010030200100001000060200200001000012010612005621502011009910040100100001000001001000001100001001000011000321017611119759500029981000050100120036120036120051120051120036
502041200359310001000130001200331197532570103501021000210000401001000010000106210845374124584984120026012005012005011330131136986010030200100001000060200200001000012009212007911502011009910040100100001000001001000000100000031000011000321017611119759500029601000050100120140120051120051120051120051
5020412005093000000011000120035119753257010350102100011000040100100001000010620724537979458498412001101200501200501133013113698601003020010000100006020020000100001200951200851150201100991004010010000100000100100000110000003100001100032101541111974650002131091000050100120055120055120055120147120052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002512014197010000000019000012003511973225700135001210004100004001010000100001062157453871745864941120032120056120056113327731137206001030020100001000060020200001000012011212005311500211091040010100001000001010001211000202041000010000000314022822322119769500109681000050010120051120048120051120051120052
500241200509650000100002010112004111965925700135001210004100004015110000100001062139453883145859071120032120159120056113330031137206001030020100481000060020200001000012006312005321500211091040010100001000001010004111000205902051000010100001314017822219119769500026681000050010120051120052120142120051120048
50024120047965000011000665000012003411973025700135001210001100034001010038100001064190453921845855071120033120056120121113330731137206001030020100001000060020200001000012005612004111500211091040010100001000001010001111000101011000010100000321320822019119766500029651000050010120051120081120072120051120051
500251200499300000011001010112004111973625700165001210002100004001010000100001062202453894545861411120026120050120050113324031137146001030020100001000060020200001000012005012004711500211091040010100001000001010000011000000031000511101000314019822313119852500049981000050010120057120057120058120057120057
5002412014593110100000013000012003511973025700135001210001100004015210000100001062139453883145860241120032120057120056113330031137786001030020100001000060020200001000012005812005311500211091040010100001000001010001111000101011000010100000314022822322119770500029981000050010120051120051120049120051120142
50024120050930000000000148800012004111973625700165001410002100004001010000100001062193453905945861411120023120050120050113324031137146001030020100001000060020200001000012011912005011500211091040010100001000001010000001000003001000011101000314019821119119775500049781000050010120057120147120057120057120057
500241200569311010001001000012003511973025700135001210001100004001010000100001062166454039645860241120032120053120056113330031137206001030020100001000060020200001000012005412005611500211091040010100001000001010001211000100241000010100000314019822422119769500029981000050010120052120051120051120061120141
50024120050931001001100288000120041119737841705445034010004100024001010000100001062184453905945861411120023120050120050113324031137666001030020100001000060020200001000012005012004711500211091040010100001000001010000011000000031000011101000314020821920119775500129981000050010120051120051120051120051120051
5002412005093000000100020001120041119772257001650014100021000040010100001000010621934539059458614111200261201431200511133240311371460010300201000010000600202000010000120050120050115002110910400101000010000010100000110000010010000111010003140218220121198365000261381000050010120051120051120036120051120036
5002412013493000000000020000120041119736257001650014100021000040010100001000010621934539059458614111200261200471200501133240111137146001030020100001000060020200001000012005012004711500211091040010100001000001010000011000301001000011101001314016822021119769500026681000050010120051120052120051120141120051

Test 3: throughput

Count: 8

Code:

  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  ld1 { v0.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020580040643000001100360008002516672516010080100800008010080000417967937588240800548004080040699243699971601002008000020016000080040800401180201100991001008000080000110080000021080046001780014610210051101171180037180000131080000801008004180041800418004180041
802048004064300000000035000800251661025160100801008000080100800004179679375882208001580040800406992436999716010020080000200160160800408004011802011009910010080000800004100800000210800150018800176115210051261171180076180031131380000801008014380142800918019580498
8020480190644000001110155880080025160803651601658019580050801888006941790863760933080054801928009169979137002916040620080160202160320800918019131802011009910010080000800001100800287008009903764800226114277051421252180115180048131580000801008014280092801938014280191
802048009164401010002128726400800251669461601008010080025801008000041795373758824080015800408004069940369997160100200800002001600008004080090118020110099100100800008000011008006741908001704158001661021005125217118003718000013080000801008004180092800418004180092
80204800916430001000013400080075166025160160801008000080182800004179679375988508005480090800406992476999716010020080000200160160800408004021802011009910010080000800001100800000190800411019800166114210051101171180037080000131380000801008004180041800418004180041
8020480040643000000000220008002516652516010080100800008010080000417967137588220800158004080040699243699971601002008000020016000080040800401180201100991001008000080000110080000019080016001880019011400051101171180037080000131080000801008004180041800418004180041
80204800406430010000002200080025166725160100801478000080100800004179679375882408001580040800406992436999716025020080080200160000800908004011802011009910010080000800001100800088270800301029800006130277651251171180328080042131380000801008004180041800418004180041
8020480040643000000000440008002516614251601008010080000801008000041796633758823080015800408004069924186999716010020080000200160000800408004021802011009910010080000800001100800060140800071016800106191700511011711800370800009980000801008004180041800418004180041
8020480040643100010000800008007616682516010080100800008010080000417963137588240800158004080040699243699971601002008000020016000080040800401180201100991001008000080000010080007000800251098001461252371511011711800371800009080000801008004180041800418004180041
80204800406430010001001900080381106112516010080100800008048180069417884337601250800158004080040699243700291601002008000020016000080040800401180201100991001008000080000010080008623080026101680019611023715110117118003718000091080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580040620100000000043010008002516614251600108001080000800108000041785973758824180015800408004069946370020160010208000020160000800408004011800211091010800008000001080007723800270002880019612524705020051665800371800009980000800108004180041800418004180041
800248004062110000100004401001800251668251600108001080000817408000041785973758824080015800408004069963370020160010208000020160000800408004011800211091010800008000001080029824800260007800196125237050200516448003708002712980000800108004180041800418004180041
800248004062110010000003201001800251668251600108001080000800108000041785973758823080015800408004069946370020160010208000020160000800408004011800211091010800008000001080007823800260102580000612523715020051655800371800009980000800108004180041800418004180041
800248004062010000000003101001800251669251600108001080000800108000041785973758824080015800408004069946870020160010208000020160000800408004011800211091010800008000001080008623800500012580018612623605020042576800370800009980000800108004180041800418004180041
8002480040620100200000031010008002516612251600638001080000800108000041785973758824080054800408004069946370020160010208000020160000800908004011800211091010800008000001080007623800250012580019612723715020071654800371800009980000800108004180041800418004180041
800248004062010000000003301001800251668251600108001080000800108000041785973758824080015800408004069946370020160153208000020160000800408004011800211091010800008000001080006823800250012580019612423705020041657800371800009980000800108004180041800418004180041
8002480040620100100000031001008002516610251600108003980000800108000041785973758824080015800408004069946370020160010208000020160000800408009111800211091010800008000001080007723800250012880062612623705020071665800370800009980000800108004180041800418004180041
8002480040621100001000032001008002516682516006380010800008001080000417859737588230800158004080040699463700201600102080000201600008004080040118002110910108000080000010800077238002704138800186125237050200616548003708000012980000800108004180041800418004180041
8002480040621100101000032001008002516620251600108001080000800108000041786453759887080015800408004069946370020160010208000020160000800408004011800211091010800008000001080007723800490102780019612523705020051656800370800009980000800108009180041800418004180041
8002480040621101000000031001018002516612516001080010800008001080000417859737588240800158004080040699463700201601532080000201600008004080040118002110910108000080000110800077238002700119066804146126236050200416458003708000091280000800108004180086800418004180041