Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 1 reg, 1D)

Test 1: uops

Code:

  ld1 { v0.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6100529308220113412215014586287720124378200010001000100010005000500040159712858529243310200010002000291342910211610011000100010012210020111000212121317692646843303311712168731013812136661285201000162401376815158100010002938029374293472940629327
6100429343219012613201014644288370024304200010001000100010005000500030159422860929384310200010002000291582924611610011000100010021210030011001213101314392066869305015622166131063809126261284811000163961363815359100010002933229288293692936829348
6100429383219012212713014564288530024395200010001000100010005000500020159442863529332310200010002000292302915911610011000100010021310010321000212101296692286880302315682174430683815186571284381000163361364715287100010002936429287293022941229283
6100429383220011812801014607288130124353200010001000100010005000500030159522870329348310200010002000291162918411610011000100010013310030111001312101297291116862307021682174831073811146061285311000164671356315253100010002936029377292672933729285
6100429283219012512408014568287510024326200010001000100010005000500055159432854529340310200010002000292162927011610011000100010022410020111000210101294993896883309310602165131173811146163284341000165121372915249100010002934329273293362930829348
6100429299220012412315014677288090024278200010001000100010005000500000159542863629246310200010002000290832910411610011000100010032010020021001213101281392616871309116662169230763811136361283701000161641359715233100010002930229314293522932729347
610042933821901221220401464928812002433320001000100010001000500050005015970285982926531020001000200029112291792161001100010001002121002012100121311129569198686930888602168030963809156160284171000165701365814986100010002931429380293602935729292
6100429271220012512205014594287930024457200010001000100010005000500050159522861529320310200010002000292942922111610011000100010013210020121000213101285290446910304613602164231223817146562284371000162891377215116100010002936529393292872930729444
6100429427219012412315014654287841024335200010001000100010005000500010159422865229298310200010002000291372909311610011000100010042010020121001223111312692656888312615582168331553810135873284931000162851379215122100010002934029357292802948729293
6100429421219012012301014661287841124328200010001000100010005000500050159632859029367310200010002000291332917511610011000100010021210030221001313121302590616822309115642169630583815196962284381000162501361615204100010002938429370293282941129357

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.1d }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 35 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50205120050964010100110050000012002611975725701035010210001100004010010000100001062072453797945849450120026012005012003511329831136986010030200100001000060200200001000012005012004711502011009910040100100001000001001000001100001001000001110000321237666119765500049681000050100120057120057120058120057120057
50204120056964111000110000010012003511975325701035010210000100004010010000100001062117453797945844730120027012003512003511333231136956010030200100001000060200200001000012005012004711502011009910040100100001000001001000001100001001000010100000321267666119759500029981000050100120051120037120051120036120051
50205120050965000000010010000012003511975325701035010210000100004010010000100001062072454013145844730120026012005012005011330131136996010030200100001000060200200001000012003512004821502011009910040100100001000001001000001100000031000000100000321237636119759500029681000050100120048120051120051120051120052
50204120054964000000110110000012003511975325701035010210001100004010010000100401062081453805745849840120027012003512013511330131136986010030200100001000060200200001000012005012012511502011009910040100100001000001001000001100000001000010100000321269566119759500029681000050100120051120051120051120036120051
502041200519650000000100100000120035119753257010050102100011000040242100001000010621084537412458494501200260120053120050113301311369860100302001000010000604502000010000120050120047115020110099100401001000010000010010000011000000010000101000003261928256119836500259601000050100120507120144120331120236122146
502041203269661122101133536176000212020312079827170135501141000610004406731003910197106401345461084587310012011201202351202341133461911392660548305721007910125609302024210123120320120139415020110099100401001000010000010010003111000500110000111000003237976106119835500029981000050100120052120138120051120145120053
502041201439650100100100188000012003511975325701035010210000100004010010000100001062108453797945849450120028012005012005211330231137006010030200100001000060200200001000012005012005011502011009910040100100001000001001000001100000001000010100000321237666119759500029681000050100120051120051120051120052120052
5020412005196400000001001010001204501199021147016550144100091001040674101601015610720254547110459548911200260120050120035113302311369860100302001000010000602002000010000120050120048215020110099100401001000010000110010000001000040010000001000003212376631197595000291081000050100120052120054120036120052120051
50204120054964000000110010000012003511975325701035010210001100004011010007100451075132455028345856511120017012006212005711337571137406012430224100081000860248200161000812006112005811502011009910040100100001000001001000121100011041000011110111322351655119841500041110121000050100120061120061120062120061120061
502041200609641000001000190000012004211973725701035010210001100004010010000100001062144453813145852181120032012005412005411330531137026010030200100001000060200200001000012005112005211502011009910040100100001000001001000001100001031000010100000321267666119764500021310121000050100120055120055120036120052120061

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0061

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002512006593110010000300012005011119737257001650014100031000040010100001000010622744539136458680401200331200571200611133393113721600103002010000100006002020000100001200571200651150021109104001010000100000101000231100020021000111110003140982101111976850006101091000050010120061120066120066120066120066
5002412005793111000000300012005001120724257001950016100031000040010100001000010622834539401458660911201291200661204011133393113721604523014310000100406002020080100001200491200673150021109104001010000100000101000221100010041175311110103140108288119768500139651000050010120054120054120054120054120062
5002412006193010100000200012003800119736257001650014100021000040010100001000010622384539059458645301200401200611200611133303113725600103002010000100006002020000100001200651200611150022109104001010000100000101000143210002220510001111100031641282111111976850006111091000050010120067120397120058120407120402
5002412040993410010000300212003410119737257001650016100031000040010100001000010622024539401458660901200411200601200601133313113726600103002010000100006002020000100001200491200491150021109104001010000100000101000211100010011000012111003140882111011978450004131091000050010120050120050120050120058120061
500241200578991100110020101200431111974125700195001610002100004001010000100001062238453924945864531120037120061120061113330311372560010300201000010000600202000010000120061120056115002110910400101000010000010100022110002002100001111000314098299119775500066601000050010120062120062120057120062120062
50024120061899100011002000120046111197412570019500141000210000400101000010000106223845392494586453112003712006112005611332731137256001030020100001000060020200001000012005612005311500211091040010100001000001010002121000101190100011101000314010821111119768500066651000050010120062120062120062120062120057
50024120056899110111003000120041101197362570016500141000210000400101000010000106223845390594586141112003712005412006111332731137206001030020100001004160020200001000012006112006111500211091040010100001000001010002221000200110000121100031405821212119775500046951000050010120062120057120062120062120062
50024120061930100011002000120046001197412570016500141000210000400101000010000106223845389454586453112003712006112006111332731137136001030020100001000060020200001000012006112006111500211091040010100001000001010001121000210172651000611113003265882911120269500066651000050010120070120062120062120065120059
50024120049930100011003000120046111197422570019500141000210000400101000010000106216645392494586453112003212006112006111333531137136001030020100001000060020200001000012004912006111500211091040010100001000001010002121000101210000111100031408829101197805000610651000050010120057120055120054120057120062
500241200539311011110014000120041001197362570016500141000210000400101000010039106219345391764586453112003712006112006111333531137136022930020100001000060020200001000012006112005611500211091040010100001000001010001121000218210001121100031401082811119772500046651000050010120054120054120054120054120139

Test 3: throughput

Count: 8

Code:

  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  ld1 { v0.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802058004062000000103100080025166925160100801008000080100080000417970337588241800158004080040699243699971601002008008020016000080040800401180201100990100100800008000001008000001480013010108001361917051100031743800371800009980000801008004180041800418004180041
802048004062100000001800080025166025160100801008000080100080000417968737588240800158004080040699243699971601002008000020016000080040800401180201100990100100800008000001008000001480013000080012611217051100031732800371800009680000801008004180041800418004180041
802048004062000000001500080025166425160100801008000080100080000417969537588240800158004080040699247699971601002008000020016000080040800401180201100990100100800008000001008000001780013000138001361917051100031733800371800009080000801008004180098800418004180041
802048004062100001044900080093166025160100801008000080100080000417967137588240800158009080040699243699971601002008000020016000080040800401180201100990100100800008000001008000001780012000080013601017051100031733800371800000680000801008004180041800418004180041
80204800406200000001280008002516092516010080100800008010008000041796633758824080015800408004069924369997160100200800002001600008004080040118020110099010010080000800000100800000178001300021730800006190051100031753800371800009080000801008009380041800418004180041
802048004062000000001900080025066425160100801008000080100080000417934637588240800158004080040699243699971601002008000020016000080040800401180201100990100100800008000001008000001480000000138001361017051100031723800370800000080000801008004180041800418004180041
8020480040620000000019000800251666251601008010080000801000800004179687375882408001580040800406992436999716010020080000200160000800408004011802011009901001008000080000010080000017800130001280012001017051100021732800761800009980000801008004180041800418004180041
80204800406200000000190008002516662516010080100800008010008000041796953758824080015800408004069924369997160100200800002001600008004080040118020110099010010080000800000100800000178001300013800130110170511000217328003708000010980000801008004180041800418004180041
80204800406210000040198800800251604251601008010080000801000800004179687375882408001580040800406992436999716010020080000200160000800408004011802011009901001008000080000010080000017800130101280013611317051100031733800370800009980000801008004180041800418004180041
802048004062100000001800080025166425160100801008000080100080000417967137588240800158004080040699243699971601002008000020016000080040800401180201100990100100800008000001008000001480013020128001300120051100031733800371800000680000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025800406200001100010150000018002516642516001080010800008001080000417862937598600800158004080040699463700201600102080080201600008004080040118002110910108000080000010800000148003500980013611017000502241605580037180381101080000800108004180041800418004180041
80024800406200100100042954616000180025160482516001080010800008001080000417864537588240800158008480040699463700201600102080000201601608004080040118002110910108000080000110800000148001310080034600170005022616076803291800000080000800108004180041800418034680041
800248004062101001011001900001800251666251600108001080000800108000041786293758824080015800408004069946370020160010208000020160000800408004011800211091010800008000011080000014800102018800126001700050225160458003708000010080000800108004180041800418004180041
80024800406210100100000190000180025066625160010800108000080010800004178645375882408001580040800406994637002016001020800002016000080040800401180021109101080000800001108000000800131016800146112170005022516055800371800000980000800108004180041800418004180041
8002480040621010010110018000018002516672516001080039800008001080000417863737588240800158004080040699463700201600102080000201600008009280040118002110910108000080000010800880178001310168001301101700050224160548003718000010980000800108004180041800418004180041
800248004062001001000001800001800251669251600108001080000800108000041786293758824080015800408004069946370052160010208000020160000800408004011800211091010800008000011080000017800221013800126113170005022716053800370800009980000800108004180041800418004180041
80024800406200100100000151000018002516052516001080010800008001080000417862937588240800158004080040699463700201600102080000201600008004080040118002110910108000080000110800000148001000740800126110170005038416056800371800009980000800108004180041800418004180041
8002480040621010010000054000018002516062516001080038800008008480000417860537588240800158004080040699463700201600102080000201600008009080091118002110910108000080000110800000080012000800346012170005022516054800371800009980000800108004180041800418004180041
80024800406200100100000300100180025166525160010800108000080010800004178629375882408001580040800406994637002016001020800002016000080040800401180021109101080000800001108000000800100012800106101700050383160548003718000012980000800108004180041800418004180041
800248004062001001000001800001800251066251600108001080000800108000041786293758824080015800408004069946370052160010208000020160000800408004011800211091010800008000011080090014800120012800136110170015022516055800371800009980000800108004180041800418004180041