Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (1D)

Test 1: uops

Code:

  ld1r { v0.1d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.003

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.003

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62005285352141011011102610502528055010160662003100410001000100050001194150227152832228534310200010001000100010002800828139116100110001000010000210000000100020000013904998373223327060198463445381410464427802142791227412816100010002808428188282242826928023
6200428187210001000006005215279640001639020001002100010001000500011924100227202838227991310200010001000100010002815928232116100110001000010000210000000100020000013149983371323301046198223246380912394027887146481259213495100010002816428256281812835128507
6200428147211011110003005095281330011643220031004100010001000500011943100227472807728398310200010001000100010002819228114116100110001000010021310030021100221012013524994672503418043196693437381317484027860138971237313821100010002823128116282202834028178
62004282522100111110030052672815600016366200310031000100010005000119391102265928148286543102000100010001000100028411281271161001100010000100323100400231000312110141581067172533484042196623325381014404027738140371221013106100010002845928315281782813328312
620042833521101101000200506327955000161782003100110001000100050001194290226912818828287310200010001000100010002836928158116100110001000010021210030011100223213013518100357105345304519619345538179443927834146931229112704100010002836828023280032839628152
6200427971213010010001105286279380001615220011001100010001000500011961802274228063282373102000100010001000100028061281121161001100010001100133100200031001210120140771041572803583040197393426381919434027832142371270513520100010002815928095281602830728566
6200428346211011111003005018279840011602620051003100010001000500011942110227482819128185310200010001000100010002810828030116100110001000010021210010001100022011014022104177135344704319506339138138484527757138971254913383100010002846028296282282811028062
620042837421301111100100513528119000161992004100110001000100050001194050226852814428210310200010001000100010002826428121116100110001000010033210010014410002131201350797717251339904019714320338179444227861142811263613887100010002812828379284672845328192
620042823321001101100300514728091001161042003100310001000100050001194730226972812728229310200010001000100010002838528157116100110001000010023010010011100001210013611991472763509143197583310381912394327894140591255212989100010002804228214281222810128042
6200428340210011010003105120279680001653620031003100010001000500011933140228442814228052310200010001000100010002803628124116100110001000110022210030001100002011014139104537108347015019524338738098394327780150981225813174100010002854428454280722837928546

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.1d }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400541049111110020100140041139429129353257010440100200041000030100200001000012639766693974143122290140030140041140053131799313239160100302001000020000602001000020000140056140041215020210099100401001000010000010010001111000100110000110100321041262213956240000669100001000040100140057140057140057140057140057
602041400531049110110010000140026139429129367257010440100200041000030100200001005612638146693974143151430140032140044140053131819313240260100302001000020000602001000020000140056140053115020110099100401001000010000010010001111000300110000111110321021142213955040000609100001000040100140057140042140057140057140042
602041400411049111110020101140089139430129367257010240100200041000030100200001000012639766693244143122290140032140056140041131802313240160100302001000020000602001000020000140056140053115020110099100401001000010000010010002211000100110000011110321021142213956240000069100001000040100140057140057140054140042140057
6020414005610491011000170001140038139429129367257010440100200021000030100200001000012639766693974143096670140017140056140056131802313239160427302001000020000602001000020000140056140053115020110099100401001000010000010010001111000101110000111100321021262213956240000069100001000040100140057140057140059140057140057
602041400561049100010010000140038139456129353257010440100200041000030100200001000012639766693974143122291140029140056140056131799313240160100302001000020000602001000020000140041140041115020110099100401001000010000110010001211000211410000111111321021392213956740000609100001000040100140042140057140057140058140057
602041400561049101010010101140041139429129367257010240100200041000030100200001000012639766693974143122290140017140056140056131802313239160100303611000020000602001000020000140111140053115020110099100401001000010000010010002111000341410000011100321011392213956740005009100001000040100140057140057140054140057140054
602041400561049101110020001140041139413129353257010440100200021000030100200001000012639766693244143122291140029140053140056131802313240160100302001000020000605781000020000140053140057115020110099100401001000010000010010002201000301110000111100321011262213956740000999100001000040100140042140057140042140057140057
60204140056104911101003200001400411394131293672570104401002000210000301002000010000126304866935321431279901400171400531400411318023132401601003020010061200006020010000200001400561400531150202100991004010010000100000100100011010002002110000111100321021392213955040000969100001000040100140057140057140042140042140057
602041400561049111110020000140038139456129364257010240100200041000030100200001000012639766693974143096670140029140053140041131828313239160100302001000020000602001000020000140041140054115020110099100401001000010000010010001101000101110000110110321021392213956740000906100001000040100140057140057140054140042140057
60204140056104911111002000014002613942912936425701024010020004100003010020000100001263976669397414312439014002914005314005613189531324016010030200100002000060200100002000014004114005311502011009910040100100001000011001000111100010011000001111032102172213956740000090100001000040100140057140057140057140042140042

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025140047104910011014002013939412935925700124001020002100003001020000100001264443669353814325829014002301400351400471318153132420600103002010000200006002010000200001400471400351150021109104001010000100001101000011000000100001031400311302213962340000666100001000040010140048140049140048140048140036
6002414004710490116521014003213939412935925700104001020002100023001020000100001264443669294714325829114002301400471400351318033132430600103002010000200006002010000200001400471400471150021109104001010000100000101000011000000100001131400211302213956640000660100001000040010140036140048140048140048140048
60024140035104900011014003213939412935925700124001020000100003001020000100001264443669353814325829114001101400471400471318823132430600103002010000200006002010000200001400471400471150021109104001010000100000101000001000000100000131400211302213956640000666100001000040010140036140048140048140048140048
60024140035104900011014003213939412935925700124001020002100003001020000100001264443669294714325829114001101400471400471318153132430600103002010000200006002010000200001400471400471150021109104001010000100000101000011000000100001131400311312213955440000066100001000040010140048140065140052140043140048
60024140047104900111014003213939412935925700104001020002100003001020000100001264443669353814325829114002301400471400351318153132430600103002010000200006002010000200001400471400471150021109104001010000100000101000011000000100001131400211302213956640000666100001000040010140036140036140036140048140049
60024140047104800110014003213939412935925700124001020002100003001020000100001264443669353814325829114002301400471400471318153132420600103002010000200006002010000200001400471400471150021109104001010000100000101000011000000100001131400213502213988140000066100001000040010140048140048140036140092140048
60024140047105000011014003213939412935925700124001020002100003001020000100001264443669294714325829114001101400351400351318033132420600103018010000200006002010000200001400541400471150021109104001010000100000101000011000000100021131400211302213956640000600100001000040010140048140036140036140036140048
60024140035104900011014003213939412934725700124001020002100003001020000100001264488669294714321442114002301400351400471318153132430600103002010000200006002010000200001400471400471150021109104001010000100000101000011000000100001031400211302213956640000066100001000040010140048140048140036140048140036
60024140047104900010014003213939412934725700124001020002100003001020000100001264443669353814325933114002301400471400471318153132430600103002010000200006002010000200001400471400471150021109104001010000100000101000001000000100001131400211302213956640000066100001000040010140048140036140048140036140048
60024140047104900110014003213939412935925700124001020002100003001020000100001264443669353814325829014002301400471400351318153132430600103021210000200006002010000200001400471401301150021109104001010000100001101000011000000100001031400211102213956640000666100001000040010140048140048140048140048140048

Test 3: throughput

Count: 8

Code:

  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  ld1r { v0.1d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020526730200100004500126693212082516014510080045800001008000080000500116913818873340267092670826728665036686160100200800008000020080000800002672826728118020110099100100800008000011008000043800393398003961394351101116101226705101080000800001002672926787268222674326734
1602042670820000000451012671321212182516014510080045800001008000080000500116875418835730266892673526728665036686160100200800008000020080000800002672826728118020110099100100800008000001008000043800391398003961394351101416101126725101080000800001002672926818268292674726880
160204267282000000045101267130121232516014510080045800001008000080000500117462818873870267092672826754672136686160100200800008000020080212800002670826708118020110099100100800008000001008000043800392080000613905110111681226705101080000800001002700626742267462673526709
16020426728200000004510126713212121925160100100800458000010080000800005001174628188733402670926728267286650366861601002008000080000200800008000026728267281180201100991001008000080000010080000438003910800396139435110101611112672510080000800001002672926824268272671426734
1602042672820000000451012669321204251601451008004580000100800008000050011746281887218026709267282672866503668616010020080000800002008000080000267282672811802011009910010080000800000100800004380039039800396139435110101611112670701080000800001002673026849267342674526729
16020426728201000000000267132121219251601451008004580000100800008000050011687541884011126689267082672866503668616010020080000800002008000080000267282672811802011009910010080000800000100800004380039242800390104351101116121126725101080000800001002672926803268152671526745
1602042672820010100451012671321212625160145100800458000010080000800005001168754188733412668926728267286650366661601002008000080000200800008000026708267281180201100991001008000080000010080000438003930800396104351101016111326725101080000800001002672926795268322673626733
160204267332000000045011267132121273251601451008004580000100800008000050011688801887343126689267112672866503668616010020080000800002008000080000267282672811802011009910010080000800000100800004380039142800390139435110111611726725101080000800001002672926780268222673626734
16020426728200000004501026713212121825160100100800458000010080000800005001174628188357312670926728267286650366861601002008000080000200800008000026728267281180201100991001008000080000010080000438000010800396139435110131611122670510080000800001002672926793268202673526734
16020426728200100000010267132121216251601451008004580000100800008000050011746281883573126709267282672866503668616010020080000800002008000080000267282672811802011009910010080000800001100800004380039139800396004351101116121226705101080000800001002697526731267382673126729

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002526735200110006510326718218181625160071108006280000108000080000501170529188375126714267152673366773671316001020800008000020800008000026733267331180021109101080000800000108001920428001911162800386057411910502031604426730908000080000102673426799268292673726735
16002426733200101006500326718018181725160072108001980000108000080000501170304187790926714267332673366773671316001020800008000020800008000026715267331180021109101080000800000108001921428005711059800386019421911502031603426730098000080000102673426796268092672126726
1600242671520011000660032671800181925160072108006280000108000080000501167371188808526714267332671566783671316001020800008000020800008000026733267331180021109101080000800000108001921428005702159800380057411910502041604426730098000080000102673426734267342673426722
160024267152001110065100267002180172516007210800628000010800008000050117309318880852670026733267156678366951600102080000800002080000800002673326733118002110910108000080000010800192142800191005980038615701900502041605426730908000080000102673426793268382672026743
1600242673320010100650022671821802125160073108001980000108000080000501171570188380126714267332673366773671316001020800008000020800008000026733267331180021109101080000800000108001919428005712021800386119421910502031604426730998000080000102673426773268292674326726
1600242673320010100651022671820019251600721080062800001080000800005011730931888085266962673326733667836713160010208000080000208000080000267332671511800211091010800008000001080019200800570102180038611901910502031614326730908000080000102671626868267892684026734
160024267332001110065102267182018212516007110800618000010800008000050116753018756192671426715267156678367131600102080000800002080000800002673326715118002110910108000080000010800192042800190015980038605801920502041603526730998000080000102671626869268012681926716
16002426715200110006510326718018181825160071108006180000108000080000501167530188511626714267332671566603671316001020800008000020800008000026733267331180021109101080000800000108001919428005811159800006157401910502041604326712998000080000102673426828268442673826720
1600242673920011100650002670021801625160071108006280000108000080000501164410187647826716267422673366603671516001020800008019220800008000026733267331180021109101080000800000108001921428005711059800000157421900502031604426730998000080000102673426825268132673926749
160024267332001010065103267182002225160073108006180000108000080000501169528187919426714267332673366603669516001020800008000020800008000026733267151180021109101080000800000108001921428005712160800000019421900502041603426730998000080000102673426780268392673926747