Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 1 reg, 8H)

Test 1: uops

Code:

  ld1 { v0.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
610052939022040100021930045732879601124225200010001000100010005000500051595128864295133102000100020002927029309116100210001000110000310010011001213000133049318693031331642187230943814167473284781000165401375915125100010002930829388296022969929478
61004294922290010100471046172878700024269200010001000100010005000500081597328647293293102000100020002915829162116100110001000010000310000001000202000128638948690530780682174930903819176567284381000163211376615177100010002934429371293192939129367
61004292422201000000300046192881200024410200010001000100010005000500001597928702293763102000100020002913829186116100110001000010000210010001000202000128859257685030810672166231043813176567284161000164721373815229100010002929529436292762934429255
61004293562191000100180045992888100024289200010001000100010005000500051595428728293993102000100020002920229104116100110001000010000210000001000303000129349137687131441682164031623812177470284361000162451377115283100010002933829349293092939529357
61004293952200010100220046352880400024391200010001000100010005000500001597428695294463102000100020002918429121116100110001000010000210000001000303000129129098687930611722168330523815187268283741000163631366315224100010002934029321293742931629286
61004293172191010000510045882884700024346200010001000100010005000500071598228701293573102000100020002916129102116100110001000010000310000011000202000132159248684731030582176630973818136470284991000164841371515077100010002946429490295533030529768
6100429502227100010029104693289320002450920001000100010001000500050005159502818328532161020001000200028532282871161001100010000100002100110110012131001351510108716834110662058132533814207172280561000140691232213712100010002845928477285252851728273
610042821521210101002620050342804601123392200010001000100010005000500051597528056283163102000100020002827028207116100110001000010000210010011001213000133819754716633570682068734233814217070278361000143861242713461100010002840728378283342846928357
610042824021110101002600504028037001232452000100010001000100050005000715977281042825631020001000200028084282731161001100010000100002100100010012131001380610103729533760702071633713815167070278571000144361281413701100010002856228146284502825128357
61004283312121000100140050282806601123516200010001000100010005000500151597228075286223102000100020002825028374116100110001000010000210010011001213000139349698699433210662064433563813136670278351000147291255613908100010002826728441282612845228209

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.8h }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 4d | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502051200579300011001300120039119754025701035010210001100004010010000100001062117453813145851010120027012005412005411330231137086010030200100001000060200200001000012005312005111502011009910040100100001000001001000001100000010000110032101761111976350002101091000050100120052120055120055120055120055
50204120051930000000100120039119757025701035010210001100004010010000100001062144453801745851390120027012005412008611330531137006010030200100001000060200200001000012005112005111502011009910040100100001000011001000001100000010000110032101761111976350002131391000050100120055120052120055120055120052
5020412005493100000013001200361197540257010350102100011000040100100001000010621174538131458510101200300120054120051113305311370260100302001000010000602002000010000120054120051115020110099100401001000010000010010005011000010100001100321017611119763500021013101000050100120055120055120055120055120036
50204120054930000000100120039119754025701035010210001100004025210000100401062144453820945851010120104012005712005411330531136996010030200100001000060200200001000012005412005111502011009910040100100001000011001000001100000281010000110032101761111976450002131091000050100120055120055120052120055120057
502041201409310000001600120039119754025701035010210001100024010010000100001062117453801745851010120030012005412005411330231137026010030200100001000060200200001000012005112005411502011009910040100100001000001001000001100000010000110032101761111976150002131091000050100120055120055120052120052120055
50204120054931000000100120039119754047701035010210001100004010010000100001062144453813145851010120030012005112005411330331137026010030200100001000060200200001000012013812005111502011009910040100100001000011001000001100000310000010032102761111976350002131091000050100120055120052120141120055120055
50204120054930100000100120039121391025701035010210001100004010010000100001062117453813145851010120030012005112005411330531136996010030200100001000060200200801000012005112005311502011009910040100100001000011001000001100000310000110032101761111976050002101091000050100120134120055120053120052120055
50204120054930000010100120039119754025701035010210001100004010010000100001062117453986745851010120030012005412005411330531136996010030328100001000060200200001000012005912005211502011009910040100100001000011001000001100000310000110032361761111976050002101091000050100120055120055120052120052120057
50204120138930000000100120042119757025701035010210003100004010010000100001062180453813145851400120027012005412005411330331137466010030200100001000060200200001000012005412005111502011009910040100100001000011001000001100030010000110032101761111976350012141391000050100120052120055120057120055120055
502041200549300101001688012003611975702570103501021000310000401001000010000106211745381314585101012003101200541200571133051211370260100302001000010000602002000010000120058120051115020110099100401001000010000110010000011000013100001100321017611119766500021310121000050100120055120056120052120055120145

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50025120053931100000020000012003811973625700135001210002100004001010000100001062176453848545861410120034120041120041113315311371760010300201000010000600202000010000120058120054115002110910400101000010000010100011110001021100001101003140582022119772500040681000050010120042120057120057120042120042
500241200539301010100946352000012003811973648700165001210001100004001010000100001062176453848545861410120032120041120056113315311371760010300201000010000600202000010000120056120053215002110910400101000010000010100011110001111100001101003140282033119760500049681000050010120057120057120058120042120057
50024120041934100100020100112003811973625700165001410002100004001010000100001062193453905945861411120017120053120041113327311371760010300201000010000600202000010000120053120151115002110910400101000010000010100031110002014100001111103140282022119760500049651000050010120057120054122123122894120057
5002412005693110010001400001120039119733257001650012100011000040010100001000010621934539097458679711200171200531201401133323113720600103002010000100006026620000100001200551200531150021109104001010000100001101000511100011111000011110031402820321197725000410651000050010120054120057120054120054120057
50024120053931100010010100012004111972145700165001210001100004001010040100001062185453848545861411120032120087120105113330311371960229300201000010000600202000010000120053120041215002110910400101000010000110100011010002001100001101003140282022119760500049081000050010120045120044120057120042120147
5002412005393111001001340000012003911973647700135001210002100004001010000100001062193453894545861411120030120056120056113330311372060010300201000010000600202000010000120041120142115002110910400101000010000110100011110001101100000111103175317032119768500040661000050010120058120103120109120057120059
500241200419311011000340000112003811973625700135001410002100004001010040100001062193453860245856700120017120053120057113330311372060010300201000010000600202000010000120058120053115002110910400101000010000110100012110001004100010111003140382033119845500126001000050010120042120042120057120054120054
50024120149931110000020000012004111973825700135003210004100004015210000100401062194453848545861801120107120041120041113327311371760010300201000010000600202000010000120056120041115002110910400101000010000010100013110001012726100000111003140282022119774500047651000050010120054120045120054120054120042
50024120148931110100020000012004211972125700165001410001100004001010000100001062193454039845861410120029120056120056113330311372060010300201004010000600202000010000120056120053115002110910400101000010000010100051110002114100001111103140385032119775500040051000050010120057120059120058120055120054
500241200539311001100200001120041119733257001650022100021000040010100001000010621664538485458614101200291200531200561133303113766600103002010000100006002020000100001200561200531150021109104001010000100000101000111100010010100001111203140382032119772500046651000050010120054120057120057120054120042

Test 3: throughput

Count: 8

Code:

  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  ld1 { v0.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802058004062010001000320008002516610251601008010080000801008069041792183758824080015800408004069924369997160100200800002001600008004080040118020110099100100800008000011008000872380026002258001861252370511021711800371800009980000801008004180041800418004180041
802048004062000000000190018002516610251601008010080025801008000041796793758824080015800408009169924369997160100200800002001600008004080040118020110099100100800008000001008000702680009041258001961251770511011712800370800009980000801008004180041800418004180041
80204800406211000000032000800251668251601008010080000801008000041796413758823080015800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000782380025020288000061723605110117118003718000010980000801008004180041800418004180041
80204800406200000000021000800251669251601008010080000801008000041796793758824180015800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000702380033010268001961251770511012511800370800009980000801008004180041800418004180041
80204800406201101000032000800251668251601658010080000801008000041796553758823080054800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000762380025020288004061252370514311711800370800009980000801008004180041800418004180092
8020480040620010000001500080025166925160100801008000080100800004179695375882408001580040800406992437003016010020080000200160000800408004011802011009910010080000800000100800060238001203268000061261760511011711800371800006080000801008004180041800418004180041
802048004062110010000310008002516610251601758010080000801008000041796473758825080015800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000872480027031258001861262360511011711800371800009980000801008004180041800418004180041
802048004062000000000150008002516011251601008010080000801008000041796873758824080015800408004069924369997160100200800002001600008004080040118020210099100100800008000001008000902380013060258001960291770511011711800371800009980000801008004180041800418004180041
802048004062010011000400008002516611251601008010080000801008000041796553758824080015800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000772380027030318001961252370511011711800371800009980000801008004180041800418004180041
802048004062001000000160108002516682516010080100800258010080000417968737588240800158009280040699243699971601002008000020016000080040800401180201100991001008000080000010080006023800120103080018612617615110117118003718000091080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580040620000000230080025166625160010800108000080010800004178621375882280015800918004069946370020160010208000020160000800408004011800211091010800008000001080000021800170001580017611621005020015161415800371800001441080000800108004180041800418004180041
8002480092621000000230180025166152516001080010800008001080000417862137588228001580040800406994637002016001020800002016000080040800401180021109101080000800000108000001980017000771800176116210050210151681480037180000131080000800108009280041800418004180041
8002480040620000000230080025166625160010800108000080010800004178613375882280015800408004069964370020160010208000020160000800408004011800211091010800008000001080000019800170001680017611521005021011161414800371800293631380000800108004180041800418004180041
80024800406200000013500800251666251600108001080000800108000041786133758822800158004080040699463700201600102080000201600008004080040118002110910108000080000010800000198001600018800166116210050210151614680037080000101380000800108004180041800418004180041
80024800406210011002300800251667251600108003880000800108000041786213758823800158004080040699463700201600102080000201600008004080091118002110910108000080000010800000198001600016800176115210050200716151480037180000131380000800108004180041800418004180041
80024800406200000003800800251666251600108001080000800108000041786213758822800158004080040699468700201600102080000201600008004080040118002110910108000080000010800000218001700015800176116210050200141681580037180000101380000800108004180041800418004180041
800248004062000110016700800251667251600108001080000800108000041786213758822800158004080040699463700201600102080000201600008004080040118002110910108000080000010800000238001600020800156116210050200151614680037180000101380000800108009380041800418004180041
800248004062100000023008002516652516001080010800008008380000417861337588228001580040800916997537002016001020800002016000080040800401180021109101080000800000108000002180017000198001761152100502001516151480037080000101080000800108004180041800418004180041
800248004062100000032008002516602516001080010800008001080000417862137588238001580040800406994637002016001020800002016000080040800401180021109101080000800000108000001980016010178001661162100502002723361480037180000131380000800108004180041800418004180041
800248004062000000023008002516672516006380010800008001080000417863737588228001580040800406994637002016001020800002016000080090800401180021109101080000800000108000002180017000178001701152100502001416151580037080000131080000800108004180041800418004180041