Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 1 reg, 2D)

Test 1: uops

Code:

  ld1 { v0.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 5f | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6100528604222111901001000101004865282250222356020001000100010001000500050000501595128175285533102000100020002860428528116100110001000010032310010211100331011013211924168333152734210033168381193837279901000153291284813995100010002851128598285952869228735
6100428702222011401610000500004785282080002367220001000100010001000500050000190159572815828595310200010002000284582845811610011000100001001301005011201001323100131889401710031976402094632433808123329281051000150391251514079100010002850128533286012857728623
610042859122201161171000010000469328300000237082000100010001000100050005000012015964281142856131020001000200028530284671161001100010000100213100100211003224100131989750696532506402092931723809173532281911000154451280314397100010002860628582285942862228666
61004287242210116210100004000048722826512223649200010001000100010005000500001301596028138284143102000100020002851828481116100110001000010013310010024100104010013437964570763162733210003253380893630281371000153511260413965100010002862228560285902864228557
6100428612221011211510000170000486428291020236042000100010001000100050005000013015960281762866131020001000200028564285191161001100010004100243100100111000044110131919556700831798292091131633810133935280471000153771264214106100010002857328599286412856528585
610042852522201101141000050000468128322000236792000100010001000100050005000011015949280962850031020001000200028537284731161001100010001100323100100182100002311013271947069253191837209983286381083536281681000150341302114277100010002850828621286392854828609
610042869322101120130000010000485428242002235302000100010001000100050005000013015937281322858131020001000200028492284811161001100010000100123100500141003223110133499480705332926362086632353809113231281401000150771278114329100010002866628624286312866928641
610042859322201180131000060000483728297101236582000100010001000100050005000017015961283262860831020001000200028492285851161001100010000100130100400211000020100134339545700132531239210333197381183735281021000151051314514166100010002868028589285812863228528
610042845522201100111000060000489328283122235812000100010001000100050005000012015948281742865831020001000200028410284991161001100010000100233100400021000214110132379528698131886312092931533813123235281091000152651282614327100010002851028687285622852828722
6100428547222011011610000100004701282980012363620001000100010001000500050010701595828246284823102000100020002840328443116100110001000010033310060001100021410013102951369773212738209363132381463231281631000152781274814358100010002866328522284952856128632

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.2d }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50205120054930100011002010011200261197592570106501041000210000401001000010000106213545380934585179012003001200531200531133673113724601003020010000100006020020000100001200531200531150201100991004010010000100001100100022110001001110000111103210176011119763500046651000050100120054120054120057120057120055
502041200539301000000017001011200411197562570106501041000210000401001000010000106213545387114588662112003001200531201441133043113705601003034410000100006020020098100001200531200531150201100991004010010000100000100100012110002010410000111113210176011119762500046051000050100120054120054120054120054120147
5020412005393010000000148801011200381197592570120501141000210000401001000010000106213545380934585179012002901200561200531133043113746601003020010000100006020020000100001200411201501150201100991004010010000100000100100012110001000110000011103210176011119762500046651000050100120054120054120054120147120057
502041200539301100000014001011200381196852570106501041000210000401001000010000106213545381314585179012003201200621201421133073113724601003020010000100006044420000100001200531200411150201100991004010010000100000100100021110001030110000111103210176011119750500046651000050100120054120054120054120054120054
502041200539311000000014001011200381197562570106501041000110000401001004010000106216245381734585179012003201200561200531133043113759603243020010000100006020020000100001200561200531150201100991004010010000100000100100011110001000410000111103210176011119763500046751000050100120057120054120054120054120146
502041200539311001000028801011201251197564770106501041000210000401001000010000106216245382074585179012002901200561200531133043113708601003020010000100006020020000100001201451200551150201100991004010010000100000100100012110001000110000111103210376011119763500049901000050100120153120057120054120054120054
50204120041931110001002001011200381197562570106501041000210000401001000010000106390945382074585179012002901200531200531133043113778601003020010000100006020020000100001200531200531150201100991004010010000100000100100011110001001282610000111113210176011119765500046681000050100120057120054120057120057120054
502041200539311000000013001001200381197562570106501041000210000402421000010000106213545381694585179012001701200561200531133043113766601003020010000100006020020000100001200561200531150201100991004010010000100000100100013110002000410000111113210176011119763500046651000050100120057120054120054120057120042
5020412005693010000000134001001200381197567170106501041000210000401001000010000106213545381694593656012003201200531200441133073113713601003020010000100006020020000100001200531200541150201100991004010010000100000100100022110001011110000111103210193011119762500040601000050100120054120042120054120057120094
502041200539301000110014001011200381197564870106501041000210000401001000010000106213545382454585179012002901200531200561133053113725601003020010000100006020020000100001200531200581150201100991004010010000100001100100022110001001110000111103210176011119762500046681000050100120057120055120057120058120054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002512005493000010003101000120035119734257001350012100011000040010100001000010621034538831458590701200241200501200501133210311371460010300201000010000600202000010000120050120047115002110910400101000010000101000001100001031000010100031400382022119754500029081000050010120051120052120051120051120036
500241200519310000000200000120035119730257001350012100011000240010100001000010621034538328458590701200281200501200571133240311373960010300201000010000600202000010000120050120047115002110910400101000010000101000000100000001000010100031400382023119769500029681000050010120051120054120051120051120123
50024120035930000000013881001120027119733477001650014100021000040010100001000010621664538945458614101200351200561200561133270311375160010300201000010000603122000010000120050120047215002110910400101000010000101000001100000001000000100031400482022119769500020651000050010120051120139120051120051120051
500241200359310000001101000120035119730257001350012100011000040010100001000010643034538831458590701200261200501200531133240311373560010300201000010000600202000010000120047120051115002110910400101000010000101000001100001031000010100031400482043119769500000001000050010120132120036120036120041120142
50024120035930000000014401000120035119730257001350012100011000040010100001000010621034538717458590711200291200501200501133610311371660010300201000010000600202008010000120051120049115002110910400101000010000101000000100001001000010100031400282032119769500029681000050010120051120054120051120051120092
5002412004793000000001010001200351197302570010500121000410002400101000010000106213945388314585985112007812005012005111376610311371560010300201000010000600202000010000120052120050115002110910400101000010000101000001100000001000010100031400282023119769500029681000050010120053120148120036120053120051
5002412004793100000012201000120035119730257001350012100011000040010100001000010621664541075458602401200261200501200501133240311371560010300201000010000600202000010000120050120035115002110910400101000010000101000001100001001000010100031640382032119766500029681000050010120051120051120051120051120051
5002412013993000000001000001200501197302570025500121000110000400101000010000106213945389454585907012001112003612003511332403113700600103002010000100006002020000100001200511201331150021109104001010000100001010000011000010010000101000314006820331197695000212691000050010120145120051120036120036120051
50024120050931000000013000001200321197302570013500121000110000400101004010000106211245388314585907012001112005012005011332403113717600103002010000100006002020000100001200501200471150021109104001010000100001010000001000010010000101000314002820221197725000210601000050010120051120051120136120052120051
50024120050930000000113000001200351197142570013500101000110000400101000010000106213945405454585907012002612004712003511332403611375360010300201000010000600202000010000120050120047115002110910400101000010000101000001100000031000010100031400382033119772500020001000050010120051120036120036120051120125

Test 3: throughput

Count: 8

Code:

  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  ld1 { v0.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020580040620100011003200000800251668251601008010080000801008000041796553758824080015800408004069924369997160248200800002001600008004080040118020110099100100800008000001008000872380026002580019612523601511011711800371800526680000801008004180041800418004180041
80204800406200000001021880000800251664925160100801008000080100800004179679375882308001580040800406992436999716010020080000200160000800408004011802011009910010080000800000100800000178001200128001561917000511011711800370800000680000801008009280041800418004180041
802048004062101010000310100080025066525160100801008000080194800004179695375882408001580092800406992436999716010020080000200160000800408004011802011009910010080000800000100800098238002501258001961017000511011711800371800000080000801008004180041800418004180041
802048009062101000001320000180025166725160100801008000080100800004179615375882308001580040800406992436999716027220080000200160000800408004011802021009910010080000800000100800000178000000128001261250600511011711800370800009080000801008004180041800418004180041
80204800406201000000015100000800250664251601008010080000801008000041796953758824080015800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000782380027012580019601417000511011711800371800009680000801008004180041800418004180041
80204800406200120000031000018002516682516010080100800008010080000417965537588220800158004080040699243699971601002008000020016000080040800401180201100991001008000080000010080000017800100010800136126247005110117218003718000091080000801008009380041800418004180041
80204800406211000000019000008002510642516010080174800008010080000417969537588240800158004080040699243699971601002008000020016032080040802431180201100991001008000080000010080007623800273026800186110170205110217118003718000091280000801008004180041800418004180041
80204800406210000000032000018002506612516010080100800008010080000417965537588210800158004080040699243699971601002008000020016000080040800401180201100991001008000080000010080000014800130008001261017000511011711800371800009680000801008004180041800418004180041
80204800406200000100031000008002516610251601008014280000801008000041796553758823080054800408004069924369997160100200800002001600008004080040118020110099100100800008000001008004401780037027608023361623630515713311800760800779980000801008009180092801418009280143
8020480091622110100012831760000800761605045160231801008005080174800694179592376093518009580089801416994377009516025120080078200162716808528089617180201100991001008000080000010080058623800472376180062611014000512513311801160800420680000801008009180091800928009180092

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800258004062010000000035000080025166144016001080010800008001080000417862137588200000800158004080040699463700201600102080000201600008004080040118002110910108000080000010800078278002910298002261292771502007165380037180000131380000800108004180041800418004180041
80024800406201110000003500018002516612516001080010800008001080000417861337588230000800158004080040699463700201600102080000201600008004080040118002110910108000080000010800088278003001328000061302760502007166580037180000161380000800108004180041800418004180041
80024800406201000000003500008002506615251600108001080000800108000041786453758820000080015800408004069946370020160010208000020160000800408004011800211091010800008000001080007827800300029800236129060502008164780037080000131380000800108004180041800418004180041
80024800406431000000003500008002516614251600108001080000800108000041785973758824000080015800408004069946370020160010208024020160000800408004011800211091010800008000001080007727800290029800230130277050200516548003708000013080000800108004180041800418004181559
800248004062011001000035000080025166925160010800108000080010800004178621375882300008001580040800406994637005216001020800002016000080040800401180021109101080000800000108000862780030003080023612906050200516468003718000001380000800108004180041800418004180041
80024800406201100000003500018002516613251600108003880000800108000041786213758824000080015800408004069946370020160010208000020160000800408008911800211091010800008000001080006708002900298002361292771502004165380037180000131380000800108004180041800418004180041
80024800406211010100003600008002516613251600108001080000800108000041786133758825000080015800408004069946370020160010208000020160000800408004011800211091010800008000001080007727800300129800236129071502004164680037080000131380000800108004180041800418004180041
800248004062011000000036000080025166102516001080010800008001080000417862137588180000800158004080040699463700201600102080000201600008004080040118002110910108000080000010800076278003101786800236129277050200416378003718000013080000800108004180041800418004180041
8002480040620111000000350000800251669251600108003880000800108000041786133758824000080171800408004069946370020160010208000020160000800408004021800211091010800008000001080007827800080030800236129060502004164680037180000131380000800108004180041800418004180041
80024800406211100000003900008002510622516001080010800008001080000417860537588200000800158004080040699463700201601532080000201600008004080040118002110910108000080000010800087278002901298002761292761502005167580037080000141380000800108004180041800418004180041