Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 16B)

Test 1: uops

Code:

  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.022

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.022

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
720052936921911711400036000047412910300015432120228022400080004000203359839120024719029189293073101200040008000400020000292502922211610011000100004005584006011640006168401282293036843305094719288307738247464128486162931297213589400080002935429251292602934729303
72004293292191211161002640100452829135000153831202280224000800040002034798443105247690291912932131012000400080004000200002922029308116100110001000040055124007002640026168401282291606821304064119329310338219464628633164591309213492400080002932129362293422934429302
72004293572201120141002970100459729061000153491202280084000800040002031198440105247910293832935431012000400080004000200002920829231116100110001000140065040070104400260604112922905868083007844192453032381914474228539163681304813719400080002937229289293252941429382
72004293062191131141003210100450029084000153631202280224000800040002033098431000247410291472929331012000400080004000200002925129312116100110001000040055840070014400260684112889911168423026646193643089382018444928517164751314613599400080002941129352292822932229286
72004292712191151131003390100450729107040153421202280084000800040002034398412000247970291792925431012000400080004000200002925729258116100110001000040045840080026400000684212955907268203036947193893146382118474528522164231303113681400080002924929275293272929129304
720042926622011411210033400004517290970001536012022803240008000400020323985283002483302921229275310120004000800040002000029308292621161001100010000400568400800164006616124113236916068563038946192783106382416465028581165081315013541400080002928629284293472931929317
7200429396219111112100120000452029064000152911200880224000800040002033898471215247740292252934031012000400080004000200002918129282116100110001000040065840080016400261684212837934969493077640194013079382014474828521163631301013536400080002929329323292912934629266
72004293232191141131003240100447829047000153641202280204000800040002034398396100247930291762932931012000400080004000200002926229201116100110001000040065840070016400860604212827914268223037949193733068382313394228474164881315513784400080002940329346293832934729283
720042926421911411700027901004564291030001528012022802240008000400020341986051002475602920429303310120004000800040002000029209292001161001100010000400651240130111240080168411280691406847304884519308307338218444228508162391307613609400080002926529343293182932829343
72004293012191101141002810100450629106000152771203280224000800040002033798155000248010292252934631012000400080004000200002923529260116100110001000140066840080026400261484212902919668303023744193003108381918414128564164141314013560400080002926729284292622932829287

Test 2: throughput

Count: 8

Code:

  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
96020516007311990000024535210160044312122596015610064005632000010064000032000050016804081856001211600241600631600590034196010020032000064000020032000016000001600591600591180201100991001008000080000110032000035032003206003200326132350051092174116047601001320000640000100161213161072160044160060160060
9602041600591204010004368610160574312125996015610064000032000010064000032000050016804581856001211600401600591607480021419609082003201366416042003200001600000160059160744118020110099100100800008000001003200003503200320003532000060323500510911711160040010101320000640000100160061160060160060160060160060
960204160059119900110380001600440012259601561006400003200001006400003200005001680458816000011600401600681600590034196010020032000064000020032000016000001600591600431180201100991001008000080000010032000035032000000032320032610000510911711160056010100320000640000100160060160060160060160060160060
960204160059119900000380101600283121225960156100640540320000100640000320000500168041518560012116004116005916005900325960100200320000640000200320000160000016005916005911802011009910010080000800001100320000350320000000932003261320005109117111600400001320000640000100160060160060160060160060160044
96020416005911990110038000160044312125796015610064005632000010064000032000050016804151856001211600401600591600590034196010020032000064000020032000016000001601911600431180201100991001008000080000010032000000320032000032003200323500510911711160040010101320000640000100160062160060160060160060160060
96020416004311990000038010160028312122596015610064005632000010064000032000050016800018160000116004016005916005900341960100200320000640000200320000160000016005916005911802011009910010080000800000100320000003200320503232003260320005109117211600560000320000640000100160060160044160060160060160044
96020416004511990000038001160028312122596010010064005632013010064000032000050016800011856001211600401600591600590034196010020032000064000020032000016000001600591600431180201100991001008000080000010032000000320000000323200006103500510911711160056010101320000640000100160060160060160060160060160044
96020416004311990000038001160044301225960156100640056320000100640000320000500168000118560020116004016006216004300325960100200320000640000200320000160000016005916004311802011009910010080000800000100320000350320032000353200320132000510911711160040010101320000640000100160045160060160060160060160044
960204160059119801100380011600280012259601001006400563200001006400003200005001680415816000011600241600591600590034196010020032000064000020032000016000001600431600591180201100991001008000080000010032000035032003200032320032010350051091171116005600101320000640000100160060160060160060160060160060
960204160059119900000380011600443121225960156100640056320000100640000320000500168372518560012116004016004316005900325960100200320000640000200320000160000016005916005911802011009910010080000800000100320000350320032000032003261323500510911711160056010101320000640000100160060160060160060160063160060

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
9600251600741199100200007300011600591641810125960074106400723200001064000032000050168151221761472016003701600561600740035696001020320000640000203200001600000160074160074118002110910108000080000010320013145803200660006732002461674413150195177516007301010532000064000010160057160075160075160075160075
960024160056119910000000380001160059254180125960050106400723200001064000032000050168037021761472016005501600741600748293569600102032000064000020320000160000016005716007411800211091010800008000001032001414580320037101383200246167013150195177516005301010332000064000010160075160075160072160058160075
960024160074119910000000730001160059160180025960050106400403200001064000032000050168152316640032016005501600571600748035696001020320000640000203200001600000160074160057118002110910108000080000010320014140032006719067320054613801305019517751600710100032000064000010160075160075160075160075160075
96002416007411991001000073000116015016000125960050106400763200001064000032000050168151221761472116005701600551600569033796001020320000640000203200001600000160074160076118002110910108000080000010320142135803200670013832005461384413150197177516007101010332000064000010160058160057160075160058160075
96002416005511981002100038010116005916018002596005010640076320000106400003200005016803541664003211600370160074160074803569600102032000064000020320000160000016007416007411800211091010800008000001032001313003200670016832002560384413150195177516007101010332000064000010160075160075160075160057160075
9600241600571199100101006820001160059164181012596004610640036320000106400003200005016815122176147211600550160057160074803569600102032000064000020320000160000016007416005711800211091010800008000001032001314580320038001673200546167013250194175716007101010532000064000010160075160075160075160057160058
96002416007411991001000073010116006116418101259600861064006432000010640000320000501681512217614721160055016007416007400356960010203200006400002032000016000001600741600551180021109101080000800000103200141458032006710168320054606744131501971775160071000032000064000010160056160075160075160075160059
9600241600561199100100003800011600591640002596008610640076320000106400003200005016815122176147211600570160055160074003569604062032000064026420320000160000016007416005611800211091010800008000001032001415580320038340703200540167441305019517571600710100432000064000010160057160075160075160075160075
9600241600741199100111208500011600411600012596008610640300320000106400003201365016803792176147211600370160057160056803569600102032000064000020320000160000016007416005711800211091010800008000001032001314580320067100673200246067431315019517751600520010032000064000010160075160058160057160075160075
960024160057119910011000381121011600411641810805896008210640072320000106400003200005016815162176162011600550160056160074803569600102032000064000020320000160000016007616007411800211091010800008000001032001413580320068200673200246066441315019517571600540002132000064000010160075160059160075160075160058