Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (post-index, 16B)

Test 1: uops

Code:

  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.008

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
650052900123351010101040004737285410016575601210004000100010004000100050005023475261122952286882890931060001000400020004000287932880711610011000100001005131002031497100101210401324093296915313105620153333938122558532813310001562512647138091000400010002901028785288142878828811
6500428896231101000001180104620282960016346601210014012100010004000100050005024475121022875286862875131060001000400020004000286942861111610011000100001003021000000100120301310694396953319204819605333338151653552837210001583312701139011000400010002864128768287882883028711
6500428791232101000000120004710283610016356601210004012100010004000100050005014475401222861285562866031060001000400020004000285642865111610011000100001000231001104100121301339494926942328105019643313838142051442816010001552412594138101000400010002885828763287092871228697
650042872123120000000000004709282540016527601210004012100010004000100050005000475041022965286352861231060001000400020004000286342866411610011000100001001001001101100121301308493817044318515119712317838172349542846410001525812704136281000400010002863528740288002860328639
6500428951233101000000128810473328367001640060121000400810001000400010005000500047548122285928376287403106000100040002000400028635284411161001100010000100000100000403100011301321793337053320604919510318838111548542816110001548212416137431000400010002868328708287652872928633
6500428765222100001000140004781283610016419600810004012100010004000100050005000474261122945286142858631060001000400020004000285152856811610011000100001000021001000100100001323092406990320205319621320138182259512812510001574912576133481000400010002880628620285722865128499
650042859022210000010012000493028278001641460121000400010001000400010005000500047434922893286372871531060001000400020004000286302862211610011000100001000021000000100221201327393107030320515319610323238282153532805710001566612725140141000400010002922630116295072944329545
6500429619237608007000400048392845210164446012100040121000100040001000500050004740411229722866028560310600010004000200040002884028701116100110001000010000210011015100010001323196377014326534819476324438171649492820910001555212744138731000400010002866828513285822862828767
650042861422200701400013000461628602101675660121000401210001000400010005000503147520822901289412892531960001000400020004000284622852211610011000100001000001000001100001201333997876860320035519510327538201552532807110001511612234137141000400010002871228665285962857528507
650042858622220701500030004673283760116467601210004008100010004000100050005002474266229602862428774310600010004000200040002861528564116100110001000010000210010001001213166131999795697832864491954831843821958482803410001537712614136231000400010002872628771286832874128761

Test 2: throughput

Count: 8

Code:

  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cd | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002058005762101000002101080045166025480100801003200728000080100320000800004804994800118640020800418006080042003394801002008000032000020016000032000080060800571180201100991001008000080000100800000188001600148001661140000511102171280057180000131080000320000801008006280061800618006180061
4002048006062000000000000800451600254801728010032007280000801003200008000048049948002886400248003880057800600034248010020080000320000200160000320336800608005711802011009910010080000800001008000001880015001980016611423000510901172180057180000131080000320000801008006180061800618006180061
400204800606210000000220008002710002548017280100320072800008010032000080000480499480023415999680038800608006000342480100200800003200002001600003200008006080060118020110099100100800008000010080000008001600138001661020000510901171180057080000131480000320000801008006180058800618006180061
400204800606210000000220018004616002548017280100320072800008010032000080000480499480015864002080041800428006000339480100200800003200002001600003200008006080057118020110099100100800008000010080000018800031019800166013000051090117118005718000001080000320000801008006180061800618006180061
40020480060621000000026000800451660254801728010032000080000801003200008000048049948001586400208004180060800600034248010020080000320000200160000320000800608005711802011009910010080000800001008000001980015001680015611320000510901171280057080000131080000320000801008006180061800618006180062
4002048006062100000002300080045166025480172801003200728000080100320000800004804994799984574864800418006080045003424801002008000032000020016000032000080060800571180201100991001008000080000100800000188001600178000061020000510901172280057080000101080000320000801008006180061800618006180061
40020480042621000000000018003016602548017280100320072800008010032000080000480499480018864002080038800608006000342480100200800003200002001600003200008006080057118020110099100100800008000010080000008001600168001661142000051093117118005708000013080000320000801008006180043800618006180046
40020480060621000000020001800451660254801728010032000080000801003200008000048049948001986400208004180042800600034248010020080000320000200160000320000800608005711802011009910010080000800001008000001980015001880014610200005111021712800571800000080000320000801008004380061800438006180061
40020480060621000000022001800451660254801728010032000080000801003200008000048049948002886400208004180060800600733948010020080000320000200160000320000800608005711802011009910010080000800001008000002080016101680015611320000511102171280039080000131380000320000801008006180061800618006180061
400204800606200000000000180027060025480172801003200008000080100320000800004804994800219600020800388006080060003394801002008000032000020016000032000080060800571180201100991001008000080000100800000208001510188001661162000051110117128005718000013080000320000801008006180061800618005880061

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000258005862001000005400008004516602548008280010320072800008001032000080000480049480017864002080041800428006000342480010208000032000020160000320000800608004211800211091010800008000001080000018800160001780016611320050195176580057180000131380000320000800108006180061800618006180061
4000248006062000000003601008004516002548008280010320072800008001032000080000480049480031864002080041800428004200342480010208000032000020160000320000800608006011800211091010800008000001080000018800150101980016611618050196175680057080000101380000320000800108006180043800618006180062
4000248006062100000002820000800481060254800828001032007280000800103200008000048004948001586400208003880057800600172342480010208000032000020160000320000800608005711800211091010800008000001080000020800160001980015611420050195177580057080000131380000320000800108006180058800618006180061
400024800606210000000321010080045106025480078800103200728000080010320000800004800494800158640020800418006080060003424800102080000320000201600003200008006180057118002110910108000080000010800000188001600008001561020050195175680057180000131380000320000800108006180061800618006180061
400024800606200000000730100800450660254800108001032006080000800103200008000048004948001686400208004180060800600034248001020800003200002016000032000080060800571180021109101080000800000108000002080016010168001401130050195175680057180000131080000320000800108005880061800588005880061
4000248005762000000004680000800451660254800828001032007280000800103200008000048004948001586400208002380060800600033948001020800003200002016000032000080060800601180021109101080000800000108000002080016000080016611320050196175480057180000131480000320000800108006180046800618006180060
40002480060620000000045600008004516602548008280010320072800008001032000080000480049480015864002080041800608006000342480010208000032000020160000320000800608005911800211091010800008000001080000018800150001680016611520050195175580057180000131080000320000800108006180061800618006180061
40002480060620000000049300008004516602548008280010320072800008001032000080000480049480021960002080041800608006000342480010208000032000020160000320000800608005711800211091010800008000001080000020800150101680013611618050195175680058080000131380000320000800108005880061800618006180061
40002480042621000000044800008004516602548008280010320072800008001032000080000480049480021864002080041800608006000342480010208000032000020160000320000800608006111800211091010800008000001080000018800160001480016611320050195177580039180000131080000320000800108006180058800618006180061
40002480060620000000010600008002700602548008280010320072800008001032000080000480049479998960002080041800608006000342480328208000032000020160000320000800628006011800211091010800008000001080000020800130001680016611820050345346880287180080131380000320000800108019480349800878006180350