Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4R (2S)

Test 1: uops

Code:

  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.012

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.012

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
650052926321902100140010050004586288790001700950164016100040001000500047514022865290822921831050001000400010004000290602923211610011000100011000021000000010012121212974909369103063752202203084381614443628336160631388614787100040002928229264292972937629337
650042928422011111111000050004578288290101698350164000100040001000500047577822923291402925031050001000400010004008291752909811610011000100001000031002000310010100012797910668443060948201703105381611454428400162521397514988100040002922229244292802932929274
650042933222011401151000360004566288580101698050084016100040001000500147595322914291502918031050001000400010004000291482918611610011000100001002331002002110003231112985908368263012854201903111381815453828360163471402814985100040002926329294293442934729246
650042940521901300110000080104562288220001689950164012100040001000500047613022947290472934331050001000400010004000291272910811610011000100001000031001000010022020012942912468313107944201223073381618424528377164171405814883100040002923529217292872921829308
650042920521901300170000040004630288540001691150044016100040001000500047629522907291052931531050001000400010004000291872910011610011000100001000031000000110002100012807922869113061760202303104381718394228455164811391715124100040002926929205292542927029239
65004293102190160015000003030104560287130001698450124004100040001000500047651522897290752927231050001000400010004000291862917511610011000100001000031000000010002020013056938968493079953202253080381816475028333163111382114841100040002926629241292752927829243
65004293882190110015000000010453528887000169735012401610004000100050004760722292429079293103105000100040001000400029101291091161001100010000100000100201031000002001299490916896302464520196307638179434128364163191390514856100040002921529234292392934229269
650042925721901400150000040104566288260111694650164016100040001000500047633222883291122930731050001000400010004000291562911611610011000100001000021002010010022030012869952368303104747200803068381812454228441164631403015020100040002920529343292212926729269
6500429234219014001500000400046382876800017007501640161000400010015002476252229172901329254310500010004000100040002914529095116100110001000010000310000002100021300129039105684131471048200773095382313474328454163071398714924100040002920929280292552930129282
650042928322011601151110040004594287920111693150164016100040001000500047596022925291092926631050001000400010004000291212918811610011000100011000431000000010022130012864907668403118946201423065381610393828522163591390815038100040002929629289294022921929278

Test 2: throughput

Count: 8

Code:

  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4002058006760011000108004516625400100100320000800001003200008000050040002938400000800410800418006003424001002008000032000020080000320000800578005711802011009910010080000800001100800000080014108000060141805109217228003801310800003200001008004280061800618006180061
4002048004159900023000800451602540016410032006480000100320000800005004000003840000180038080060800600323400100200800003200002008000032000080057800571180201100991001008000080000010080000018800140080014001400510921722800570130800003200001008005880042800618006180058
40020480041600000000080045106254001001003200768000010032000080000500400022702394018002208004480041034240010020080000320000200800003200008006080057118020110099100100800008000001008000001880013020800006014220510921722800571010800003200001008006180061800588006180061
4002048006059900000008004510625400100100320076800001003200008000050040002996000201800220800608004103424001002008000032000020080000320000800578005711802011009910010080000800000100800000188001801780018011800510921722800541013800003200001008004280045800588006180042
40020480165600000240008004516025400176100320076800001003200008000050040003241503761800380800608004103424001002008000032000020080000320000800578005711802011009910010080000800000100800000188001401380018010220510921722800380130800003200001008006180061800618006180058
400204800416001000000800451662540010010032000080000100320000800005004000248640020180041080057800570323400100200800003200002008000032000080060800571180201100991001008000080000010080000008000000800006102205109217228003801313800003200001008006180058800618005880061
4002048006060000000108004516625400176100320000800001003200008000050040001586400201800380800608004103394001002008000032000020080000320000800608005711802011009910010080000800000100800000188000000800140102215109217228003811310800003200001008006180061800428006180061
400204800606000003200080045166254001761003200008000010032000080000500400000960002018004108004180060034240010020080000320000200800003200008004480057118020110099100100800008000001008000001880018017800180114005109217228005711013800003200001008006180042800618006180061
400204800606000002401080045106254001001003200768000010032000080000500400000864002018002208004180041034240010020080000320000200800003200008005780057118020110099100100800008000001008000001880018118800180018220510921722800571130800003200001008006180042800428005880042
40020480060600010240008004516025400176100320064800001003200008000050040002986400201800410800608006003234001002008000032000020080000320000800608005711802011009910010080000800000100800000188001502180018601822051092172280038000800003200001008004280042800618004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire (01) | cycle (02) | 03 | 04 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cf | d0 | itlb miss (d4) | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40002580057600110002001008004716602540008610320076800001032000080000504000228640024080022080041800600342400010208000032000020800003200008005780057118002110910108000080000110800000080014000138000061130050192402317242380054101080000320000108005880042800428004280058
4000248005759900000240000800451060254000861032006480000103200008000050400011384000018004108004480041033940001020800003200002080000320000800478004611800211091010800008000001080000018800140001880014611300501915026172026800411131380000320000108006180061800618006180061
4000248005759900000240100800260000254000861032007680000103200008000050400018960002018004108006080041034240001020800003200002080000320000800418004111800211091010800008000001080000008000000017800006100050561502517242480054013080000320000108004280058800588005880042
4000248006060000000190100800431660254000861032007680000103200008000050400000864002018004108004180060034240001020800003200002080000320000800578005711800211091010800008000001080000008001800018800186002205019210111722128003810080000320000108004280042800618004280061
400024800606000000023000080045006025400086103200768000010320000800005040007086400200800380800418005703424000102080000320000208000032000080060800411180021109101080000800000108000001880013000080018011322050192102017241180057001380000320000108006180061804748027780061
4000248004159900000230100800451060254000101032007680000103200008000050400022960002008004108004180041032340001020800003200002080000320000800608005711800211091010800008000001080000008001300010280014610180501918012172619800391131380000320000108004280061800618004280061
400024800576000000023000080045006025400074103200008000010320000800005040002996000201800410800608006003394000102080000320000208000032000080060800571180021109101080000800001108000000800000001480014011300501921025172525800541101080000320000108005880058800588005880042
4000248004160000000200000800450660254000101032007680000103200008000050400011384000018003808006080057034240001020800003200002080000320000800418005711800211091010800008000001080000018800180001880017601300501918026172125800541101380000320000108006180042800618004280061
4000248005759900000001008004200002540001010320000800001032000080000504000293840000180022080060800600323400010208000032000020800003200008006080041118002210910108000080000010800000188001700018800176113220501918025171426800381131380000320000108006180042800618006180061
40002480041600010002400008004510002540008610320076800001032000080000504000243840000180038080041800600339400010208000032000020800003200008005780057118002110910108000080000110800000188001800017800006114180501918025172325800380131380000320000108006180113800618004280058