Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4R (8B)

Test 1: uops

Code:

  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.008

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
65005285002131601701009014988280310111600450004000100040001000500047548310229632820928402310500010004000100040002821828195116100110001000110010210020051000122138491008071793392851192433319381731464927979145461276513512100040002883529035285952892928561
650042895421327018006579217614762283720011637050254020100140121000509947560418229392883528372388050151003400010004000282322843311610011000100001000021002305100110213771999171223349647192203404381221525427926145181261913679100040002873628548288482898828576
6500428323211150180000226414913284030111648250124008100240001004504548012207231442858028843291235015100140161004400828204286877161001100010000100320100502710031121372010247722132911045191623350381229495027917145531259313355100040002855828635283682836428282
6500428464213140160000201509728035111162505012400810004000100050004752030822998282192826219465000100040001000400028418283431161001100010000100002100010141002112134381007771463359846192573387382226514927940142571243213192100040002880428497285422847928378
65004283742122001800003005006280420111593850084012100040001000500047558410229982814028248310500010004000100040002818628222116100110001000010000210010001000022138261037171593357653191883331381719405228062141151252313210100040002850828413285032827328450
65004284032121702001000015107281880101610050084008100040001000500047514310229242820928251310500010004000100040002840428237116100110001000010000210012011001112137991007571253383943192563391381722434727912143161235212943100040002851128343283452835128470
65004284932131501900007005045280080001602150084000100040001000500047578300229402819528276310500010004000100040002829428185116100110001000010000010010021002020138641019571503377953192603391381623434427879146171247713343100040002862828484285922823828311
650042827421218016000020149432803511115949501240121000400010005000475081102293928187282483105000100040001000400028194283111161001100010000100000100100010011121390910135719133681052194363362381822474728023143681256613517100040002855728334284402855228474
6500428291212170140000300487628098011161565012401210004000100050004757230822949281782822031050001000400010004000282462826711610011000100001000001001001100112213774999271613419749192583449381119525027920144651240613377100040002859428417285972829428412
65004283662131801711003015104280430101620250004012100040001000500047568400229182822328325310500010004000100040002826728271116100110001000010000010010001001110137181036871793390844192523328381423525527801143801261113422100040002851628502285232839428350

Test 2: throughput

Count: 8

Code:

  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire (01) | cycle (02) | 03 | 08 | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
400205800676000112001180042160254001641003200648000010032000080000500400038864002008003808005780057033940010020080000320000200800003200008005780057118020110099100100800008000001008000718800140013800006101851092172280054010100800003200001008006780048800588005880058
40020480057600000190108004216025400164100320064800001003200008000050040000986400200800380800578005703394001002008000032000020080000320000800578005711802011009910010080000800000100800000800140008001461141851092172280054010100800003200001008005880058800588039880058
4002048005759900020010800421662540016410032006480000100320000800005004000098640020080038080057800570339400100200800003200002008000032000080041800571180201100991001008000080000010080000188001300148001361141851092172280054110100800003200001008005880058800588005880067
4002048005760000063010800421662540016410032006480000100320000800005004000118640020080038080057800410339400100200800003200002008000032000080057800571180201100991001008000080000010080000188001400138001461141851092172280054010100800003200001008005880058800588005880042
400204800575990002001080042166254001641003200008000010032000080000500400009384000008003808005780057032340010020080000320000200800003200008005780041118020110099100100800008000011008000018800000008001461131851091172280038110132800003200001008006780058800588005880058
400204800415990002901080042066254001001003200008000010032000080000500400009384000008003808005780057033940010020080000320000200800003200008005780057118020110099100100800008000001008000018800000014800140113185109217228005400100800003200001008005880051800588005880058
400204800576000001901080042166254001641003200648000010032000080000500400018864002008002208005780057033940010020080000320000200800003200008005780041118020110099100100800008000001008000018800130014800006014185109217228005400100800003200001008005880058800588044980058
400204800415990001901080042166254001641003200648000010032000080000500400009864002008003808005780041033940010020080000320000200800003200008004780057118020110099100100800008000001008000018800140014800146014185109217228005401000800003200001008005880058800478005880058
4002048005760000019010800421662540016410032006480000100320000800005004000098640020080038080057800570339400100200800003200002008000032000080057800571180201100991001008000080000010080000188001400138001461141851092172280054010100800003200001008005880058800588005880058
400204800576000001901080042160254001641003200648000010032000080000500400018384000008003808004180041033940010020080000320000200800003200008004180041118020110099100100800008000001008000018800140014800476113185109217228005410100800003200001008005880051800588005880042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire (01) | cycle (02) | 03 | 08 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | db | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40002580475600000002001080042166025400086103200768000010320000800005040002086400200800418006080060033940001020800003200002080000320000800418005711800211091010800008000011080194018800140017800146100501971704280054101080000320000108034780058800588005880061
400024800415990000000008004516602540007410320076800001032000080000504000009600020080041800608004103234000102080000320000208000032000080060800571180021109101080000800001108000000800180008001861022501941703580041113080000320000108005880058800588005880042
4000248004160000000190008004216602540008610320000800001032000080000504000248640020080041800608006003424000102080000320000208000032000080060800571180021109101080000800001108000001880000001780000600185019217024800570131080000320000108006180061800618005880058
400024800606000000020010800451660254000861032007680000103200008000050400050960002008003880041800600342400010208000032000020800003200008006080041118002110910108000080000010800000228005400080018600050194170448005700080000320000108006180058800588006180058
4000248006060001100190108004216602540007410320000800001032000080000504000088640020080038800608004103424000102080000320000208000032000080060800571180021109101080000800000108000001880013000800176013185019517055800541101080000320000108005880042800588004280042
4000248006059900000240008002616602540001010320064800001032080080000504000008640020080038800578004103234000102080000320000208000032000080041800571180021109101080000800000108000001880000001480000611318501941704280038013080000320000108006180058800428006180058
400024800606000000024010800451600254000101032007680000103208008000050400009864002018002280041800570319240001020800003200002080000320000800578004121800211091010800008000001080000018800170038016161180501941702480038101080000320000108005880058800588005880042
4000248005760000000350008004516602540008610320064800001032000080000504000249600020080041804458006003244000102080000320000208000032000080060800571180021109101080000800000108000001880018301880018601722501921705880057013080000320000108005880051800618005880058
40002480041600000000010800261000254000741032000080000103200008000050400011864002008003880057800570342400010208000032000020800003200008005780057118002110910108000080000010800000188001800080017010225019217044800571131080000320000108006180058800428006180061
400024800606000000023000800450000125400086103200768000010320000800005040002196000200800418004480060032340001020800003200002080000320000800578005711800211091010800008000001080192008001400188001761018501941704280039013080000320000108005880042800618006180061