Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4R (4H)

Test 1: uops

Code:

  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.008

retire (01) | cycle (02) | 03 | 04 | 07 | 08 | 09 | 0a | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
65005294732195111310002104632288050016922500040041000400010005000475464229172906029251310500010004000100040002929229205116100110001000110003100000100121212975930968343056149201733081381712514928456162541414115236100040002924629314292932935729260
65004292632190101110000104671287820016928500840001000400010005000474882228952915329289310500010004000100040002923929143116100110001000010002100010100120212821926668783098044201133054381613405028417165671360914865100040002930929327292372929929388
650042943521900001000027004749287461016948500040081000400010005000474624229402912629314310500010004000100040002914829183116100110001000010002100000100021213190924369083022139201303066381115454628320164091394614880100040002936029240293262917629258
65004292282191100100003004531287750016937500840001000400010005000475790228882907729278310500010004000100040002914829186116100110001000010002100000100031213026937469053162044201323109381311433828283164341395914822100040002928129242293902933529302
65004293182190000000002004609288250016949500040081000400010005000475542228972913129324310500010004000100040002926529213116100110001000110003100000100020212849909668473058141202103093381513373728264162111409614927100040002928129196292472926029381
6500429294220000010000310471528795101695550084008100040001000500047621222932290602930331050001000400010004000292052921811610011000100001000310010010000031280790836901306803720157306938158483928332163311398415192100040002926229265292722922729244
65004293592200000011003104507287650016977501240001000400010005000476130229312907529261310500010004000100040002904829178116100110001000010003100010100020312802942368753049037201563092381319384628378166301394314885100040002933829298292982930529249
65004292672190000000000104599287320017071500840121000400010005000476193228942909629355310500010004000100040002917529157116100110001000010002100000100020212952913168663061037201343036381114424128392165471391115038100040002941129356293582928029256
65004293792190000000003104615288170016936500840121000400010005000476394228862907829317310500010004000100040002916429159116100110001000110002100000100000012877920968103095040201353043381911444528314164271393914976100040002929429288293502926129246
65004292702200000100002104506288250017037500040121000400010005000476272229002908229342310500010004000100040002913629162116100110001000010000100200100020312799928568603046043200673061381313454228388164581410714565100040002924829279293782935029324

Test 2: throughput

Count: 8

Code:

  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 05 | 0b | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4002058061459911110123901080045006254001761003200648000010032000080000500400855108803041800380800418006000342400100200800003200002008000032000080060800411180201100991001008000080000110080000080000017800236113220005109117118003801010800003200001008006180061800618004280061
40020480060599100002400080042106254001001003200768000010032000080000500400054108803040800220800608004100323400100200800003200002008000032000080041800411180201100991001008000080000010080000188001811880023011322000510911711800570010800003200001008006180061800618006180042
400204800606001000056010800451662540010010032000080000100320000800005004000485440156180041080060800600034240010020080000320000200800003200008004180041118020110099100100800008000001008000018800000138002401000005109117118005711313800003200001008004280058800618006180061
4002048006059910100000080026160254001761003200768000010032000080000500400048544002818002208006080060003234001002008000032000020080050320000800578004111802011009910010080000800000100800001880000008002401000005109117118005711013800003200001008004280042800588004280061
4002048005759910100230008004510625400176100320076800001003200008000050040004710880304080022080057800570033940010020080000320000200800003200008004180057118020110099100100800008000001008000008000001880024601318000510911711800380130800003200001008006180061800618004280061
4002048004160010010001080045066254001761003200008000010032000080000500400046108803040800410800608006000339400100200800003200002008000032000080060800411180202100991001008000080000110080000080018008002301018000510911711800570130800003200001008004280061800618006180042
40020480041599100002400080045066254001761003200768000010032000080000500400047628626008003808010780057003424001002008000032000020080000320000800608004111802011009910010080000800000100800001880000018800230100000510911711800381013800003200001008006180042800618006180042
40020480060600100001900080045166254001761003200008000010032000080000500400046108803041800380800418006000323400100200800003200002008000032000080057800571180201100991001008000080000010080000228001801880023011822000510911711800380010800003200001008006180061800618006380061
400204800606001000017316810800261062540017610032007680000100320000800005004010561088030408002208004180041003424001002008000032000020080000320000800418005711802011009910010080000800000100800002280014018680023600180005109117118003801310800003200001008006180061800588006180061
400204800605991000024010800451662540017610032007680000100320000800005004000465622636180041080060800600034240010020080000320000200800003200008006080057118020110099100100800008000001008000018800170188002360022000510911711800570013800003200001008006180061800618004280058

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire (01) | cycle (02) | 03 | 07 | 0e | 0f | 18 | 1e | 22 | 24 | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | cd | cf | d0 | d5 | d6 | d9 | da | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40002580057599000019108004516602540007410320064800001032000080000504007938640020180022800608006034240001020800003200002080000320800800448044711800211091010800008000001080008827800311031800006014000501902117001515800570131080000320000108004280042800618005880061
4000248006059900002310800451660254000861032007680000103200008000050400685864002018004180060800603424000102080000320000208000032000080060800571180021109101080000800000108000000800000008001300000050190161700161380057101380000320000108004280061800618006180061
400024800606000010230080042160025400086103200768000010320000800005040089886400200800228004180060342400010208000032000020800003200008006080041118002110910108000080000010800000188000000208001401172200501901617001315800570131380000320000108004280061800428004280061
400024800436000000000800451660254000101032007680000103200008000050400318864002008002280060800603424000102080000320000208000032000080060800571180021109101080000800000108000000800000008000060022005019015170015138005700080000320000108005880043800588005880058
40002480060599111023108002916602540008610320076800001032000080000504000344473180080043800608004134240001020800003200002080000320000800608005711800211091010800008000011080000008001800188000061132200501901617001613800571311080000320000108004280061800428006180061
4000248004160001100008002616602540008610320076800001032000080000504000668640020080041800608006034240001020800003200002080000320000800608005711800211091010800008000001080000018800180014800146013220050190151700141480038101080000320000108006180061800618006180061
4000248006059900000008004216602540008610320076800001032000080000504000299600020080038800418004134240001020800003200002080000320000800608004111800211091010800008000011080000008000000080017600220050190161700161480038113080000320000108005880061800618006180061
4000248006059900000008004510602540008610320000800001032000080000504005268640020080041800418006034240001020800003200002080000320000800418005711800211091010800008000001080000008000000080000610220050190131700151380041101380000320000108004280042800618006180042
4000248004159900001910800451660254000101032007680000103200008000050400062960002008002280041800603424000102080000320000208000032000080060800571180021109101080000800000108000000800182414800136102200501901517001616800381131380000320000108005880058800458006180042
4000248006060000000008002616602540008610320052800001032000080000504003479600020080043800418006034240001020800003200002080000320000800608005711800211091010800008000001080000018800000018800006114220050190161700161680057001080000320000108004280042800428006180042