Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD3 (single structure, D)

Test 1: uops

Code:

  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.008

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.008

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
65005293712201321122100200468028817000170125010300820003000200010000357292228852909029287310500020003000200090002928629308116100110001000020044420060022200042621132069188691531401376202893120381510575628340161191332614954200030002931429425292932931029361
65004293692191181118100200452628931000170915010301020003000200010000356353228622911329319310500020003000200090002921929150116100110001000020044620030017200242422129729067686331241252203333103381119495828391161771320614911200030002928429360291922927929390
650042921921912511200001200466228769000169865003300820003000200010000357961229882915329220310500020003000200090002917729120116100110001000020043420030014200244420130429458689530821159202233067381712565428557162921318614931200030002942029323293622940629286
65004293642201171124100500465528860000169795003301020003000200010000357788228722912429234310500020003000200090002916529256116100110001000020032420030004200262421129439371679730361660203303095381015516428436162731321414953200030002931229295293662924729349
65004293292201201124200200463428789000169885008300320003000200010000357906229092916729261310500020003000200090002918929206116100110001000020033420020014200262022130369201688230631053201973132382111646028386162191314714856200030002929529221293402932829245
6500429311221121112010081045822874600017056500830032000300020001000035781822927292082931831050002000300020009000291902920911610011000100012003262003000420004242213202934368683066957203023029381815545628388161691333214786200030002932329317293632932429281
65004292222191271121100200460728874000169635008300320003000200010000357694229542914429279310500020003000200090002914429135116100110001000020042420060004200044421132179240694930831052202833134381414635528398163071323214837200030002925729371292462929729215
650042932922011911210001700463728929000170775008300820003000200010000357318229282909129269310500020003000200090002910429166116100110001000020044620040012200244622132969376687130851258202803047381819646428368160651329615098200030002933129321292882931829263
65004293372200240023000500464628813000170505000300320003000200010000357455229202906829307310500020003000200090002917929140116100110001000020000420000000200242000129229257684131401168202653074381319606728517166041317814875200030002931229264292892929329348
65004292232200210028000600463428770000170325003300320003000200010000357264228932915529318310500020003000200090002916229258116100110001000120000420020105200041600129399484683231071565201803093381513616428476160731318114655200030002930729320293082928129285

Test 2: throughput

Count: 8

Code:

  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  ld3 { v0.d, v1.d, v2.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
400205160065119911010057010021600502551598873254001061002400121600001002400001600005008012842279076311600460160046160046799333800474001002001600002400002001600007200001600651600651180201100991001008000080000110016001313016015901541600006152431210511011711160038014102160000240000100160061160061160042160293160042
400204160041119900001042010001600450121215983802540010610024000616000010024000016000050080085322789342016004101600601600417990938003840010020016000024000020016000072000016004116004111802011009910010080000800001100160000035160036303216003601324000051101171216005701002160000240000100160061160042160061160057160042
40020416005611990000044201000160045012121598380254001061002400061600001002400001600005008000052279297401602010160041160041799093800234001002001600002400002001600007200001600601600581180201100991001008000080000110016045603516003600361600000104000051101171116003800142160000240000100160057160057160057160057160061
400204160058119900100042010001600452120159877025400106100240006160000100240000160000500800000227937720160041016004116006079909380042400100200160000240000200160000720000160060160056118020110099100100800008000001001600000351604940036160036010000051101171116005701402160000240000100160042160057160061160042160061
40020416006011990000004200002160050255159887122540010610024001216000010024000016000050080129422790763016002701600461600657993338002840010020016000024000020016000072000016006516006511802011009910010080000800000100160013140160051001216000060524313105110117111600430000160000240000100160068160047160066160066160066
4002041600651201100000600000016004520121598770130400106100240006160000100240000160000500800862227893421160037016006016006379796380042400100200160000240000200160000720000160060160041118020110099100100800008000001001600000351600361032160036613240000511011711160038014102160000240000100160299160042160063160061160042
40020416006011990000105500002160026212121598770254005561002400061600001002400001600005008000002279418411602010160041160060799283800424001002001600002406992001600007200001600601600561180201100991001008000080000010016001313431600120152160495611201200511011711160062014142160000240000100160042160042160063160061160061
40020416006011990000004200002160026201215987702540010610024000016000010024000016000050080085322789342016004101600601600607992838004240010020016000024000020016000072000016004116005611802011009910010080000800000100160000035160036003616000061324000051101171116005700142160000240000100160042160061160057160061160061
400204160056119800000042000021600500001598530254001121002400121600001002400001600005008000392279470211600270160299160065799333800474001002001600002400002001600007200001600461600651180201100991001008000080000010016000003516000000361600366100000511011711160053010140160000240000100160061160061160042160057160061
400204160060119900010000100216004521212159869025400106100240006160000100240000160000500800853227893420160022016006016006079928380023400100200160464240000200160000720000160041160056118020110099100100800008000001001600000016003642361600366032400005110117111600570002160000240000100160057160042160042160042160042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cd | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4000251600541199001038100016004121212159838025400010102400061600001024000016000050800374227923750116002216005416005679922380023400010201600002400002016000072000016005416005411800211091010800008000011016000027160024002416003261243500005020213171010160053006016000024000010160107160057160059160055160105
40002416005411990000167100116004121201598690254000161024000016000010240000160000508003772278807811160039160041160056799243800384000102016000024000020160000720000160054160054118002110910108000080000010160000016002400016003260243500005020111171091600381106016000024000010160042160042160057160042160042
4000241600561199001038000116004121201598690254000101024000016000010240000160000508000002278934201160022160056160054799093800384000102016000024000020160000720000160056160054118002110910108000080000010160000016003200321600326132350000502009171091600531100216000024000010160057160057160057160057160042
400024160041119801003810011600392120159861025400016102400061600001024000016000050800219227923750116003716005616005679909380038400010201600002400002016046472000016005416004111800211091010800008000001016000027160032103216003261243500005020091711111600510106216000024000010160057160057160042160055160042
4000241600561199000000001160026012015986102540001610240006160000102400001600005080101122789342011600371600561600547992438003840001020160000240000201600007200001600541600541180021109101080000800000101600000160000003216003260240000050200917910160053166216000024000010160055160055160055160057160105
4000241600411198100038000016002620121598690254000161024000016000010240000160000508000002278934201160022160054160041799223800384000102016000024000020160000720000160056160054118002110910108000080000010160000271600240001600320024350000502009179101600531106216000024000010160057160057160042160057160042
40002416010611990000301001160041000159869025400010102400001601161024050416000050800377227893420116003716004116005679922380038400010201600002400002016000072000016005416004111800211091010800008000001016000027160000003516000061035000050201101798160038166016000024000010160057160042160057160055160057
40002416005611990000370000160026212121598690254000161024000016000010240000160122508002222278934201160037160041160041799093800384000102016000024000020160000720000160056160054118002110910108000080000010160000271600321035160032613200000502019179916003801010016000024000010160042160057160057160042160042
40002416005611980000381000160026012121598380254000161024000616000010240000160000508003772279237501160035160041160041799093800384000102016000024000020160464720000160056160054118002110910108000080000110160000271600000001600006003500005020191710101600531100216000024000010160042160057160055160042160042
4000241600411198000038000116004121201598690254000161024000616000010240000160000508003742279297401160037160055160054799093800384000102016000024000020160000720000160054160041118002110910108000080000010160000016000000016002401000000502001017991600381106216000024000010160042160042160057160057160103