Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple structures, 8B)

Test 1: uops

Code:

  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.006

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.006

retire (01) | cycle (02) | 03 | 04 | 07 | 0a | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
650052936622121300010046652878500016946500930092000300020001000035722112291629125292793105000200030002000600029192291461161001100010000200002000002000424012985908768693058060202093118382210544828450163241331114960200030002933129248292602931729277
65004292682190100100004559287830001698950003009200030002000100003576192288229130293013105000200030002000600029129292191161001100010000200062000002000400012860906168663090053203773049382214484828389162471317115007200030002930529270292862926929273
65004293532201101060004674288780001702050063012200030002000100003562832282429119292423105000200030002000600029136292701161001100010000200062002002000004012841903868363078147202033057382214464728358163491347815026200030002919929302293322924329198
65004292932190000061004567288210001687750063006200030002000100003574302290629082291793105000200030002000600029074292201161001100010001200042000002000426012776916168623058151202253086382512445028412160891327315169200030002930629255293342930029204
65004293032180000040004623288850001694850063006200030002000100043580732293029076293173105000200030002000600029209291501161001100010000200062000002002406012835907168253021151202333033382913494528443163951341815045200030002933329266292902928729298
65004293102180000000004581288790001696750123012200030002000100003573832290329073293593105000200030002000600029040291231161001100010000200062000022000400012891919368713072144202343062382612465128451165681335314896200030002924529279293332930829264
65004293462200000000004560288490001703250063006200030002000100003568602288229052293043105000200030002000600029114291051161001100010001200042000102002004013031914068433062041202153029382514454428463164161334015054200030002923829235292312924029206
65004291752191110041004506288630001701250063000200030002000100003576872287729087292493105000200030002000600029194291791161001100010000200002002052000426012719912968463080045201833049382211484528435164051328414961200030002933729336293722930929318
650042920721900100600045982878100016985500030062000300020001000035751112286029162292633105000200030002000600029148291401161001100010000200042002032000404012737909368803065045201813103382311494828452162921335615063200030002926429315293122924529316
65004292642190000000004607288220221698950003000200030002000100003569222290529109292853105000200030002000600029149290581161001100010001200062002002002604012892919068823077042201633067382815474728380164171338314750200030002927029253293142936929342

Test 2: throughput

Count: 8

Code:

  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40020580069600114200008002721200264001781002401201600121002400241600165008002102882157080022800428004261340014020016001624002420016001648004880042800421180201100991001008000080000010016000435160040001600406103511151160160080039014101600002400001008004480043800448004380043
4002048004259900420002800282121202540018410024000916001210024002416001650080092928821671800228004280043613400140200160016240024200160016480048800428004211802011009910010080000800000100160004351600400361600366104011151170160080039014141600002400001008004380043800438004380043
400204800426001042000080027001202540012110024007216001210024002416001650080109628808560800228004280042613400140200160016240024200160016480048800428004211802011009910010080000800000100160004016003603216004060323511151160160080039014101600002400001008004480044800438004380043
40020480042599004200008002700120254001211002400721600121002400241600165008009292882164080022800428004261340014020016001624002420016001648004880042800421180201100991001008000080000010016000401600360361600406132351115116016008003900141600002400001008004380043800438004380044
4002048004260000420000800272120025400121100240072160012100240024160016500800929288351608002280041800413234001002001600002400002001600004800008004180041118020110099100100800008000001001600000160000036160036003200005109117118003800141600002400001008004280042800428004280042
4002048004159900901028002601212025400163100240063160000100240000160000500800853288332708002280041800413234001002001600002400002001600004800008004180041118020110099100100800008000001001600000160036001600360132000051091171180038014101600002400001008004280042800428004280042
400204800415990042000080026212002540016310024006316000010024000016000050080085328833290800228004180041323400100200160000240000200160000480000800418004111802011009910010080000800000100160000016009600160000600400005109117118003800141600002400001008004280042800428004280042
40020480041599003010280026212120254001001002400001600001002400001600005008003742880000180022800418004132340010020016000024000020016000048000080041800411180201100991001008000080000010016000001600000321600006032400005109117118003801401600002400001008004280042800428004280042
400204800416000042000280026212120254001001002400581600001002400001600005008008532883330080022800418004132340010020016000024000020016000048000080041800411180201100991001008000080000010016000035160000036160036010000051091171180038014141600002400001008004280042800428004280042
40020480041600000001080026212002540016310024006316000010024000016000050080085328800000800228004180041323400100200160000240000200160000480000800418004121802011009910010080000800000100160000351600350016003601324000051091171180038110101600002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 04 | 07 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cd | cf | d0 | d2 | d5 | d6 | da | db | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4000258005660000010000048201008002620120254000561024005716000010240000160000508000002881210018002280041800410732340001020160000240000201600004800008004180041118002110910108000080000010160000027160000003216003261243500501900171700910800381106160000240000108004280042800428004280042
4000248004159900010000076500018002621212025400010102400571600001024000016000050800000288128200800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000002716003200321600326100005019001117008108003811010160000240000108004280042800428004280042
400024800415990000000008250101800262000254000681024004516000010240000160000508003982880000008002280041800410029234000102016000024000020160000480000800418004111800211091010800008000001016000002716003200321600006132350150190091700910800381100160000240000108004280042800428004280042
400024800416000000000008700001800262121202540005510240057160000102400001600005080037428812100080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600000016002400321600326024350050190010170081080038106160000240000108004280042800428004280042
4000248004159900000000087900008002620120254000561024000016000010240000160000508003772881212008002280041800410032340001020160000240000201600004800008004180041118002110910108000080000010160000027160024000160000002400050190010170010108003811010160000240000108004280042800428004280042
400024800415990000000003001018002621212025400067102400001600001024000016000050800374288121000800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000000160024000160042610000501900101700911800380010160000240000108004280042800428004280042
40002480041600000001000924000180026001202540006710240000160000102400001600005080037428819680180022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600000271600321032160032612400050190010170010108003811010160000240000108004280042800428004280042
40002480041600000000000957010180026212120254000721024005716000010240000160000508003742881210008002280041800410032340001020160000240000201600004800008004180041118002110910108000080000010160000001600000001600000124000501900917001010800380106160000240000108004280106800428004280042
4000248004160000000000089400018002621212025400068102400001600001024000016000050800377288000000800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000000160000003216000000323500501900917011198003811010160000240000108004280042800428004280042
400024800416000000000008790101800262120025400010102400571600001024000016000050800374288000000800228004180041003234000102016000024000020160092480000800418004111800211091010800008000001016000002716003200207160000600350050190091700988003811010160000240000108004280042800428004280042