Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, 8B)

Test 1: uops

Code:

  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.006

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
650052936622121300010046652878500016946500930092000300020001000035722112291629125292793105000200030002000600029192291461161001100010000200002000002000424012985908768693058060202093118382210544828450163241331114960200030002933129248292602931729277
65004292682190100100004559287830001698950003009200030002000100003576192288229130293013105000200030002000600029129292191161001100010000200062000002000400012860906168663090053203773049382214484828389162471317115007200030002930529270292862926929273
65004293532201101060004674288780001702050063012200030002000100003562832282429119292423105000200030002000600029136292701161001100010000200062002002000004012841903868363078147202033057382214464728358163491347815026200030002919929302293322924329198
65004292932190000061004567288210001687750063006200030002000100003574302290629082291793105000200030002000600029074292201161001100010001200042000002000426012776916168623058151202253086382512445028412160891327315169200030002930629255293342930029204
65004293032180000040004623288850001694850063006200030002000100043580732293029076293173105000200030002000600029209291501161001100010000200062000002002406012835907168253021151202333033382913494528443163951341815045200030002933329266292902928729298
65004293102180000000004581288790001696750123012200030002000100003573832290329073293593105000200030002000600029040291231161001100010000200062000022000400012891919368713072144202343062382612465128451165681335314896200030002924529279293332930829264
65004293462200000000004560288490001703250063006200030002000100003568602288229052293043105000200030002000600029114291051161001100010001200042000102002004013031914068433062041202153029382514454428463164161334015054200030002923829235292312924029206
65004291752191110041004506288630001701250063000200030002000100003576872287729087292493105000200030002000600029194291791161001100010000200002002052000426012719912968463080045201833049382211484528435164051328414961200030002933729336293722930929318
650042920721900100600045982878100016985500030062000300020001000035751112286029162292633105000200030002000600029148291401161001100010000200042002032000404012737909368803065045201813103382311494828452162921335615063200030002926429315293122924529316
65004292642190000000004607288220221698950003000200030002000100003569222290529109292853105000200030002000600029149290581161001100010001200062002002002604012892919068823077042201633067382815474728380164171338314750200030002927029253293142936929342

Test 2: throughput

Count: 8

Code:

  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  ld3 { v0.8b, v1.8b, v2.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020580069600114200008002721200264001781002401201600121002400241600165008002102882157080022800428004261340014020016001624002420016001648004880042800421180201100991001008000080000010016000435160040001600406103511151160160080039014101600002400001008004480043800448004380043
4002048004259900420002800282121202540018410024000916001210024002416001650080092928821671800228004280043613400140200160016240024200160016480048800428004211802011009910010080000800000100160004351600400361600366104011151170160080039014141600002400001008004380043800438004380043
400204800426001042000080027001202540012110024007216001210024002416001650080109628808560800228004280042613400140200160016240024200160016480048800428004211802011009910010080000800000100160004016003603216004060323511151160160080039014101600002400001008004480044800438004380043
40020480042599004200008002700120254001211002400721600121002400241600165008009292882164080022800428004261340014020016001624002420016001648004880042800421180201100991001008000080000010016000401600360361600406132351115116016008003900141600002400001008004380043800438004380044
4002048004260000420000800272120025400121100240072160012100240024160016500800929288351608002280041800413234001002001600002400002001600004800008004180041118020110099100100800008000001001600000160000036160036003200005109117118003800141600002400001008004280042800428004280042
4002048004159900901028002601212025400163100240063160000100240000160000500800853288332708002280041800413234001002001600002400002001600004800008004180041118020110099100100800008000001001600000160036001600360132000051091171180038014101600002400001008004280042800428004280042
400204800415990042000080026212002540016310024006316000010024000016000050080085328833290800228004180041323400100200160000240000200160000480000800418004111802011009910010080000800000100160000016009600160000600400005109117118003800141600002400001008004280042800428004280042
40020480041599003010280026212120254001001002400001600001002400001600005008003742880000180022800418004132340010020016000024000020016000048000080041800411180201100991001008000080000010016000001600000321600006032400005109117118003801401600002400001008004280042800428004280042
400204800416000042000280026212120254001001002400581600001002400001600005008008532883330080022800418004132340010020016000024000020016000048000080041800411180201100991001008000080000010016000035160000036160036010000051091171180038014141600002400001008004280042800428004280042
40020480041600000001080026212002540016310024006316000010024000016000050080085328800000800228004180041323400100200160000240000200160000480000800418004121802011009910010080000800000100160000351600350016003601324000051091171180038110101600002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000258005660000010000048201008002620120254000561024005716000010240000160000508000002881210018002280041800410732340001020160000240000201600004800008004180041118002110910108000080000010160000027160000003216003261243500501900171700910800381106160000240000108004280042800428004280042
4000248004159900010000076500018002621212025400010102400571600001024000016000050800000288128200800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000002716003200321600326100005019001117008108003811010160000240000108004280042800428004280042
400024800415990000000008250101800262000254000681024004516000010240000160000508003982880000008002280041800410029234000102016000024000020160000480000800418004111800211091010800008000001016000002716003200321600006132350150190091700910800381100160000240000108004280042800428004280042
400024800416000000000008700001800262121202540005510240057160000102400001600005080037428812100080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600000016002400321600326024350050190010170081080038106160000240000108004280042800428004280042
4000248004159900000000087900008002620120254000561024000016000010240000160000508003772881212008002280041800410032340001020160000240000201600004800008004180041118002110910108000080000010160000027160024000160000002400050190010170010108003811010160000240000108004280042800428004280042
400024800415990000000003001018002621212025400067102400001600001024000016000050800374288121000800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000000160024000160042610000501900101700911800380010160000240000108004280042800428004280042
40002480041600000001000924000180026001202540006710240000160000102400001600005080037428819680180022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600000271600321032160032612400050190010170010108003811010160000240000108004280042800428004280042
40002480041600000000000957010180026212120254000721024005716000010240000160000508003742881210008002280041800410032340001020160000240000201600004800008004180041118002110910108000080000010160000001600000001600000124000501900917001010800380106160000240000108004280106800428004280042
4000248004160000000000089400018002621212025400068102400001600001024000016000050800377288000000800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000000160000003216000000323500501900917011198003811010160000240000108004280042800428004280042
400024800416000000000008790101800262120025400010102400571600001024000016000050800374288000000800228004180041003234000102016000024000020160092480000800418004111800211091010800008000001016000002716003200207160000600350050190091700988003811010160000240000108004280042800428004280042