Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (2D)

Test 1: uops

Code:

  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.006

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
65005292482201600161105010045752883520169965006300620003000200010000356133229282914629201310500020003000200060002914629172116100110001000120000200202200040001305791166825307655820250301538085343328458164261332414892200030002929429141292782920529281
650042929821911001410000100459628795101701350063009200030002000100023566152295029135292773105000200030002000600029168291751161001100010000200062000002000000612925931468553037746201523131381212323728443164931319714769200030002924229249292622923729305
65004293092191100150006010045522876800169805000300620003000200010000356835229232904229209310500020003000200060002908929144116100110001000020006200000200040001281591166822304024620101309838176423628424164821336214973200030002915329317292082926329253
65004293022191300110007000046082884300169765009300620003000200010000356105228852912629226310500020003000200060002916129031116100110001000020004200000200000001286692186822306784020177303438236273028559160301330914981200030002930329229292792927129222
650042922221914001400070000462728843001697850063000200030002000100003570522286729037292643105000200030002000600029165291871161001100010000200062000032000400412916917868003088441201263062380814403628358163761328714869200030002920129235292832929329198
6500429258219160090000010046062900200169035000300920003000200010000356862228872916029248310500020003000200060002915429034116100110001000020006200000200040001282792456868304354820110313438134423828364164631335314808200030002925729350293392927429317
6500429273219900100007010045832881300169415009300620003000200010000356245228802915929251310500020003000200060002921529185116100110001000020004200003200240041285591546855306865120203308838137363828370164071337115040200030002922329225292172931329210
6500429292219170080006000045982882400169885006300020003000200010000356935229372918229262310500020003000200060002921229207116100110001000020004200000200140041289391076809307154820140309038117343928395163471337514804200030002927229236292822925529257
65004292562191401110006010045462884600170185009300620003000200010000356895228862905429232310500020003000200060002908529150116100110001000020006200000200040061285291756816311663720203314438088353728469164401311714824200030002922229246293212922229239
650042927221915011200050100452328831021703150093006200030002000100003571252293229112292473105000200030002000600029143291361161001100010000200042000002002400412867923968593085546198083073381815353728366163711337414962200030002930029266292452922029258

Test 2: throughput

Count: 8

Code:

  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  ld3r { v0.2d, v1.2d, v2.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002058006860011100005810080026255025400157100240057160000100240000160000500800853288000018002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160000350160032031600006132005109117118003801401600002400001008004280042800428004280042
4002048004159900000000100800260121202540015710024006316000010024000016000050080037728819781800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016000000160032001600326000051091171180038010141600002400001008004280042800428004280042
4002048004159900000004200080026212120254001631002400571600001002400001600005008008532880000180022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600005301600321016000060040051091171180038010141600002400001008004280042800428004280042
40020480041599000000000028002621212025400157100240057160000100240000160000500800853288332918002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160000530160032001600320136400510911711800380001600002400001008004280042800428004280042
400204800415990000000420008002621212025400163100240057160000100240000160000500800000288000008002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160000530160318433616003260035051091171180124010101600002400001008004280042800428004280042
40020480041599000000042000800262121202540016310024000016000010024000016000050080085328819680800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016000053016003223616003261040051091171180038014101600002400001008004280042800428004280042
40020480041599000000042102800262012025400100100240063160000100240000160000500800376288269208002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160000350160032036160000603235051091171180038110101600002400001008004280042800428004280042
40020480041599000000042002800262121202540010010024006316000010024000016000050080000028800000800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016000035016003203616006060324005109117118003801401600002400001008004280042800428004280042
400204800416000000000000280026212120254001631002400631600001002400001600005008008532880000080022800418069573234001002001600002400002001600004800008004180041118020110099100100800008000001001600003501600320361600326104005109117118003810101600002400001008004280042800428004280042
4002048004160000000000000800262120025400157100240000160000100240000160000500800377288000008002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800001100160000350160032236160036013240051091171180104014101600002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | df | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800566000000000300000180026212120254000101024000016000010240000160000508002192881253180022800418004103234000102016000024000020160000480000800418004111800211091010800008000011016000002701600240024160024612427005035351755380038106160000240000108004280042800428004280042
40002480041599000000030000008002621212025400010102400451600001024000016000050800219288119918002280041800410323400010201600002400002016000048000080041800411180021109101080000800000101600000270160024002416000060027005022341744380038106160000240000108004280042800428004280042
40002480041600000000000100080026212120254000101024000016000010240000160000508002222881212180022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000002701600230024160024602427005022351766380038166160000240000108004280042800428004280042
40002480041600000000000000080026212120254000101024000016000010240000160000508002192881199180022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000002701600000001600240000005022341745380038166160000240000108004280042800428004280042
400024800416000000000300100080026012120254000561024004616000010240000160000508002192880000180022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000002701600240024160000012427005022341744380038106160000240000108004280042800428004280042
400024800416000000000300100080026212120254000561024004616000010240000160000508002192881212180022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000002701600240024160024612427005022361745380038166160000240000108004280042800428004280042
40002480041599000000060010008002620120254000551024004516000010240000160000508002182881210180022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000002701600240024160024612427005022341744380038066160000240000108004280042800428004280042
4000248004159900000000010008002621212025400056102400001600001024000016000050800219288120818002280041800410323400010201600002400002016000048000080041800411180021109101080000800000101600000270160024002416002461027005022341744380038106160000240000108004280042800428004280042
400024800416000000000300100080026212120254000561024004616000010240000160000508002192881212180022800418004103234000102016009224000020160000480000800418004111800211091010800008000011016000002701600240024160024612427005022361744380038166160000240000108004280047800428004280042
40002480041599000000030000008002601212025400056102400461600001024000016000050800000288121218002280041800410323400010201600002400002016000048000080041800411180021109101080000800000101600000270160024000160024612427005022341744380038106160000240000108004280042800428004280042