Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (1D)

Test 1: uops

Code:

  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.006

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6500529326220015017000000100453428765002170705006300920003000200010000357514022864029048292273105000200030002000600029115291261161001100010001200000200210420004040130039400689130514412016131183818113545328365162721315414997200030002925929308292902928829193
650042939521901201201000500046382883800216974501230062000300020001000035603002295502913229248310500020003000200060002916829116116100110001000020000420020022000624012783911168463036941201513122381264544328419163621335615006200030002933429323292652927429283
6500429263219016014000005000458928808000169765006300620003000200010000357022022912029113293413105000200030002000600029170291671161001100010000200006200000020000040131449068681630561035202633072381694238228407164221328714804200030002926629262293362921929326
6500429214219016014000000000452128763000170665012301220003000200010000356942022930029097291763105000200030002000600029127291281161001100010000200004200000320004060131819180681030317382012030983817104339328419163021334814981200030002927829314292862924229327
650042930822001509010002000458828846000169895003300620003000200010000356907023007029148292603105000200030002000600029146292051161001100010000200000200000020004040129039172683030458362020730903820153329228349162371347114686200030002933529250293122924329247
6500429300219011010000000000453728761000169605006300620003000200010000357712022926029159293143105000200030002000600029158291191161001100010001200004200000020006040126939156682330648392018330623817103634328445162891343914870200030002926729326292282922929284
650042926521801601600000000045592885400016983500030092000300020001000035717302290802912929244310500020003000200060002913429124116100110001000120000620000022000424012900910268413034842201493124381663540328416164891346114741200030002923629262292732922229292
6500429310220013013000000000454028880020170885006300020003000200010000356153022943029044292433105000200030002000600029202291271161001100010001200000200000020004040129499163688430679352024530833819103737228459163771345415004200030002933329226292502919828777
6500429207219018016010004000460928870000169995006300020003000200010000358002022934029053292213105000200030002000600029081291021161001100010001200030200000020024000127499122680130617362010730953819104736228421163481354114948200030002926229246293032921429296
650042936521908014000005000458028818020169615000300020003000200010000357184022887029104292443105000200030002000600029203291921161001100010001200006200000020024040129639155686530536352015030603818104030228382163841341114993200030002932929205293112921129287

Test 2: throughput

Count: 8

Code:

  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  ld3r { v0.1d, v1.1d, v2.1d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020580069600000100075010028002720532540018510024008316001210024002416001650080145628850841800228004380042061340014020016001624002420016001648004880042800421180201100991001008000080000010016001812016005600152160043615143120111511601600800390131301600002400001008004380043800438004380043
40020480042600110000056010028002825532540014110024007716001210024002416001650080145628850781800228004380042071340014020016001624002420016001648004880042800431180201100991001008000080000010016001814431600570025216004461520132111511701600800390131311600002400001008004380043800438004480043
40020480043600100000058000008002605532540016810024006716000010024000016000050080137228848951800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016001314431600130001316003961134313100051091171180038001311600002400001008004280042800428004280042
400204800416001100001570100280026255325400119100240079160000100240000160000500801386288489218002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160013124316005300052160039615243131000510911711800380131301600002400001008004280042800428004280042
40020480041599100000058010028002625532540017910024007916000010024000016000050080010228848921800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016001414431601120725116003961124313000051091171180038001311600002400001008004280042800428004280042
40020480041599101000057000028002605732540016610024007916000010024000016000050080138628848951800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016001313431600530025216000061524313200051091171180038001301600002400001008004280042800428004280042
400204800416001110000580100280026255325400118100240074160000100240000160000500801383288489918002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160014124316005300152160039615243131000510911711800380131301600002400001008010880042800428004280042
4002048004160011100005801002800260553254001681002400791600001002400001600005008013822884895180022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600131543160052000511600006151013000051091171180038013001600002400001008004280042800428004280042
4002048004159911000005800002800262500254001741002400181600001002400001600005008013832884906080022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600121343160051002511600006151012100051091171180038013011600002400001008004280042800428004280042
400204800415991110000120100280026255325400174100240074160000100240000160000500801383288499618002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160015134316005100013160039611343132000510911711800380131311600002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | d9 | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800426000101000010080026012122540001010240063160000102400001600005080085328800000800228004180041032340001020160000240000201600004800008004180041118002110910108000080000010160000351600320035160036610400050196176078800380000160000240000108004280042800428004280105
4000248004160000000000102800262012254000731024006316000010240000160000508008532882004080022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000040160000003616000000320005019517005880038014140160000240000108004280042800428004280042
400024800415990000004200008002621212254000681024006016000010240000160000508003772881975080022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000035160000103616003660040005019817005780038010140160000240000108004280042800428004280042
4000248004160000000000102800262121225400073102400631600001024000016000050800853288197508002280041800410323400010201600002400002016000048000080041800411180021109101080000800000101600000160032003216000061035005019517018880038014140160000240000108004280042800428004280042
400024800415991000004200028002621212254000731024006316000010240000160000508008532881977080022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000035160000003616003661040005019617008880038014100160000240000108004280042800428004280042
4000258004159900000042000080026001225400010102400631600001024000016000050800377288197808002280041800410323400010201600002400002016000048000080041800411180021109101080000800000101600003516003600016003261360005019817007680038014100160000240000108004280042800428004280042
40002480041600000000380000800262121225400010102400631600001024000016000050800853288197808002280041800410323400010201600002400002016000048000080041800411180021109101080000800000101600000160000003216003200323500501951700778003801400160000240000108004280042800428004280042
4000248004160000000042000080026212025400073102400001600001024000016000050800000288196808002280041800410323400010201600002400002016000048000080041800411180022109101080000800000101600000160036000160036003635005019717008580038014140160000240000108004280042800428004280042
40002480041599000000000028002621212254000681024000016000010240000160000508003782881975080022800418004103234000102016000024000020160000480000800418004111800211091010800008000001016000001600330036160032613640005019817007680038110100160000240000108004280042800428004280042
40002480041599000000001028002600122540006810240000160000102400001600005080085328833370800228004180041032340001020160000240000201600004800008004180041118002110910108000080000010160000351600000032160000603240105019717008780038014100160000240000108004280042800428004280042