Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 4 regs, 2D)

Test 1: uops

Code:

  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6400528622223720100101012880478328544000226605000100040001000400450002071561682728312285987105000400050052849728472116100110001000040004840060007914004514000130699616693032201541960631653801335452280651000155561235113858400010002857328730286202862928709
640042868422211001001001200479928432000227495000100040001000400050002071810169382821028696310500040005000286342858211610011000100004000411400630013400651000150132799762699032631611970532503803215651281591000151931248313686400010002871228702286392867628790
64004284912220100100000000450628478000225745000100040001000400050002071315168982830128634310500040005000285452859311610011000100004000084006000640060061100133399445697132420561965132563804265453280701000151341236913745400010002855428522286032871928540
64004286012220100100000240046972835500022764500010004000100040005000207141416928283352865631050004000500028523285251161001100010000400008400602004000004000131249416699031900501969531893799185052280851000151491248513648400010002873328632287752876128649
640042857022300001000001200475728512044225365000100040001000400050002073081692128301285883105000400050002850828612116100110001000040000124006000640075001200132399382695831590611956631573805225749281261000155311242313858400010002869628705286652863828628
6400428596222000010000000049082844500022661500010004000100040005000207201316938283112867031050004000500028511284971161001100010000400000400600064000500000132349611690631310531978932403804245364280551000151741249513860400010002867828753285542879528805
6400428794222010010000012004761284960002255750001000400010004000500020713121690428302286493105000400050002857228600116100110001000040000840170001240065161100131839507698032080571974331493811205659281651000155491230813710400010002863528711287502862828702
640042871022210001000001201479328468000226095000100040001000400050002073212169532831328623310500040005000285012852211610011000100004000512400600034006516000132349541701732030551973432213804175956281191000154081269213569400010002879528719286842867628629
64004287502210100100000120046472857000422642500010004000100040005000207126169232831228500310500040005000285112846211610011000100004000084006020040000101200133679670692931960571974731593809265355280671000153441220513538400010002871128801287472862728604
64004288412230100100000120048112859904422644500010004000100040005000207238169412832128720310500040005000286312851011610011000100004000404006010740006101200134179553706432090541980131823814215352281191000156841268113700400010002872328512286072869428685

Test 2: throughput

Count: 8

Code:

  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 67 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205106745799100010006500310672627721254001008010032000080100320000480495472774310010671101067401067412668814267224001002003200002004000001067361067411180201100990100100800008000001003200191945032005911059320041615945180051093011711106737800001313232000080100106738106737106737106742106737
32020410673682710001000900021067213772325400101801003200008010032000048049647003431001067120106740106737266853267194001002003200002004000001067361067361180201100990100100800008000001003200181845032005910059320041616045180051103011711106740800001313232000080100106737106737106741106737106742
32020410673782710100000660021067212771925400100801003200008010032000048049146689301001067110106737106737266883267224001002003200002004000001067401067371180201100994100100800008000011003200182045032005900159320041615745180051103411611106734800001313232000080100106740106739106740106737106741
32020410674982810100000660021067212772025400100801003200008010032000048049646682271501067110106743106740266843267194001002003200002004000001067361067401180201100994100100800008000001003200171945032019011168320042615945181051098411611106733800001313232000080100106741106739106737106738106737
32020410673682710000000750031067222772325400100801003200008010032000048049647276951501067110106738106736267573267204001002003200002004000001067361068871180201100994100100800008000001003200191845032005910059320041616045181051108411711106733800001313432000080100106738106741106740106737106737
32020410673682710201000650021067222772125400100801493200008010032000048049547365041501067110106740106741266843267184001002003200002004000001067361067371180201100994100100800008000001003200181845032005902162320041616044183051108411711106733800001313232000080100106737106737106738106738106737
32020410673682710101000640021067212772525400100801003200008010032000048049646682271501068440106737106736266843267204001002003200002004000001067361067401180201100994100100800008000001003200191851032005800062320041615945180051108011711106733800001313232000080100106737106737106737106741106737
320204106745827100010006500210672427722254001008010032000080100320000480496466802815010671101067401067432668832671840010020032000020040000010673610673611802011009941001008000080000010032001818453032005910060320041615945181051108411711106733800001313232000080100106741106739106737106738106741
320204106740827101000006500310672637721254001008010032000080100320000480496466913015010671501067361067362668811267184001002003200002004000001067361067401180201100994100100800008000001003200181845032005910062320042615945180051108411712106733800001313232000080100106738106737106885106737106737
320204106736828101010001620021067213772125400100801003201308010032000048049646689301511067120106740106736266883267224001002003200002004000001067361067361180201100994100100800008000001003200181945032005900060320040615945181051098412511106734800001313232000080100106737106742106737106738106738

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.3341

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002510672782700000000010800011067243154242540001080010320000800103200004800444734768000106706107040106760266913267174000102032000020400000106731106727118002110901010800008000001032000004503200420005132004261424500005020101609610673280000141232000080010106728106728106728106732106732
320024106877827000001100480002106712344222540001080010320000800103200004800444734671000106706107053106863266753267144000102032000020400000106734106727118002110951010800008000001032000004503200420008373200426141450000502051609810672880000141032000080010106734106732106735106889106732
320024106731827000000000180010210671634427254000108001032000080057320000480044473481600010671010703810687726679326714400010203200002040000010673110672711800211095101080000800000103200002450320042000453200426142450000502081608810673280000141432000080010106732106732106728106732106728
320024106727827000000000480001106713315421254000108001032000080010320000480044462518800010670610704010685526683326714400010203200002040000010688710672711800211095101080000800001103200000450320042000423200426142450000502071618810672880000141032000080010106732106728106732106736106736
32002410673182700000000047000210671634021254000108001032000080010320000480044473467100010671710704410683026675326718400010203200002040000010673110672711800211095101080000800000103200000450320041000483200426142450000503381608910673580000101032000080010106732106732106732106732106732
32002410673182800000000448000110686734421514000108001032000080010320000480043473467100010670210704310684326675326710400010203200002040000010673410676211800211095101080000800000103200000450320042000423200426142450000502081605810672980000141032000080010106732106732106881106732106736
32002410673182700010000048000210671234421254000108001032000080010320000480044469301900010670610692110697626682326714400010203200002040000010673110673111800211095101080000800000103252022450320042000453200426142450000502051608710673380000151032000080010106732106732106734106739106728
32002410673585700000000048000110671634421254000108001032000080010320000480044473467100010670210699010711626682326710400010203200002040000010688110673111800211091101080000800000103200000450320042010423200426139450000502081605810672880000181432000080010106732106732106737106732106740
320024106731857000000000480002106720341521254000108001032000080010320000480044473467100010671010694110695026682326725400010203200002040000010673110673111800211095101080000800000103200000450320042000393200006142450000503281609510672980000141032000080010106736106732106732106735106735
320024106735857000000001480002106716341212540001080010320000800103200004800444734671000106709107073106926266793268374000102032000020400000106727106727118002110951010800008000001032000004503200420007903200426142000005020916081010672880000141032000080010106732106736106880106732106732