Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 1 reg, 4S)

Test 1: uops

Code:

  ld1 { v0.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
610052952323820112110022268880046002883601024599200210011000100210015000503921602028902295141527200210012004294342942111610011000100001000310010011001213000133279418688331678342204133513811114844285171000158681354715042100010002926029370294282944629450
610042940723612101500000700047192889200124302200010001000100010005000500041596628814295243102000100020002931829191116100110001000010003100000610002030001317992556958316013432168032843810104744288371000165981401915549100010002983429949293622931529686
61004295832371700160001012000465028913000246832000100010001000100050005000515951287482940331020001000200029231292621161001100010000100021001101100020300013074956469303176940218213276381174039288971000161181356515135100010002926229310294362944729344
61004294162361700170000011000476928762011244982000100010001000100050005000715958286542937331020001000200029184291891161001100010000100021000001100120202013052942269233102838217343239380884241285811000162431361615037100010002939429227293592936829343
61004292922262200170000013000473928927000243392000100010001000100050005000101601428752294061110200010002000294222929511610011000100001000310000001000303000132419515689231928502169933093812124337286931000162201367815082100010002962829563295432946729551
61004294132361400171010029000471929100000243232000100010001000100050005000415989288112983531020001000200029297291041161001100010000100021000000100020200013354962269213085643217423181381174239286811000160651351514806100010002936729291293682932929357
61004293782281500180000020004693288520002436420001000100010001000500050379159672886029362328200010002000291422917311610011000100001000310000001000003000132399590684931481139217633355380994242285821000164741370814870100010002931429338293782929729398
61004293302281500160000020004545288870002435020001001100010001000500050003159662878129392310200010002000291992916211610011000100001000310001001000303000131919217690231668452168532453811114538286081000160131359915284100010002945529456295102938429393
61004295472391800201000016000478727924010239222000100010001000100050005000515935281732862131020001000200028555286681161001100010000100031001001100021310013406941269373140949212023225381295147282611000155451328514385100010002877328675287742872328676
6100428781222180019000004001483328453020237942000100010001000100050005001111595728259287183102000100020002858928684116100110001000010022100100110002020001337895096977317512502109431263811194543281711000156271327314523100010002877628751287482870828736

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.4s }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3f | 4d | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50205120051932000000100012004111975702570103501021000110000401001000010000106214445381704584473112001112003612003511330531137086010030200100001000060200200001000012005412005411502011009910040100100001000011001000001100000100100011100032112101221199225002001001000050100120212120222120241120240120232
5020412012793211003127017600120132119680068701175013410003100044024210038100781067852454184045875231120260120221120131113313261138036054330325100831004260692200801008312014312013531502011009910040100100001000011001000691100040005415100031100132112762211976350002010121000050100120055120055120055120055120036
502041200579300001001300012002011975702570103501021000110000401001000010000106214445381314585101112003312005412005411330831136836010030200100001000060200200001000012005412005111502011009910040100100001000011001000001100000003100000000032112762211976350002131391000050100120055120055120055120055120036
502041200549310010001000120039119754025701035010010001100004010010000100001062072453813145851011120027120054120051113305311370260100302001000010000602002000010000120054120051115020110099100401001000010000110010000011000000001000011000321127622119760500021313121000050100120055120055120055120055120055
502041200549310000001000120039119758025701035010210001100004010010000100001062144453741245851011120030120054120054113302311375960990302001000010000602002000010000120054120051115020110099100401001000010000110010000001000000031000011000321127622119763500021010121000050100120052120055120055120052120055
502041200569310000001000120039119757025701035010210001100004010010000100001062072453813145851011120032120054120057113305311370260100302001000010000602002000010000120054120051115020110099100401001000010000110010000001000001001000011000321127622119763500021313121000050100120058120055120055120058120036
502041200519310000001000120041119757225701035010210001100004010010000100001062144453813145851011120030120055120054113305311370260100302001000010000602002000010000120051120051115020110099100401001000010000110010000011000001001000011000321127622119763500021313121000050100120055120052120055120055120036
50204120035931000000100012002011975402570103501021000110000401001000010000106211745374124585101112002712005412003511330631136846010030200100001000060200200001000012005412009611502011009910040100100001000011001000001100000100100001100032112762211976350000131391000050100120055120052120058120055120055
502041200519310000001000120039119757025701005010210001100004010010000100001062144453825145851011120030120054120055113302311376760100302001000010000602002000010000120065120038115020110099100401001000010000110010000011000001031000011000321127622119763500021013121000050100120060120055120036120059120036
502041200359300000001000120039119757025701035010210001100004010010000100001062171453813145851011120030120035120055113302311370260100302001000010000602002000010000120035120054115020110099100401001000010000110010000011000001061000011000321127622119763500021313121000050100120036120036120055120055120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002512004793000000060001200351198152570013500101000010000400101003910000106211245387174585946120026120051120050113309311371160010300201000010000600202000010000120053120048115002110910400101000010000010100002110001200291010001112031893106961199815003010081000050010120320120134120326120215120229
5002412030793201002340217600120124119827937002750041100071000440436100791016510663154545203458975712017512013812040411346923113817604523002010000100006002020000100001200501200531150021109104001010000100000101000001100000003100001100314028222119769500029681000050010120052120051120087120061120051
50024120035930000000100012002311979825700105001210001100004001010000100001062139453883145859071200261200471200501133243113714600103002010000100006002020000100001200351200501150021109104001010000100000101000001100000006100001100314028232119769500029681000050010120036120051120054120110120051
5002412003593100000013000120058119783257001350012100011000040010100001000010621394538831458550712002612003712006011332811113714604493002010000100006002020000100001200501203911150021109104001010000100000101000001100000103100001100314038222119766500029681000050010120051120051120136120087120054
50024120050931000000600012003511973625700135001210000100004001010000100001062139453883145859461200261200501200501133223113714600103002010000100006002020000100001200501200471150021109104001010000100000101000001100000103100000100314028222119754500029081000050010120053120051120120120083120051
500241200509310000001300012003511979325700105001210001100004001010000100001062139453832845859071200261200501200501133243113714600103002010000100006002020000100001200501200501150021109104001010000100000101000001100000000100001100314038222119770500009981000050010120051120051120054120114120048
500241200509300000001300012003511973325700135001210001100004001010000100001062139453883145859071201301200501200501133273113714600103002010000100006002020000100001200501200531150021109104001010000100000101000001100000003100001100314028232119754500029681000050010120051120051120054120109120048
50024120047931001100100012003511979525700135001010001100004001010000100001062139453832845859071200261200501200501133093113714600103002010000100006002020000100001200501200471150021109104001010000100000101000001100000000100001100314038222119769500029681000050010120048120051120106120062120051
50025120035930000000100012002011973325700135001210001100004001010000100001062139453883145859071200261200501200501133243113714600103002010000100006002020000100001200501200501150021109104001010000100000101000001100000000100000000314028233119769500029981000050010120036120051120057120121120048
50024120047930000000100012003511972925700135001210000100004001010000100001062139453832845859071200261200351200501133213113714600103002010000100006002020000100001200501200351150021109104001010000100000101000001100000103100001100318739322119884500180001000050010120322120220120325120248120235

Test 3: throughput

Count: 8

Code:

  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  ld1 { v0.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020580040620010110123008002516672516010080100800008010080000417967937588228001580040800406992436999716010020080000200160000800408004011802011009910010080000800000100800000198001700039800176102105111217128003718000001080000801008004180041800418004180041
8020480040620000100033008002510672516010080100800008010080000417965537588238001580091800406992436999716010020080000200160000800408004021802011009910010080000800000100800000198001701016800166115190511011711800371800000080000801008004180041800418004180041
802048004062000000000008002516652516010080100800008010080000417967937588238001580040800406992436999716010020080000200160000800408004011802011009910010080000800000100800000080017000178001660150051101171180037180000131380000801008004180041800418017980041
80204800406200000000280180025166725160100801008000080100800004179695375882480054800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000001980015000158000061170051101171180037180000131080000801008004180041800418004180041
8020480040620000000035008002516011251601008010080000801008006941796793758822800158004080040699243699971601002008000020016000080040800401180201100991001008000080000010080000021800160201780017611421051101171180037180000131080000801008004180041800418004180092
80204800406200000000220080025106725160100801008000080100800004179663375882380015800408004069924370029160100200800002001600008004080040118020110099100100800008000001008000001980017000380017611521051101171180037180000131080000801008004180041800418004180041
8020480040621000000023008002516652516010080100800008010080000417967137588228001580040800406992436999716010020080000200160000800408004011802011009910010080000800000100800000218001700075780017611521051101171180037180000101280000801008004180041800418004180041
80204800406200000000210080025166625160100801408000080100800004179703375882280015800408004069924369997160100200800002001600008009180040118020110099100100800008000001008000002180017010148001561021051101171180037180000131380000801008004180041801028004180041
80204800406200000000440180025166725160100801008000080100800004179695375987380015800408004069924369997160100200801602001601608004080112218020110099100100800008000001008004731980040002149480022611421051101171180037180161131380000801008014380091801438014280091
802048014262100110121321760801271669439016106780532804508129281036417521937779768005480139801416994287002816010020080080200160160800908014021802011009910010080000800000100800220198004000015218003961021251111171180037080000131080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002580040620110000288800800250665251600108001080025800108000041786133758824080015080040800406994637002016001020800002016016080040800401180021109101080000800000108000001480036010128001461917005020010161710800370800009080000800108004180041800418004180041
8002480040620000000318800800251608461600108001080025800108000041786373758824080015080040800956994637002016001020800002016016080040800401180021109101080000800000108000000800000002180013601317005020019161810800371800009680000800108004180041800418004180041
80024800406210011001800081496106425160010800108000080010800004178629375882308001508004080040699463700201600102080000201600008004080040118002110910108000080000010800000148001300075880000019170050200916179800371800009980000800108004180041800418004180041
800248004062000000016000800250066251600108001080000800108000041786293758824080015080040800406994637002016001020800002016016080040800401180021109101080000800000108000001480012010138001361101700502001716917800371800009680000800108014280041800418004180092
80024800406430000002200080025160725160010800108000080010800004178613375882408001508024380040699463700201600102080000201601608004080040118002110910108000080000010800000208001600017800176115210050200916171880037180000131380000800108004180041800418004180041
800248004062100000022000800251667251600108001080000800108000041786213758824080015080040800406994637002016001020800002016000080040800401180021109101080000800000108000001980017000158001761152100502001716111880037080000101080000800108004180041800418004180041
800248004062101000023000800251667251600108001080000800108000041786133758822080054080040800406994637002016001020800002016000080040800401180021109101080000800000108000002180017000178000061152100502001116172080076180000131080000800108004180041800418009180041
80024800406210000002000080076166625160010800108000080010800694178621375882308001508004080040699643700201600102080000201600008004080040118002110910108000080000010800000198001702017800006102100502009169178003718000013080000800108004180041800418004180041
800248004062000000021000801271067209161000804718002580084800694178237376094708005408009080091699827700841602942080000201601608009080141218002110910108000080000010800090198004100215178001561019205052019259198007618005513080000800108004180041800928009280092
800248009062001002115417600800750605966160064800668002580156801384178288375988408001508024380040699463700201600102080000201600008004080040118002110910108000080000010800000208001500017800176115210050200181616148003708000013080000800108004180041800418004180041