Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 1 reg, 4H)

Test 1: uops

Code:

  ld1 { v0.4h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f3a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
610052940922700201300030464028720011241822000100010001000100050005000111594728706292993102000100020002906329139116100110001000010010310020021002224000129529187690131250522161233133812165341284051000162571345915113100010002926229313293312930629271
61004293882270040011003046872872301024280200010001000100010005000500021596628731293433102000100020002917529203116100110001000110000310000001000203100130449415697731513512168832683812154548284711000163111369615061100010002921529227292872932229162
6100429392227001111000304761288570002428620001000100010001000500050005159862866429286310200010002000292542906311610011000100011000031000000100020200012860911570013189046216113119381084449285021000162001374915037100010002929729346293842934229329
61004293092270100031004046802875801024332200010001000100010005000500021595928584292443102000100020002913129119116100110001000110000310000001000213100130189304697931621492170632023806184949284801000162441378115133100010002936829379294392935429300
61004293662270011000002046322872401024352200010001000100010005000500001594528679293663102000100020002918629082116100110001000110000310021061000222000129149174686531210552168231263814134845284411000162041378614902100010002934229882295372934329338
6100429318228011001000304654288220012434920001000100010001000500050001159782865229317310200010002000291122914911610011000100001000031002000100221300013002919970093068248216633219381475257285171000161121370915403100010002927229420291752934929325
6100429381227203000000204702287640002441720001000100010001000500050010159612855829319310200010002000292272917811610011000100011000021000000100020200013124920569373118049216353164381275254284971000160151365115222100010002940029408293122946329276
61004292272270010010004046022875600124330200010001000100010005000500041596228644292613102000100020002920329043116100110001000110002310000001001202000130619268691831301422175132503816154349284721000160931368115079100010002934929332292742922329339
61004293652260010020002046352875301024245200010001000100010005000500001596828656292643102000100020002915129204116100110001000110000210000021000202000130479462695630951562170632373806105149285561000163671371515116100010002930629250291862942229197
6100429304226003002000204663287760002434720001000100010001000500050000159692858229349310200010002000291982916511610011000100001000221000011100121200013087933269243140249217473255381975449284941000163021353114901100010002938529291293452896429273

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.4h }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f233a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
502051200419301000000200012003511975325701035011010001100004010010000100001062162453813445849451120011120050120050113301311370460100302001000010000602002000010000120051120124115020110099100401001000010000010010002111000200410000111100321017611119759500029681000050100120051120051120048120051120051
502041200509300000000100012003511975025701035010210001100004010010000100001062108453797945850620120027120050120050113301311369860100302001000010000602002000010000120050120151115020110099100401001000010000010010001111000111110000111110321017611119759500029681000050100120051120051120052120051120051
502041200509310000000100012003511975325701035010210001100004010010000100001062108453797945844730120026120050120050113301311369860100302001000010000602002000010000120050120053115020110099100401001000010000010010001111000200410000111100321017611119759500029981000050100120051120051120051120051120051
5020412005093100001006000120041119759257010650104100021000040100100001000010621624538093458529601200321200561200581133072011376360100302001004010000602002000010000120143120053115020110099100401001000010000010010000011000000310000101000323317611119765500049981000050100120057120057120060120057120060
50204120805936110010011500012004111975925701065010410002100004010010000100001062135453824545851790120032120056120057113292311368960100302001000010000602002000010000120056120108115020110099100401001000010000010010000011000000310000101000321017611119765500049981000050100120042120057120057120054120054
502041200569311000000200012003811975625701065010410002100004010010000100001062162453820745851790120032120059120061113307311370860100302001000010000602002000010000120056120209115020110099100401001000010000010010001211000201110000111100321017611119832500049981000050100120057120042120057120057120093
502041200569301000000200012004111975925701065010410002100004010010000100001062162453763345851790120026120050120050113301311369860100302001000010000602002000010000120050120130115020110099100401001000010000010010001111000100110000111100321017611119759500029681000050100120051120051120051120053120051
502041200509300000100100112003811971925701065010410002100004010010000100001062162453820745851790120032120056120057113307311370760547302001000010000602002000010000120056120053115020110099100401001000010000010010000011000000010000101000321017612119767500029681000050100120051120051120051120051120051
502041200509310000000100012004111975925701065010410002100004010010000100001062162453828745851790120032120056120056113309311370460100302001000010000602002000010000120056120053115020110099100401001000010000010010000011000000010000101000321017611119756500029981000050100120051120051120051120052120052
502041200539300100000100012003511975325701035010210001100004010010000100001062108453797945850620120026120050120050113301311369560100302001000010000602002000010000120050120054115020110099100401001000010000010010000011000000010000101001321019111119765500049981000050100120057120057120148120057120057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f23243f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
50025120047930000100100012002011972725700135001210001100004001010000100001062139453895145859070120026120050120050113324311371460010300201000010000600202000010000120050120047215002110910400101000010000101000001100000003100001100314034822627119769500020681000050010120148120051120051120052120048
50024120092931000000160000120035119730257001350012100011000240010100001000010621574538831458590701200261200371200501133241111371460010300201000010000600202000010000120052120047115002110910400101000010000101000001100000000100001100314014821325119769500026951000050010120183120048120051120051120048
50024120047931000000100012003511973025700135001210001100044001010000100001062139453883145859070120026120050120051113324311377260233300201000010000600202000010000120050120035115002110910400101000010000101000001100010100100000100314029842628119769500116751000050010120141120051120052120051120048
500241200519310000001300012003511973625700295001210003100004001010000100001062139453883145859070120026120050120047113324311383260010300201000010166600202000010000120050120047115002110910400101000010000101000001100000002793100001100314012822312119769500009601000050010120051120051120052120051120051
50024120047931000000100012003511973025700105001210000100004001010000100381062139453883145859450120100120050120047113324311371460010300201000010000600202000010000120050120222115002110910400101000010000101000001100000003100001100314026823031119769500029981000050010120048120051120051120144120051
50024120050930000000100012003511973025700135002210001100004001010000100001062139453883145859070120023120050120047113324311371460010300201000010000600202000010000120047120047115002110910400101000010000101000001100000100100000001314028822714119769500026681000050010120051120036120136120051120051
5002412005093000000013000120035119714257001350022100011000040010100001000010621394538831458590701200261200501200521133281011369960010300201000010000600202000010000120053120047115002110910400101000010000101000001100020000100001100314031822629119773500021310121065450010120069120053120062120052120053
50024120053964001012130011200401197352570013500121000110000400101000010000106217545407824586063012003012005512008811336831137686001030020100001000060020200001000012005412005111500211091040010100001000010100000110000000310000110031401282291311977350002142301000050010120055120055120056120055120055
50024120054931000000001012004211973425700275001210001100004001010000100001062175453898345860630120030120054120035113328311371560010300201000010000600202000010000120054120138115002110910400101000010000101000001100000202841100001100314017823112119773500021310121000050010120055120055120052120055120055
500241200549301000101601012003911971425700135001210001100004001010000100001062175454076845860630120030120054120057113328311371560010300201000010000600202000010000120054120054115002110910400101000010000101000001100000103100001100314012822929119770500021310121000050010120055120149120140120143120148

Test 3: throughput

Count: 8

Code:

  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  ld1 { v0.4h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e18191e1f233f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfl1i tlb miss demand (d4)d5map dispatch bubble (d6)d9ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
80205800406430000005200800251601325160100801008000080100800004179687375882308009380040800406994236999716010020080008200160016800408004011802011009910010080000800000100800000198001700168001760172101115118001600080037080000101080000801008004180041800418004180041
80204800406431000001288080025166645160100801008000080106800004179689375882308005480040800406993176999216010620080008200160190800408004011802011009910010080000800001100800000218001700178001461172101115118001600080037180000111380000801008004180041800418009180041
8020480040643000000220080025166725160100801008000080100800004179655375882208001580040800406992436999716010020080000200160000800408004011802011009910010080000800000100800002198001500158001661151900005110011701180037180000101080000801008004180041800418004180041
8020480091643000010230080025166725160100801008000080100800004179679375882408001580040800406992436999716010020080000200160000800408004011802011009910010080000800001100800000208001710168001761172100005110011701180037180000131380000801008004180041800418004180041
80204800406430000001530080025166725160100801008000080100800004179671375882208001580040800406992436999716010020080080200160000800408004011802011009910010080000800001100800000198001510235800146117210000511001170118003718000010080000801008004180041800418004180041
802048004064300000023008002516672516010080147800008010080000417967937588220800158004080040699243699971601002008000020016000080040800911180201100991001008000080000110080000019800170020800156119210000511001170118003708000001080000801008004180041800418004180041
80204800406430000002300800251661125160100801008000080100800004178746378333618001580040800406992436999716010020080000200160000800408004011802011009910010080000800001100800000218003800158001661142100005110011701180037180000101080000801008004180041800418004180041
802048004064300000022880800251605251601008010080000801008000041796713758822080015800408004069924369997160100200800002001600008004080040118020110099100100800008000001008000001980016107588001761142100005110011701180037080000131080000801008004180041800418004180041
8020480040643000000350080025166725160100801328000080100800004179679375882408001580040800406992436999716010020080000200160176800918004011802011009910010080000800000100800000198001710268001761162100005110011701180037180000101380000801008004180041800418004180041
80204800406420000002200800251661125160100801008000080100800004179457375882208001580040800406994236999716010020080000200160000800408004011802011009910010080000800000100800000198001400198001761141900005110011701180037180000131380000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22243a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
8002580040600010110010300008002516642516001080010800008001080000417861337588218001580040800406994637002016001020800002016000080040800401180021109101080000800000108000001480013001680010611317005020116118003718000012680000800108004180041800418004180041
80024800406200000000750100800251666251600108001080000800108000041786293758824800158004080040699463700201600102080000201600008004080040118002110910108000080000010800000178001300138001261101400502011611800371800009680000800108004180041800418004180041
8002480040620000000088880008002516648251600108001080000800108000041786373758824800158004080040699463700201600102080000201601608004080040118002110910108000080000010800000178001310138001261101700502011611800371800009680000800108004180041800928004180041
80024800406200000000613521008002516610251600108001080000800108000041786293759934800158004080040699463700201600102080000201600008004080040118002110910108000080000010800000178001300138001261121700502011612800371800009680000800108004180041800418004180041
800248004062100000004710000800251664251600108001080000800108000041786293758824800158004080040699463700201600102080000201600008004080040118002110910108000080000010800000178001200748800096191400502011611800371800009980000800108009180041800418004180041
8002480040620000000048100008002516610251600108001080000800108000041786373758824800158004080040699463700201600102080000201600008004080040218002110910108000080000010800000148001200168001360101700502011611800371800009980000800108004180041800418004180092
800248004062000000001200000800251666251600108001080000800108000041786293758824800158004080040699463700201600102080000201600008004080040118002110910108000080000010800000148001210128001361131700502011611800371800009080000800108004180041800418004180041
80024800406200000000600000080025166525160010800108000080010800004178597375882480015800408004069946370020160157208008020160000800408004011800211091010800008000001080000014800120015800130191700502011611800370800000980000800108004180041800418004180041
8002480040620000000059200008002516610251600108001080000800848000041785973758824800158004080091699463700201600102080000201600008004080040118002110910108000080000010800000178001300138001261121700502011611800371800009980000800108004180041800418004180041
800248004062000000003760000800251669251600108001080000800108000041786373758824800158004080040699467700201600102080000201600008004080040118002110910108000080000010800000178001300198001361261700502011633800370800276680000800108004180041800418004180041