Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 3 regs, 8H)

Test 1: uops

Code:

  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.000

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005288392331211800003047262855244228524000100030001000300050001590223170282814728799310400030007000286012868411610011000100030000903000023001190811308596156880307711512011432223805154750282371000158771238313695300010002867328780287912838928809
630042875323101610000330468428614042268840001000300010003000500015903171702328177287223104000300070002862828647116100110001000300009030000230011900132789468693031619492013532353813124644282871000158731261613657300010002875128883286812874228777
63004286182320141100003046982872104228114000100030001000300050001590291700128247287263104000300070002885928714116100110001000300009030001030011900133169497698831628442007931133812224242281641000154661258613998300010002877728767287202884328688
6300428716232017140000004644286354422646400010003000100030005000159031517006282402872731040003000700028806288421161001100010003000090300102300100001343095056972316210452017032053813134346281791000154721244613777300010002884428705286942885628809
63004287572310171700000047672869520226994000100030001000300050001590241704928276288253104000300070002883228774116100110001000300006030001030000600132149558699731136472017232683815224844283191000155931268313738300010002883128778287252886028879
63004287642320141800001046902871420227904000100030001000300050001590141704828233288313304000300070002883928872116100110001000300006030000030000000133059363695231396442011832813817214446281891000155301256113868300010002896928805288412897628786
63004289292320181200001046962877120226734000100030001000300050001590641706528244288613104000300070002868728691116100110001000300006030000030000600132199427688531895432020132133812154442281711000156981268614045300010002884628896288352879428865
630042892123102017000010453628768022270440001000300010003000500015905617049282512888931040003000700028775288261161001100010003000000300000300106001334096206945315813502033032193816193946282701000155961270314068300010002883628953288882881828772
630042870523102012000000463628682202277240001000300010003000500015906517039282412885631040003000700028754287671161001100010003000060300000300006001340995476974319611482013732083817214148282901000156771256813762300010002884928895288222881628845
630042884823101315000000469728681002282940001000300010003000500015903217008282112887031040003000700028744286541161001100010003000060300000300106001318194316891316410412014532993811194540281691000155281265613440300010002873728812287992872228834

Test 2: throughput

Count: 8

Code:

  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402051200619311100002821700112004316149253201008010024000080100240000470658552022012002401200531200478996339001732010020024000020056000012006312005011802011009910010080000800001002400151500024001600162400001636141000511011621120050080000024000080100120051120069120048120054120048
240204120059930100100811410112004316052532010080100240000801002400004706645519719120035012005012004789971390016320100200240000200560000120058120060118020110099100100800008000010024001615360024001600182400021636140000511011612120055080000024000080100120059120054120051120060120061
240204120047931110100781800112003501602532010080100240000801002400004706645520247120035012005012005989971390019320100200240000200560000120059120058118020110099100100800008000010024001414360024001820142400021636140000511011621120044080000024000080100120048120059120053120052120048
240204120058931110000601800112004316165253201008010024000080100240000471120552022012003301200591200518996539001732010020024000020056000012005212005911802011009910010080000800001002400141400024001601202400001636140000511021621120058080000024000080100120062120060120053120069120053
240204120059931101000902100112003216164253201008010024000080100240000470664552024412003401200481200528997239001632010020024000020056000012005312005911802011009910010080000800001002400141400024001600142400021636140000511011612120055080000024000080100120059120053120061120060120059
2402041200509301000001081801112004416022532010080100240000801002400004706645519718120035012005112004789971390016320100200240000200560000120059120060118020110099100100800008000010024001516360024001601222400001436140000511021612120056080000024000080100120060120053120048120059120059
240204120051931100100447190011200321616782532021280100240060801002400004706565524696120034012019112006089960390005320100200240000200560000120051120052218020110099100100800008000010024001414030324001603142400021436140000511022521120178080000024000080100120048120059120054120061120052
240204120054931111000541701112004501652532010080100240000801002400004706645519884120022012005212005889974390009320100200240000200560000120058120053118020110099100100800008000010024001515360224001602162400021636141000511021612120049080000024000080100120053120061120059120054120060
240204120057930111000902301112003216163302532019780100240000801372400004711505537368120034012005812004789972151900223201092002400162005600371200471200471180201100991001008000080000100240014144410240016102024000216421401115116016011200441800021724000080100120052120055120052120055120043
24020412004296800000012300012003516160463201998010224000080100240108470656551990812014201200581201958996013900103201002002400002005600001203221200481180201100991001008000080000100240015153600240016107972400021636140000511011621120046080000024000080100120048120048120194120188120054

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002512005293011000000017011200391616525320010800102400008004924000047014755196920120029012005412005489987390034320010202400002056000012006312005311800211091010800008000010240016154400240016021724000216441400502000216841200448000024000080010120048120055120053120055120055
240024120063931101000000190112003916161253200108004724000080010240000470147551995601200230120047120047899891090034320010202400002056000012005412005211800211091010800008000010240015144600240016001424000216441400502000516531200448000024000080010120055120053120055120055120055
2400241200549301011000001901120032161622712032010880161240180800472401084708715529605012326401203251204579027025902633203022024012120560839120457120333418002110910108000080000102401341744591024019703232224006216441400504600444851201728011224000080010120326120192120451120191120326
240024120454933122010000180112003716161253200108001024000080010240000470147552010001200290120055120052899883900363200102024000020560000120055120052118002110910108000080000102400141400024001601192400021601400502000516351200498000024000080010120048120055120064120055120055
240024120054931101010000180112003801632532001080010240000800102400004701475520028012002901200531200548998939003232001020240000205600001200531200541180021109101080000800001024001414420224001600192400021601410502000316671200528000024000080010120053120048120055120053120048
24002412005493011100000018011200371616025320010800102400008001024000047013955200280120022012004812005489989390028320010202400002056000012005112005411800211091010800008000010240014144401240016101824000216441411502000216451200498000024000080010120055120054120048120055120061
240024120047930100010000140112003216162253200108001024000080010240000470147551969201200290120054120047899982390032320010202400002056000012005412005411800211091010800008000010240014144400240016101724000216461400502000516461200498000024000080010120055120054120064120055120055
2400241200549301000100002401120039160625320010800102400008001024000047014755199080120029012005712005289987390034320010202400002056000012005412005211800211091010800008000010240014144401240016111724000014441410502000616461200498000024000080010120055120053120055120055120055
240024120054930110000001218011200381516125320010800102400008001024000047014755199080120022012005412005189982390027320010202400002056000012004712005211800211091010800008000010240014144400240016011424000216441400502000216431200498000024000080010120055120053120055120055120048
240024120047931110000001219011200371616225320010800102400008001024000047014755199330120029012004712005289987390034320010202400002056000012005412005211800211091010800008000010240015154400240016001624000216441410502000216431200498000024000080010120055120054120064120055120048