Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (single, D)

Test 1: uops

Code:

  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6400729068233705093005004283452217143400020002000200020002161716000230021803281332846831040002002200040004000282782819111610011000100020000200202200004013696979972503304045194183246380925485327935147471240213401200020002836728383284872847528482
6400428436214102090004878284742217344400020002000200020002161716000190021779282482851431040002000200040004000284272846211610011000100020006200202200026013759989271893410145195023281381027465027981148231231913654200020002848528520283722842628476
64004284382132020000049162863900174004000200020002000200021635160001400218022822928638310400020002000400040002844328255116100110001000200002002022000060135431001371513432048195043229380721464727972141831210313120200020002833428436283272852028484
64004286422131020030049602817402174954000200020002000200021615160001300217962815328534310400020002000400040002829828408116100110001000200062002022002260136401011871353320146195383323380828514827935145011241713619200020002826328312285802832928251
64004282342131020030150762817102173444000200020002000200021631160001400218292825528368310400020002000400040002831528295116100110001000200002002022000260136191010971553257045193663315381224534627973140161198613861200020002828328460286022837228200
640042831721310103030051082816422173294000200020002000200021618160001900218002858128959310400020002000400040002883829023116100110001000200062002050200226013455989670803367152195443368380323515427973149931247713455200020002858628362288472864428703
640042794621200103930050502830622173384000200020002000200021613160001100218612837928352310400020002000400040002845328582116100110001000200062000022002060136991008571573483046192693322380925524727984142851243713678200020002816828503283232827728590
64004284172112030483004969283442017592400020002000200020002161316000180021790282512833231040002000200040004000283472836411610011000100020006200200200026013555985970863371045194263302380224464627910144951197713153200020002833928405281982842628522
640042851522030101230150642804502174384000200020002000200021607160001200218162822028434310400020002000400040002828528300116100110001000200062002022002260135221037870833360045192753135381229525527997146401219213515200020002855128597285322846828386
640042838121340401230047832831602171314000200020002000200021627160001200217802827528366310400020002000400040002825928416116100110001000200062000022000262138071002871393353148195733232380624474628072150421214913418200020002842928249285662840028529

Test 2: throughput

Count: 8

Code:

  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  st4 { v0.d, v1.d, v2.d, v3.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202078007062001002000000060981800301616025327409100163774160060100160000160000500222576012975460800248021980045032732010020016000016000020032000032000080045800451180201100991001008000080000100160062000016000200816000220000511251745800421600001600001008004680046800468005080050
32020480045620010010011290037011800301616025324478100165035160000100160000160000500255986812842110800258005880051033132010020016000016000020032000032000080050800501180201100991001008000080000100160015143401160016011616000216361410511131753800471600001600001008004680049800468004680046
3202048005262001001000030066121800301616025326743100160350160000100160000160000500226656212964790800248004980046032732010020016000016000020032000032000080045800451180201100991001008000080000100160000034001600020011160002234000511231755800421600001600001008004680045800468005080046
320204801226210100100033004646180030161602532303410016419616000010016000016000050022257601293267080024800458004503263201002001600001600002003200003200008004580045118020110099100100800008000010016000003400160002002160002234000511251735800461600001600001008004980046800508004680046
320204800456200100100000006560180034161602532570610016612916000010016000016000050023024751292786080025800458004503313201002001600001600002003200003200008004580045118020110099100100800008000010016000003400160002000160002234000511241745800461600001600001008004680046800508004680046
320204800456200100100012300493318003016160253242921001639061600001001600001600005002202347129692908002480045800450327320100200160000160000200320000320000800458004511802011009910010080000800001001602420000160002002160000034000511151745800421600001600001008006280053800518005980059
32020480052620110110000190045392800431616025327147100160491160000100160000160000500359939712993770800338005180049034032010020016000016000020032000032000080059800591180201100991001008000080000100160014143631160016011716000216371400511251755800551600001600001008006180051800598005880059
3202048005062011111100014004711280045161622532562110016561616000010016000016000050023999201301846080025800588005803323201002001600001600002003200003200008005880058118020110099100100800008000010016000003600160002102160002234000511331755800421600001600001008004680046800478004680045
320204800496200100100003103886180034161602532596010016711116000010016000016000050022396331303069080025800458004503313201002001600001600002003200003200008004580045118020110099100100800008000010016000003400160000002160002234000511231755800421600001600001008005380046800468004680046
32020480045620010010001530050281800301616025322293100165796160000100160000160000500255988612954270800268005880050033232010020016000016000020032000032000080050800501180201100991001008000080000100160015143601160014112116000216361400511251755800471600001600001008004680046800508004680046

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002780058621000000120053410800301616025324492101641761600001016000016000050223314712800360800340800508004503303200102016000016000020320000320000800458004511800211090101080000800001016000003400160002000160000234000501903174480042160000160000108004680046800468004680046
32002480048621000000123148770800341616025325211101653541600001016000016000050223692912971480800240800458004503273200102016000016000020320000320000800628004511800211090101080000800001016000003400160002002160002234000501903174480042160000160000108004580046800468004680050
32002480052620000000030562808003001602532571610165508160000101600001600005021536601297236080024080046800460327320010201600001600002032000032000080045801224180021109010108000080000101600000340016000000974160002234000501903175380042160000160000108004680050800468004680049
32002480049621000000030439408003016160253253341016521916000010160000160000502156661130157008002308004980045033232001020160000160000203200003200008004980045118002110901010800008000010160000034001600020020160002234000501903173480042160000160000108004680045800468004980046
3200248004562000000012305017080030161602532831710165286160000101600001600005022397341300850080025080058800450334320010201600001600002032000032000080045800451180021109010108000080000101600000000160000002160002234000501904174480042160000160000108004680046800468004780046
3200248004962000000006056130800361616025325114101641191600001016000016000050222906513017550800240800458004503273200102016000016000020320000320000800458004511800211090101080000800001016000003400160002100160002234000501904174480042160000160000108005080046800468005080046
3200248004562000000009046540800301616025325310101653961600001016000016000050221237212955240800240800458004503273200102016000016000020320000320000800458004511800211090101080000800001016000003400160000005160000034000501903173380042160000160000108004680045800508004780047
320024800456200000001230395308003416160253264071016423216000010160000160000502128584130152908002408004980048032732001020160000160000203200003200008004580704118002110901010800008000010160000034001600020031600022340005019042123480041160000160000108005080046800468005080046
320024800456210000000305286080030161602532521410164210160000101600001600005022372091293767080025080045800450327320010201600001600002032000032000080049800641180021109010108000080000101600000000160002005160000034000501904175380042160000160000108004680046800498004680046
3200248004562000000045304942080030161602532530110164433160000101600001600005022046581297774080025080045800450327320010201600001600002032000032000080045800451180021109010108000080000101600000000160002105160002234000501904173380041160000160000108004580046800468004680046