Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 4 regs, 1D)

Test 1: uops

Code:

  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
64006293252192141021000004475289942018099400020002000200020002180016000012218332889929177310400020002000400040002919329152116100110001000200006020000002000202012881918768993040246203033075380813454528439165221352614893200020002921529222292562915829337
6400429268219010011000310449829099201807040002000200020002000218021600000218472890229237310400020002000400040002911129146116100110001000200248120020022000202112754916668043061146202153116381718504528367162161328714879200020002925429209292872925429275
64004292152190010010001004504290112218001400020002000200020002180516000011218922890929232310400020002000400040002915329137116100110001000200000020000002000000012864913368243047046202413115380416484728423163331321714777200020002926829304292272931029331
64004291762200101021002104522290652218054400020002000200020002180616000016218262884629260310400020002000400040002912329145116100110001000200338020020152000202112859897968263041149202073076380215525228377163011330714878200020002925529234291892929729327
6400429274218001001000100460429072201806040082000200020002000218141600006218502885429312310400020002000400040002922729151116100110001000200006020000002000060012724912768013029144202383030381712484228494164151333414995200020002932229305293322929229244
6400429282219012112100300446029076001803740002000200020002000218101600008219092898629223310400020002000400040002924429177116100110001000200236020020052000202112988901968143073145202993062381014474428484165521336915007200020002930429250293492932429327
64004292812190000010001004578290522018049400020002000200020002181116000013218492898929246310400020002000400040002920829245116100110001000200004020000002000000012843904668203020046203113034381212524928552165251328014735200020002928629283292712929229235
64004292972200100001003004700291092018106400020002000200020002180916000012219002885529237310400020002000400040002920229223116100110001000200004020000002000040012937909668093041047202533054381017444928501163511334614842200020002921629359292102931029296
64004292162190111000002004532290370018036400020002000200020002180116000011218572907029174310400020002000400040002923329285116100110001000200426120021022000262112837906468363033146202853046380515415528427164271330415087200020002926129267293292938429197
6400429241219010112000200457229038001797040002000200020002000218111600001121842289372921531040002000200040004000292272922311610011000100020043622002128200026221280991456847302634320210306538148504828513163701320815025200020002921029254292502926429213

Test 2: throughput

Count: 8

Code:

  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320206800846200000003058278003016160253253641001650101600001001601181601085002229366129862608002408004580209032732010020016000016000020032000032096080044800451180201100991001008000080000100160062034001600020051600022340510931712800421600001600001008004780049800468004680046
3202048021162000000123061048003016160473248641001644851600001001600001600005002392843130244208002508004580044032732010020016000016000020032000032000080045800451180201100991001008000080000100160000034001600020021600022340510921711800421600001600001008004680050800458004780046
32020480045620001000304062800301616025326510100164890160000100160000160000500221571212993520800240800458004503273201002001600001600002003200003200008021180045118020110099100100800008000010016000000001600020081600022340510931711800421600001600001008004680046800458004780046
32020480045620000101200398380030000253249251001650171600001001600001600005002215406129899708002408004980048032732032620016000016000020032000032000080045800441180201100991001008000080000100160000034001600620021600022340510922611800411600001600001008005180046800498004680046
32020480045620000001891053368003016160253256101001672831600001001600001600005002158486130083308002408004480045033032010020016000016000020032000032000080045800451180201100991001008000080000100160000034001600020051600022340510931711800421600001600001008004680046800468004780049
3202048004962000000030562680030000253249731001689631600601001600001600005002239656129782608002408004580045032732010020016000016000020032000032096080216800482180201100991001008000080000100160000034001600020081600002340510921711800421600001600001008004680045800508004680046
32020480045620000000904368800301600253261491001660801600001001600001601085002078237130438308002408004580045032732010020016000016012020032000032000080045800451180201100991001008000080000100160000034001600020051600020340510931711800421600001600001008004680050800468004680046
32020480045621000000313909800331600253241781001648441600001001600001600005002228088129900608002408004680044032732010020016000016000020032000032000080045800451180201100991001008000080000100160000034001600020051600022340510931711800451600001600001008004680049800448004680046
320204800496220000003039828003016160253262471001654611600001001600001600005002158849129961208002408004580045033032010020016000016000020032000032000080045800452180201100991001008000080000100160000034001600020021600022340510932631801951600001600001008021080376800878014980376
320204802126210001213217903526801961616894732327810016514416006010016011816010851122046921300463080418080211803781639123320326202160240160120200320480320480802188037331802011009910010080000800001001610200348710160902009271601222347513552613801961600001600001008037580210803768022080211

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200268006062010000012900573308004316160253280661016532716000010160000160000502390491130086208002480045800450326320010201600001600002032000032000080049800482180021109101080000800001016000000001600020021600021634140050198176880058160000160000108006280046800598005980046
320024800516201101001442100216918002916161253290141016678716000010160000160000503679304129978508019680058800590340320010201600001600002032000032000080222800521180021109101080000800001016001514360016001600211600021640140050196178880055160000160000108004680050800468005880052
3200248006062010000062000508608003516160253257031016000716000010160000160000503679360129836108002480051800510339320010201600001600002032000032000080050800511180021109101080000800001016001514340016000211171600021638140050198178780049160000160000108021180059800468006080060
3200248005962010010003005536180036161602532505910164843160000101601181600005023999201301575080024800588005903313200102016000016000020320000320000800518004411800211091010800008000010160014034001600160017160000163600050196179680047160000160000108004680049800538004680061
320024800586221110021821105237080037161602532548510165640160000101600001600005021589121299474080024800458004406263200102016000016000020320000320000800488004811800211091010800008000010160000000016000200917160002234000501910176980047160000160000108004680050800518006080051
320024800496211001000200045100800350160253251751016577916000010160000160108502319900129978508002580049800608234032001020160000160000203200003200008004980050118002110910108000080000101601220340016000200216000223400050197178880045160000160000108005080046800468004680046
320024802116200000001230035711800341616025323396101609801600001016011816000050222317013048980800258004580052032732001020160000160000203200003200008006080059118002110910108000080000101600141434011600161192716000216341400501972610780213160000160000108021980212802128021880046
320024801376220110112649100437408019416162747032631210164461160060101601181601085022584951306765080336802118037483821832046220160360160120203204803202408021080211218002110910108000080000101600001701290160076001857160122236140050467269780215160000160000108021180378802308038580558
3200248022062312000012190050791800301416025324607101641771600001016000016000050239865112974980800248004980045032732001020160000160000203200003200008004480045118002110910108000080000101600000001160004001816000223400050197178680045160000160000108004680046800468004680046
32002480045621000000000060060800301616025327585101661831600001016000016000050222622612958220800258004580045032732001020160000160000203200003200008004880050118002110910108000080000101600000340016000200816000223600050198176680041160000160000108004680046800468004780046