Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 2S)

Test 1: uops

Code:

  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6600729247229510400001101460929234021605060004000200040002000256143580418237302897829217310600020004000400010000292802912411610011000100020002002000003200060013023922069253110056201453233380117595928440163781325314239200040002928029384293922928429111
660042922322700010000000046302932920161446000400020004000200025624358044238522910329318310600020004000400010000291972937411610011000100020000402000000200000012972919869113118057200953193381220525528476160611357414076200040002939129309293172940429352
6600429330227110000001000461829200001586660004000200040002000256213580513238042905429493310600020004000400010000292742929311610011000100020042402000200200040013132917868853165156201083217381221545628571158771350214141200040002949729370292792933429383
660042928522711000000010046732932420160836000400020004000200025608357998238712915029237310600020004000400010000293042929911610011000100020000002000100200040013120924569613119056200203231381425555828517165421324114173200040002914929194293792946229288
660042943622800000000000046762923402159576001400020004000200025605358007238062909229217310600020004000400010000292522938111610011000100020000602000006200040013015926069203124057201943234380519575628552161011326213934200040002935229349293532924629343
6600429341228410000000000470929244201614960004000200040002000256173580810238422909629297310600020004000400010000293242934811610011000100020000002000000200040013136949468743156157200413260381018545828596162481361614072200040002927029302294552936529392
660042940022711010000000045642921220161366000400020004000200025607358385238032911929402310600020004000400010000292882933111610011000100020000402000003200000012920936869173060049201293156381022595728491164261340714206200040002931329286295152933529476
660042934522900010000001045612917102161086000400020004000200025608358134237882914429326328600020004000400010000293592932611610011000100020040402000003200040013097921368673096056201473233381224515528581162411350714193200040002936829277292272926429383
660042929222810000000100045892927800160176000400020004000200225597358100238142921029286310600020004000400010000291902926611610011000100020000002000000200000013156945568733154152201573226381516586128611164771333113909200040002925229403293472930929149
660042938822711010009101046662917000160386000400020004000200025607358103238442912229416310600020004000400010000292522938511610011000100020000602000000200060013022940169673157160201683210381116505128547164471320214209200040002933529401292992931629299

Test 2: throughput

Count: 8

Code:

  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.2629

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480207100287784000000050043657010139216162147661525518071100362248160000100320000160000500458782656907230102246099620101049204143215264801002001600003200002003200008000001009871028061180201100991001008000080000100160000034115480160002202160000234005109117001110200000160000320000100103011100929101887100449101276
480204101030788000000020041308010244316162192723125521034100361495160000100320000160000500469404458385840100543099526101721211763215484801002001600003200002003200008000009944710029811802011009910010080000800001001600000341069601600020021600022340051101170011100035001600003200001009987710121410111599926100405
48020410053277800000008004345001028881616183668122551932310035969316000010032000016000050045874085786437010072701008039980720148322283480100200160000320000200320000800000101582101646118020110099100100800008000010016000003410907016000210016000223400510911700119951700160000320000100101979100781102185101014101473
480204101683779000000000040772010202316162231641525521526100362284160000100320000160000500464992456811750101143010105810028220481322483480100200160000320000200320000800000101075100490118020110099100100800008000010016000003410403016000200216000223800510911700211020930216000032000010010117799967100019100883101172
480204100545788000000030045424010071816162473678825524391100360772160000100320000160000500467931955670441101904310005010076921806321406480100200160000320000200320000800000100906101519118020110099100100800008000010016000003411113016000212816000203400510921700111011390016000032000010010023910063810142399984101375
480204101531787000000020040384010167416162334693925521753100361297160000100320000160000500476936155395950103180010087210023520477321720480100200160000320000200320000800000101206101300118020110099100100800008000010016000003488840160002002160002234005109117001110081900160000320000100100861101273102492100719101441
480204100637786000000120004110701015211616197766262552151010036104416000010032000016000050046480585638873099873010087710057820171321205480100200160000320000200320000800000100586102318118020110099100100800008000010016000003410231016000210516000223400510911700111022340016000032000010010167410113110075699338101304
48020410039179400000013820040164010111816162351640352523416100362256160000100320000160000500470281855039120101033010094610105919794321295480100200160000320000200320000800000100216100325118020110099100100800008000010016000003410083016000200216000223400511011700111016650016000032000010010255810097999067100110101574
4802041009767880000001880041090010133016162186636025522828100361703160000100320000160000500474495155594050102086010174499701195483210704801002001600003200002003200008000001000041028021180201100991001008000080000100160000034109620160002002160002234005122335003310060910160000320000100100611100766103221101965101773
4802041003657920111212226401674004146501026651616227765027852227410036213016012010032047216021650047448005556795010089901015261010842110612216834807882001602403204802003204808018001011531024733180201100991001008000080000100160120234107642160062141042160062234005109117001110132400160000320000100101205100954100619101465102462

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.2647

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48002710120174700000030040259010103601622046569255216281036084716000010320000160000504705696560647110197799992100303223373210814800102016000032000020320000800000102212997491180021109101080000800001016000042100530160002002160062242000501901817183510177616000032000010102135101683100735102028102157
480024101586790000000201406360102948161620337132255198321036099216000010320000160000504712664567231110108899714101866221623199894800102016000032000020320000800000101861101204118002110910108000080000101600003410588016000210216000220000501901117191910092216000032000010100699100987101946100529101275
48002410231279000000122014053609964116162401633925522736103649411600001032000016000050468213057816631012811012741012312171132107248001020160000320000203200008000001000911021351180021109101080000800001016000034106790160002002160000034000501901917192010145316000032000010101793100856102124101057101269
4800241000797800000008014218901014931616202571952552324610359028160000103200001600005047136195669015100419101097101610213873215294800102016000032000020320000800000102267101046118002110910108000080000101600003610858016000200216000023400050200181718181001951600003200001010194010130999575101520101382
48002410170479200000020040863010126016162216648825521022103623441600001032000016000050468622755395241026521026271016902150032148148001020160000320000203200008000001018401022511180021109101080000800001016000034111680160002100160002234000501902517201910127616000032000010100875100845101708102113101982
48002410075481200000122014262701020951616212363962552247410361678160000103200001600005046904025727515101279101615101490214263211124800102016000032000020320000800000101389996741180021109101080000800001016000034111640160002002160000234000501901817191910185616000032000010100771102170102377101658100341
480024100923776000000200426060995231616235566732552153110360715160000103200001600005046416625698820101158102812996302138032122148035420160000320240203200008000001006431012701180021109101080000800001016000034174640160002002160002234010502001717181710075916000032000010100334100586101624101736101267
480024102147782000000200388580101331002413706925521425103632731600001032000016000050461242756543211007201003771007272217732144648035420160000320000203200008000001023479949111800211091010800008000010160000341060301600000021600022340005020091719201014991600003200001010097199948101208101362101590
48002410227877800000020039092010089101619476745255208151035840416000010320000160000504765421567936299444100800101181198863225564800102016000032000020320000800000100759983282180021109101080000800001016000034107500160002000160002234000503201217101810137216000032000010100933101325102685101635100547
4800249916677000000080042131010076701621296614255188691036186316000010320000160000504641910579491499461101022102003209953203404800102016000032000020320000800000101535101419118002110910108000080000101600003410469016000210216000203400050200181719121015891600003200001099746101680101756100459101359