Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 8B)

Test 1: uops

Code:

  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
66007291882331210130000410004620288870215745600640002000400020002560235807110023782288452909431060002000400040001000029020290321161001100010002003241200233220042620013183934869063099950198893384381323504328422159211282413792200040002918229083292622917729143
6600429182234120018221264180100047172915722159246006400020044004200425714358706002387429041293602910604220144036401610050292812931010161001100010002007369200410220022623013195914969193073841200263268381132454928518163921322414009200040002919129319290422931029309
660042910823512402002127931000454629021221611960004000200040042000255983579940023773288512918931060002000400040001000029133291591161001100010002003301200211220002621109112971931869453090645197343173381422484828252156131295913511200040002894529020290092920329598
660042900323412201910112920000456028876221567160004000200040002000256263580960023783289292899433260002000400040001000028874289351161001100010002003261200200220002021256213193978469033077842198363219381522444828423157941328313625200040002890929018288682899128982
660042888923711801321517225612350000453429475021626060664028202440562018264263622520024119294662965610339860962030405540541016029956294821161001100010002004341200200820002421013371945769543148749198813243381529494428264155321272013640200040002886828820288822886328893
6600428896223119018100031000475828777001558760004000200040002000256143581240023826287042888831060002000400040001000028852288101161001100010002003342200201220002421013391973369583149848196573224381720454328190154991285213264200040002875028784287682896728810
6600428805222124017000031000479928577001534760004000200040002000256103580760023802286472866831060002000400040001000028738288221161001100010002003441200300520082423012957909667973057650208833329381320494529165164291361214293200040003012929839296802997630193
66004302112421162232141113201059100046892985200161226000404420144036201325875359134002392729493298414717860312004400040001000029628295782161001100010002004200200211520002621013111932169713144845201873292381226525228579162611338714077200040002931229501295752957429532
6600429257227116123100030000461529368001615060004001200040002000256213580330023803293672952131060002000400040001000029301296281161001100010002003300200200220002021013131948368903181653201353179381620424728644164831342714162200040002950729477295432942129358
6600429569228121016100031000460929365001628860004000200040002000256123580720023835292902950532760002000400440001000029557295121161001100010002002363200200220002021013081965769313121449204763386381721465228879162741339714268200040002985329834298812981229672

Test 2: throughput

Count: 8

Code:

  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.2676

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802079805677800001220004291410201716162356675125520186100360731160000100320000160000500468848256200270101285101556996902269632233948010020016000032000020032000080000010138910054411802011009910010080000800001001600000349150016000200216000223400510911711100044160000320000100104863100880101738101000100080
48020410184080100000810041380101056161620726568255237321003626921600001003200001600005004620819561774201001071016371017712263932046848010020016000032000020032000080000010102310209111802011009910010080000800001001600000349123016000200816000223600511011711101166160000320000100100873100745100993100549101411
4802041023827970000021004068910010016162146631925523919100360225160000100320000160000500465980657108630101947100810100969217903196304801002001600003200002003200008000001003539933811802011009910010080000800001001600000341094501600000021600022340051091171110061716000032000010099992101954101135101155100334
48020410106078100001220104246510105416162123595125521673100360055160000100320000160000500468119155965710100765101881102354219093210944801002001600003200002003200008000001006641020001180201100991001008000080000100160000034950101600020021600022340051101171110109916000032000010099953101364102243100929102573
4802041006237810000080003726710018216161869601425522356100360898160000100320000160000500468137254888360101691100744100229200433204264801002001600003200002003200008000001015031010931180201100991001008000080000100160000034103950160002100160000234005109117111016141600003200001009819610058210071299590100350
480204101659790200003010402851023031616185062543652253410036321916000010032000016000050046029995554647010105010151210057121478322016480100200160000320000200320000800000998291006661180201100991001008000080000100160000034112480160001002160002234005110117119736216000032000010010028510107710243499186101938
4802049992478300001220004182810042116162238677225522262100363061160000100320000160000500469270555852480101479100122101024204713205784801002001600003200002003200008000001004781008451180201100991001008000080000100160000034110020160002001416000003400511011711100987160000320000100102386102570101239101555100893
480204100265784000008010417069929601621176146255185581003617341600001003200001600005004659782556903501021401017541005992172432067448010020016000032000020032000080000010099010135611802011009910010080000800001001600000341052301600020021600022000510911711100919160000320000100102315101037100359102037100672
48020410108977500000200042452101272161618866531255203201003599091600001003200001600005004707041558657901005551023669998522713321326480100200160240320480202320480801200101948100130318020110099100100800008000010016012723410505016012200116216012223400538522151110208716000032000010099091100459100738102260102738
4802041009388001122264266000422611019701616165762545252254310036072716012010032047216021650046386325704417010250210113210180421199321408480100200160000320000200320000800000100590101895118020110099100100800008000010016000003411435016000200816000223400510911711101865160000320000100101099103064101116100673101538

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.2634

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480027103459793000000031040938101959161622357080255164551036029716000010320000160000504736862571033100101833100688100152205283219814800102016000032000020320000800000101407987021180021109101080000800001016000004399890160000003160002242005019005176610090716000032000010101691100666102185102793101006
48002410060178400000003004174710128716161920564225519606103640091600001032000016000050462230157326361510144310126110163920348322298480010201600003200002032000080000010133610031311800211091010800008000010160000042109210160002002160002242005019006176610223716000032000010100744100223101104101386100582
480024102459783000000040040204101561161621275781255188811036110616006010320000160000504744188564101100986751016389958721303320543480010201600003200002032000080000010044110161911800211091010800008000010160000042107460160000002160002244005019006175610081016000032000010100798100289101340101946100247
48002410062877900000112300397481010321616216099322551932410358969160000103200001600005047439555581104151010391004471007952226432068648035420160000320000203200008000001026471012891180021109101080000800001016000004210429016000000216000224300501900617561009721600003200001010016998966101484101430101806
48002410118179100000003003440999852161622096846255201201036164616000010320000160000504770823585484805101416100191995762236931979548001020160000320000203200008000001021771017571180021109101080000800001016000000110080160002005160002042005019006175710196916000032000010101888102431101659100739101745
48002410022178900000003004059610199516162077631925522971103620151600601032023616010850468417155325310010219410177110171021773322051480354201601203228802032552081320010282310150120180021109101080000800001016006424293552160182021047160062242005032005357510078516000032000010101720100083100952101318101648
48002410050178700000003004052710152116162004665925519207103643841600001032000016000050464305356549300010161199709100218220943207504800102016000032000020320000800000100184100451118002110910108000080000101600000429281016000200216000224201501900517661013721600003200001099899101240100924101662101745
480024996227830000000300389711006261602355666125523202103622691600001032000016000050472964956432150099259101456101981214833221014800102016000032000020320000800000101153100871118002110910108000080000101600000421031901600020051600022000501900517651024251600003200001099115102042102165101507100961
480024102330777000000030042649101127160214560002552079910359796160000103200001600005046728365512058001012481003801014382095432149048001020160000320000203200008000001018691009371180021109101080000800001016000004210166016000210216000224200501900617669949616000032000010100006100704102337100541102004
4800241010427840000000300376561015121616246266422551781010360568160000103200001600005046395765511575001006411006001028032045132067448001020160000320000203200008000001018581007801180021109101080000800001016000000102040160002002160002242005019005176510047116000032000010101740101286101976100645101268