Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 3 regs, 4H)

Test 1: uops

Code:

  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63006295132364030020000010046382912620185403000100020001000200015909800018002166829179294993103000200010004000200029337293351161001100010002003381200201820002020013235932468973156051207693380380021474128747164451325014981200010002950829372294082950729426
630042937723701100100012100046702920722185853000100020001000200015902800018002171429126294933103000200010004000200029414292771161001100010002003041200201220002000013221937569323183041207693209380722504528825162161336315090200010002939729468294652941729581
630042941023601000100021200047262926622185493000100020001000200015904800012002173129158295293103000200010004000200029222294231161001100010002000062200212020000000013293947869533137143208063384380822464328713163241342215103200010002937429306295202943529391
63004294092370010010000100046222921722184713000100020001000200015907800012002170629194293583103000200010004000200029377293111161001100010002004061200201220000000013232940668963188052207793265381425484928708160461334015199200010002949929459294432937929598
6300429392237000000000010004679292400018515300010002000100020001590580007002177429080294883103000200010004000200029435294051161001100010002000040200201020002020013029927568883118148208283239381221484828773161211336915359200010002954929464294582958329392
6300429469238010000000030004603293650018483300010002000100020001590380007002175029177294783103000200010004000200029498294871161001100010002002001200201220000400013037939070023120047207713293381329483728764164001326814936200010002953629383293802949929504
6300429402237000001000030004541292380018539300010002000100020001590880005002152029083293783103000200010004000200029357292891161001100010002003382200201220000600013065927168913181049207583302380524484928688159401351014913200010002949829402294422947029593
630042939523700100010001100464729297021854730001000200010002000159138000500217402910529398310300020001000400020002943729488116100110001000200306120020152000040035813208929469103102046207963227381321484528675162191351414958200010002956629458294212937529451
6300429579235001001000010004656292760018403300010002000100020001590280008002173229223295163103000200010004000200029367293971161001100010002000001200302020000600013088934868963167142209413254381219464728711162281337314853200010002946129436294862951329531
63004294702370000010000500046412931600184703000100020001000200015906800010002174229191294443103000200010004000200229399294721161001100010002004400200202220000000013216936868633105153209003297381220454328781163571348514830200010002948329500295332947629531

Test 2: throughput

Count: 8

Code:

  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020680043600000000305680025161612524078510080662160000100800001600005003679508640220080015080043800434995635000124010020016000080000200320000160000800438004311802011009910010080000800001001600000420016000210216000204200511011611800400160000800001008004480044800448004480044
24020480043600000001830358002816161252406471008003416000010080000160000500367953264013308001808004380043499563499982401002001600008000020032000016000080043800431180201100991001008000080000100160000042001600020061600022000511011611800400160000800001008004480044800418004180044
240204800436210000003092680028161602524013810080042160000100800001600005003679508640220080018080043800404995634999824010020016000080000200320000160000800408004511802011009910010080000800001001600000420016000200216000024200511011611800370160000800001008004480044800448004480044
2402048004362100000030104480028161612524015010080028160000100800001600005003679532640127080031080043800434995635000124010020016000080000200320000160000800438004311802011009910010080000800001001600000420016000200516000224200511011611800400160000800001008004680044800448004480044
24020480043620000000304880028161612524057610080476160000100800001600005003679532642580080018080043800434995635000124010020016000080000200320000160000800438004311802011009910010080000800001001600000420016000200216000224200511011611800400160000800001008004480044800448004480044
2402048004362000000030248002816161252401301008003816000010080000160000500367950864016008001808004380043499563500012401002001600008006220032000016000080043800421180201100991001008000080000100160000044001600020021600020020511011611800370160000800001008004480044800448004480044
24020480040620000006307168002816012524012910080388160000100800001600005003679388641299080018080043800434995635000124010020016134581910202320000160000800438004311802011009910010080000800001001600000420016000200516000224200511011611800400160000800001008021180047800448004180044
240204800406200000012304280197161602524012610081044160000100800591600005003679532643888080018080043802134995635000124010020016000080000200320000160000800438004011802011009910010080000800001001600000420016000200216000224200512311611800400160000800001008004480044800448004480044
2402048004362000000040996800281601252401471008091916000010480000160000500367953264026108001808004380043499563500012401002001600008000020032000016000080043800431180201100991001008000080000100160000000016000210216000224200511011611800370160000800001008004480044800448004480044
2402048004362100000091010218002516161252407121008003716000010080000160000500367953264302108001808004380040499533500002401002001601238000020032000016000080043800431180201100991001008000080000100160000042001600000031600002000511011611800400160000800001008021380041800448004180044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240026800426210000003006658020716160252410221080699160000108000016000050368642064001108002580042800434997735003024001020160000800002032000016000080042800421180021109101080000800001016000003400160002005160000234005020081687800390016000080000108004380043800518004380043
240024800426200000003009228002716160252408361080788160060108000016000050367986864002108002680214800424997735002224001020160000800002032000016000080050800501180021109101080000800001016006003400160002102160002234005020071687800390016000080000108005080043800438005280043
24002480042621000000900560800270160252406991080836160000108000016000050367946064256208001780042800424998435003024001020160000800002032000016000080042800421180021109101080000800001016000003400160002005160002234015020081688800390016000080000108004380043800508004380043
240024800426200000003106718002716160252410621080633160000108000016000050367946064318008001780042800424997735003024001020160000800002032000016000080042800421180021109101080000800001016000003400160002005160002234005020091687800390016000080000108004380043800508004380043
24002480040620000000910048003616160252401841081012160000108000016000050367946064208008001780042800504997735003024001020160000800002032000016012480042800401180021109101080000800001016000003400160002002160002234005020081678800370016000080000108005180043800438004180041
24002480048620000000300559800270164252408801080659160000108000016000050367938864243508001780042800424998575002224001020160000800002032000016000080042800421180021109101080000800001016000003400160002005160002234005020071687800370016000080000108005180043800438005080043
2400248004262110000273007308002716160252407421080999160000108000016000050367946064254018002480042800424997735002224001020160000800002032000016000080217800511180021109101080000800001016000003400160002102160002234005020081698800390016000080000108005080043800438004380214
240024800426200000000005048002716160252406771080667160000108000016000050367946064244108001780042800424998585002224001020160000800002032000016000080042800421180021109101080000800001016000003400160062102160002234405020071687800470016000080000108021480043800418004180043
2400248004262000000060010148002716160252400121080110160000108000016010850367946064001208001780042800425010735002224001020160000800002032000016000080042800421180021109101080000800001016000023400160002005160000234005020081687800390016000080000108004380043800438005180041
2400248004262200000090010138002716160252408921081461160000108000016000050367986864001508002680042800424997735002224001020160000800002032000016000080049800501180021109101080000800001016000003400160002002160002234005020081687800390016000080000108021080043800438005080043