Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 4 regs, 2S)

Test 1: uops

Code:

  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6400629366227112103100030004823291580218207400020002000200020002180716000122191029059293553104000200020004000400029199292961161001100010002002241200201220002421013339928869183179048203403191380918494928660160671322714895200020002934029292293872926729439
6400429543229011000100030004707292060018296400020002000200020002180716000102189529056292483104000200020004000400029292292511161001100010002003460200201220002421013230894267312984255202963289381232544728422159501298214524200020002912629064291222916929170
640042910423301111100065000465028856221784740002000200020022000217971600082192028908292343104000200020004000400029503294651161001100010002002300200202220002623013455932269613202049204123303380819465728781163991330514641200020002951529519294942965429441
640042953823701000000203000474929298001865440002000200020002000218121600062193629223297726104000200020004000400029425293471161001100010002004201200206220002621013018938369293148147203993184381617455128693158811336514944200020002942429377293192942429389
64004293792270122010000300145582917620182254000200020002000200021878160005219122915629295330400020002000400040002915429380116100110001000200234120020239720002622013295927169723085047203613238381314404228807161471337914857200020002957529391294502940029362
64004293932270102001000300046662926500182194000200020002000200021820160004219392913429421329400020002000400040002925329392116100110001000200336120020139220002621013179933370023119149204033264381318544928678162591319714767200020002944029446293432933629367
6400429572228210100100123000470829191001819640002000200020002002218071600052192629204293423104000200020004000400029371293051161001100010002002360200206220002621013256940669703160146204223262381716484328734163161328614664200020002943029444293482936429400
640042942022701010110062000461229139001828640002000200020002000218011600012194629059294373104000200020004000400029397293661161001100010002002360200201220002621013323946968853148147204023277381416434728733162661323715007200020002934629421293022945329513
640042954922801000022193000461829174001833240002000200020002000218071600022188529158293233104000200020004000400029331293431161001100010002002341200280520002421013474950069383182045204723414381710445128630162111342514569200020002936529292293492932529424
640042946422801100000003000462629168001818940002000200020002000218021600042194428999293383104000200020004000400029232291721161001100010002002261200201220002421012975916769483036147203573239381715465128683162381346914754200020002930729248292702930329262

Test 2: throughput

Count: 8

Code:

  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202068007162000000003003819180031161602532447410016618216000010016000016000050022343381309774018002480045800450332320100200160000160000200320000320000800458004511802011009910010080000800001001600000340016000200216000023400005109117228004601600001600001008004680050800468004680050
320204800456200010000900520118003001602532543310016389016000010016000016000050022396821298117018002480045800440327320100200160000160000200320240320000800488004511802011009910010080000800001001600000340016000000216000223400005109117228004201600001600001008004680046800458004980044
3202048004562010100006006152180030161602532651410016422616000010016000016000050021588531300647018002380043800450327320100200160000160000200320000320000800458004911802011009910010080000800001001600000340016000210516000223400005111117118004501600001600001008004680046800468004680049
320204800496200010000310611808003016160253240001001664701600001001600001600005002398606129964401800248004580044032632010020016000016000020032000032000080049800481180201100991001008000080000100160000000016000200216000203400005111217218004201600001600001008004680049800468004680046
320204800486200000005613550069771800361616025324384100165860160000100160000160000500211256212946940180024842238316301772832055220016060016000020032000032027280048800481180201100991001008000080000100160000000016000000016000223400005109117118004201600001600001008005080046800468004680046
320204800456201010000000421218003401602532568410016322116000010016000016000050021584951293147018002480049800450327320100200160000160000200320000320000800458004411802011009910010080000800001001600000340016000200816000223400005111217228004201600001600001008004680050804688004880046
320204800495991010000300500018003016160253250801001649671600721665616342216064850023951991290952018002483477851460331333201002001600001600002003200003200008004580045118020110099100100800008000010016000003400160002550816000023400005111117228004201600001600001008004680046800468005080046
3202048004559900100003006164080034161602532629110016344716000010016000016000050022309831298614018002480045800450330320100200160000160000200320000320000800458004511802011009910010080000800001001600000000160000001861600022000005109117128004601600001600001008004580050800468004580050
3202048004460010100003006800180034161602632555910016422816000410016013416001650020774081295227018002480045800450131632013220016001616001620032003232003280045800451180201100991001008000080000100160000000016000200121600022001115116016218004301600001600001008004680046800468004780046
3202048004560010000003006176180030161602532540310016389916000010016000016000050022068401304612318002480045800450327320100200160000160000200320000320000800458004611802011009910010080000800001001600000420016000200016000224200005111217218004201600001600001008004680046800468004680046

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320026800566211010001900546318003716160253251851016509816000010160000160000502559888130350480024800528005203323200102016000016000020320000320000800518005311800211091010800008000010160015144400160016121716000224214050199178980048160000160000108005380053800518005280052
3200248023062010100018004739180036161602532496510166081160000101600001600005027198011304015800268006380045033332001020160000160000203200003200008005280046118002110910108000080000101600141444001600160016160002164414050198179880049160000160000108005280053800468005280052
3200248005362010020010600482018020816161876932365110166352160060101603541601085022501141293298803378038482386133214228320236201601201601202032048032048080211802202180021109101080000800001016013414426301600760093816012216441405047926101182816160000160000108022380221802248022180222
32002480224622122212641050048580801961616101137323354101639401600601016023616010850229471912970418002680050800510333320462201600001600002032000032000080063800511180021109101080000800001016001515444016001600171600021644140501981781080050160000160000108005280052800528019880053
3200248005262010100017005887180037161602532569210164650160000101600001600005025597341313758800278005380051033232001020160000160000203200003200008005280045118002110910108000080000101600151444001600160019160002164214050199179480060160000160000108005280052800518005680051
32002480051621110000170044701800351616025324877101657521600001016000016000050271943313023578002680051800510345320010201600001600002032000032000080051800451180021109101080000800001016000014447016001600211600021644141501981781080049160000160000108005180046800538005280052
32002480052620110003180055981800361616025325215101652601600001016000016000050279976312988218002480052800510334320010201600001600002032000032000080052800521180021109101080000800001016001514420016001600161600021644140501910176880048160000160000108005280052800638006680053
320024800636211000001710470818003616160253269581016593916000010160000160000502559849129825080027800528005103343200102016000016000020320000320000800528004511800211091010800008000010160015144400160016011616000216441405019817101080052160000160000108005280052800538005280052
320024800506211100012300705318004816160253242221016427516000010160000160000502638483132929980027800518005109343200102016000016000020320000320000800538005211800211091010800008000010160014144400160016002160002164414050198176880048160000160000108006380053800548005380064
32002480051621100001217005367180038161602532822810166955160000101600001600005027191631293624800268005080045033232001020160000160000203200003200008004480051118002110910108000080000101600151444001600020117160002164414050197179980048160000160000108004680052800528005280046