Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (single, D)

Test 1: uops

Code:

  st3 { v0.d, v1.d, v2.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
640072938123701900180000004661292450018181400020002000200020002159516000092181729085292973104000200020004000600629356293621161001100010002003441200202220000426013140943669183151745204193259381018525128636162131315114554200020002932929297292872941429227
640042938423411600191009104759290930018195400020022000200020002161016000062183829138293503104000200020004000600029309292771161001100010002005343200201220002620013263959069033153847203303260381418444928656159691328014910200020002941329285293282930929320
64004292312350170015000177304630291132218224400420002000200020002162516000032185229173293543284000200020004000600029355294291161001100010002000042200000020020421012994942269643138646203533234381713494828667160881300814647200020002930629354293562934029163
64004293252351211018100210045602915700180944000200020002000200021616160000021797290862946931040002000200040046000292732918911610011000100020042602002013872000242001304095356907315364520214320538099474828703163561317614399200020002925129312292962930529285
640042928523702110190000304713291852218179400020002000200020002161316000042184229080293683104000200220004000600029270291311161001100010002000240200000020000620013041950869433125851203103205381810475328658159881344114728200020002935829321292322945729372
6400429327236117112210021913304636292632018298400020002000200020002163116000092177828992292963104000200020004000600029282293491161001100010002004363200200220022820013160945869903144443202863231381313484628623160301327614655200020002921929243294052934829326
640042926723601800190002730460329185021817240042000200020002000216141600003218282915129230327400020002000400060002924329209116100110001000200006020000042520000422013009957969123128938203533239380812464528608159511324014776200020002921329279293112934929387
6400429288236117111721033104603291140018136400020002000200220002161016000052179529010293203104000200020004000600029230292701161001100010002003241200401220002820013365929469543184749203763234381211444028489160061337014979200020002950129540293342930229349
6400429278236018002100024304633290992018387400020002000200020002161816000012175729012293173104000200020004000600029260292831161001100010002000040200000020000620013172946369313046844203153262381325494428672161371312114917200020002947229474293272937429387
6400429395235120112010033004662292210018279400020002000200020002161616000002183029165293623104000200020004000600029297293141161001100010002003361200201220002420013024948269483133442204793287381513484428657163011308614737200020002942729512293382933029086

Test 2: throughput

Count: 8

Code:

  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  st3 { v0.d, v1.d, v2.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202078005862000000002130585808003716002532632310016570416000010016000016000050021582341299350800238004580045032632010020016000016000020032000048000080045800451180201100991001008000080000100160000004060160002002160002234000510981784800421600001600001008004680046800508004780048
32020480049621000002009058640800290160253254401001653221600001001600001600005002290786130207080022800458004403273201002001600001600002003200004800008004680048218020110099100100800008000010016001414360016001600191600021636141051091117910800471600001600001008005180051800518005280051
32020480052621100010096180162518003516002532686510016682316000010016000016000050035655321297271800258005180050034032010020016000016000020032000048000080058800591180201100991001008000080000100160015143600160016001616000216361400510981788800491600001600001008005980050800518005980059
320204800506201000100151806111180043161402532480110016122416000010016000016000050025598861300924800258006080058034232010020016000016000020032000048000080051800511180201100991001008000080000100160014153600160016111916000016361400510941794800571600001600001008006180052800608006080060
3202048005062010000000190515418003716160253257781001652721600001001600001600005003679133129719080036800508005803403201002001600001600002003200004800008005880057118020110099100100800008000010016001414369016001600181600021401410510971747800561600001600001008006180052800598005080051
32020480050621100000021210589118004416161625325296100164708160000100160000160000500367930413009838003380050800500332320100200160000160000200320000480000800588022611802011009910010080000800001001600141436011600160024160002163614005109817109800471600001600001008005280058800508006080051
32020480060643100000031805872180044161604632539210016478516000010016000016000050023999201297347801958005080049033232010020016000016000020032000048000080220800501180201100991001008000080000100160015140001600001102160002234000510981778800451600001600001008004980046800468005080045
32020480044621000000009072650800341616025325119100164194160000100160000160000500207791413027018002380049800490326320100200160000160000200320000480000800448004411802011009910010080000800001001600000340016000030925160002234000510981789800421600001600001008004580218800468004580045
3202048004462000000000306520080029161602532594310016439216000010016000016010850021582551295615800248004980045823263201002001600001600002003200004800008004580045118020110099100100800008000010016000003400160002008160002234000510981788800421600001600001008004680046800508004580046
32020480044621000000003053310800291616025326936100167215160000100160000160000500215939612951398002380049800440327320100200160000160000200320000480000800458004411802011009910010080000800001001600000340016000200922160002200005109917104801291600001600001008005080045800458005080050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320027800586211000001290610208003016160253240251016570416000010160000160000502319283129463308002480044800450333320010201600001600002032000048000080048800491180021109101080000800001016000003800160000008160002234005022131715780047160000160000108005180060800518004580045
32002480045620000000030456608002901602532389810163582160000101600001600005022392391295839080023800448004503263200102016000016012020320000480360831018120021800211091010800008000010160000434630160060029171601220340050192117971080357160000160000108022080210803778021280209
32002480210622000011132185044070803551608949322391101661031601201016011816010850214797512959190803278020980209816224320462201601201601202032048048036080044802162180021109101080000800001016006023462016000200928160062234005019121712680049160000160000108005180059800628004680045
3200248004662000000000068740800291616025324221101674511600001016000016000050215744512971170800228004880049032732001020160000160000203200004800008004580045118002110910108000080000101600000340016000200216000220005019111771280044160000160000108005280051800458004480045
32002480044621000000030591108003016160253239761016479616000010160000160000502228137130090308002380048800490326320010201600001600002032000048000080044800451180021109101080000800001016000003400160002002160002234005019517121280056160000160000108005980061800528004680046
32002480044620000000090649608003016160253259911016404616000010160000160000502159548130047608002380044800440327320010201600001600002032000048000080046800441180021109101080000800001016000003400160002005160000234005021111712680047160000160000108005280052800508004580047
3200248004462000000003061660800301616025324420101642991600001016000016000050239890717813980800248004580045032632001020160000160000203200004800008004480044118002110910108000080000101600000340016000200216000223400501961781280047160000160000108006080060800518004680045
3200248004462000000009044990800291600253259211016406916000010160000160000502383768129486408002480044800450330320010201600001600002032000048000080049800491180021109101080000800001016000003400160002002160002234005019617131380047160000160000108022880045800498004680045
32002480045620000000030413008003001603332630210166611160060101600001600005020793801294133080025800458004503263200102016000016000020320000480000800448004511800211091010800008000010160000034001600020001600022340050191217101280056160000160000108005980059801318004680049
320024800496200000000303821080034161602532338510164419160000101600001600005023191071294694080025800448004403273200102016000016000020320000480000800448004511800211091010800008000010160000034001600020071600022340050191217131380056160000160000108122080045800468004580046