Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (multiple, 8H)

Test 1: uops

Code:

  st2 { v0.8h, v1.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6400729105234112200291003100472828746021807640002000200020002000216171600012082180402882029173310400020002000400040002878328880116100110001000200006020000002002040171130619370690531611256200373212381015625828559157461290014625200020002900629002291422904128975
64004291032330020002300015110463528825001798540002000200020002000216201600070821823028821291063104000200020004000400029000289691161001100010002000040200000020000400132219618683731511152201643283382224555628473159511305214557200020002899028974290242912929020
6400428991233002200310003100468428930001799140002000200020002000216101600040021874029262294371029400020022002400440002945329657216100110001000200026220010221520000620132089437687031811254207443379380518525028924163801346715068200020002971029669295472959629688
640042962723800211028023132000458929512221858540082000200220022000216331601660821921029415297671010400020002002400040082974929629216100110001000200406520000049320020000132519414694131771355205373286381616575328780161091337914933200020002973329454294692948729686
640042956923600210025010000046832925502183844000200020002000200021628160009002182202899129409310400020002000400040042958529695116100110001000200006020000048620000600131809640694431381549204693214381614605728748163781328514742200020002942429518293352927229381
640042934823700200033000011200470129167021839940002000200020002000216061600020021791029111293823104000200020004000400029385293901161001100010002000040200000320000600129058881676030471056209443132381562575329414163981320815021200020003026230200303403021130017
64004299532420025003000018000048002973422188104000200020002000200021622160005002178902906329283330400020002000400040002942629482116100110001000200004020000062000060102613113957169653120858205283371381615574828770163091322414793200020002949129362294262943829387
64004294392370028002300000004708292842218434400020002000200020002161616000900218530292012954731040002000200040004004299443005115161001100010002012400201410794020200020129439222678330721651207933156380732455229102161251340215138200020003010129972300273011630335
64004300532420021003100001004766287282217669400020002004200020002164016000150821784028557287723104000200020004000400028748287281161001100010002000000200000020000000132149295682330961259198523194381816585428320154861273914343200020002880628917287302878928751
6400428885223002900190000300465428656201774540002000200020002000216201600051021811028603287958104000200020024000400028727287531161001100010002000000200000020000000130859520690031361461198213215380812545428287157171281214209200020002873428811287782887728883

Test 2: throughput

Count: 8

Code:

  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  st2 { v0.8h, v1.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320207800706211000027190586618003416160025323658100164822160000100160000160000500359929813042260800358005880059033232010020016000016000020032000032000080058800591180201100991001008000080000100160000153610160016002916000216361410005109117118005801600001600001008005180059800508005180060
3202048005062010100018055971800361616002532272310016528816000010016000016000050036792001297489080024800588005803323201002001600001600002003200003200008004480050118020110099100100800008000010016000003400160016102116000216361400005109117118004701600001600001008004580059800518005180051
32020480058620100000210477018004516160025325980100160823160000100160000160000500271984112983060800258004980048033332010020016000016000020032000032000080058800581180201100991001008000080000100160000143600160002001716000216361410005109117118005501600001600001008006080050800608006080051
3202048006062110100020063430800361616002532538610016122116000010016013416000050036793041295275080035800588005903313201002001600001600002003200003200008005180050118020110099100100800008000010016001514340016001601171600021601400005109117118004901600001600001008006180045800488005180051
32020480061620100000170466708004416161025323624100165019160000100160000160000500367930412904880800338005880050034032010020016000016000020032000032000080044800501180201100991001008000080000100160014143600160002018160002034000005109117118005601600001600001008005380051800608006180045
3202048005262110000121905821800441615002532593510016435116000010016000016000050035991641293512080026800598005803333201002001600001600002003200003200008005980059118020110099100100800008000010016001414360016000210016000216361400005109117118006201600001600001008006580053800598006080051
32020480051620111000190344418004401600253232371001624261600001001600001600005002479917129164408003380058800520134332010020016012016036020032000032000080048800611180201100991001008000080000100160015153600160016001616000216361410005109117118004701600001600001008006280050800518005980046
32020480060621101000190242518004316160025325427100166125160000100160000160000500367923812954720800258006280057033432010020016000016000020032027232000080058800581180201100991001008000080000100160015153401160016202016000216361410005109117118004801600001600001008004680046800508006080060
3202048005862110100019035031800351616002532391010016658016000010016000016000050035993651295430080025800598005903393201002001600001600002003200003200008005080051118020110099100100800008000010016025773601160016101616000216361400005109117118004801600001600001008005380059800508005180060
3202048005862111000019067301800351616002532590110016563616000010016000016000050036793731303525080034800508005903393201002001600001600002003200003200008005180049118020110099100100800008000010016000003600160016001716000216361410005109117118004701600001600001008006080062800538005180061

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002780071620110100121802434180036150025325365101662531600001016000016000050271979912999681080026080051800630334320010201600001600002032000032000080052800501180021109101080000800001016001615000160016112116000216441415019031743800490160000160000108005380052800648005280053
32002480052620100008014043131800351616025320014101629481600001016000016000050223811912991791080025080050800500334320010201600001600002032000032000080050800501180021109101080000800001016001414440116001601141600021601435019031734802060160000160000108005180052800538022380051
32002480051621110100122003859180037160025325032101612171600001016000016000050239992012937220080026080050800510732320010201600001600002032000032000080051800541180021109101080000800001016001415000160016011616000016441415019042634800490160000160000108005280052800538005280064
32002480053621100100111170167718003616160253258801016366816000010160000160000502399803129440210800260800508005003333200102016000016000020320000320240800528005011800211091010800008000010160014144400160016002516000216441415019041734800490160000160000108005280051800528005180052
3200248006262010010001908236180037161602532471810164701160000101600001600005023998311307011008002508005280050031273200102016000016000020320000320000800508005211800211091010800008000010160015144423160016001716000016421405019031743800490160000160000108005380053800528005280052
32002480052621110000014057811800371616025325659101651901600001016000016010850247977513034900080025080062800500345320010201600001600002032000032000080051800511180021109101080000800001016001414420116001400211600021601405019031743800490160000160000108005380052800518005280053
3200248004862110000001701124118003716160493246341016432416000010160000160000502799785129864110800270802298005003333200102016000016000020320000320000800628006211800211091010800008000010160077164400160016001616000016441405019031743800480160000160000108005380052800648005280053
3200248005162010000031701118003616160253247181016470016000010160000160000502479775129457200800280800518005003333200102016000016000020320000320000800518005111800211091010800008000010160014144400160016011616000214441425019031723800480160000160000108005280049800538005280063
3200248005162010000062104708180048161502532294110164444160000101600001600005026418881303540008002608005180051073332001020160000160000203200003200008005180052118002110910108000080000101600151400116001603211600021601415019041734800490160000160000108004880054800528005280052
3200248005064211000012140589308002916160253242071016918716162010160590160108502079511129911710800260800518005003323200102016000016000020320000320000800508005111800211091010800008000010160015144400160016012016000214441415019031734800480160000160000108005380051800528005380051