Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 3 regs, 1D)

Test 1: uops

Code:

  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6300629089233104030012890452329105301831330031000200410002000159278008122167428913291081031300320021000400820062912629154416100110001000200224220031113852000062013198920368733115255206863282381818555328455158541289314379200010002912829187291132906329127
630042939623442031601047262872100178283000100020001002200015910800010217432840328717328300020001000400020002919928744116100110001000200004020000062000040013120923269463153150202733211382015585628311153091271714603200010002871928717286622864828776
630042875522223030001047602861500177333000100020001000200015914800010217572850328794310300020001000400020002871728712116100110001000200000020000002000004013112975369933173255202133164381227605528236151561277214305200010002872828677287172879528691
63004287482223313009104775285600017688300010002000100020001590480008216632851128742310300020001000400020002864228600116100110001000200004020000032000040013080937969403147244201593205381816515728277155591272814815200010003031029974292482945929329
63004293752273403001210469829136001825130001000200010002000159038000121673290632932331030002000100040002000291642931111610011000100020000402000003882000060013180938270193144354205983208382015525228661161881334814547200010002935529333294382936829286
63004293252272303000004670291880018224300310002000100020001590080004217002904129411310300020001000400420002920929269116100110001000200004020000002000040013237922069233228150206513134381813515228681161491311514721200010002928129336293322923729200
630042925722724000012104807291170018340300310002000100020001590680001217582909729291330300020001000400020002916729278116100110001000200004020001002000060013096954969403179253205683321382218495028706161071299114929200010002936829472293622950029385
63004293112282311000104674291680018323300010002000100020001591080005217772889929276610300020001000400020002924029248116100110001000200004020000002000060013191936969503154150207063301382716495028675159801304714784200010002935229488295452934029479
630042937822935010012104715291830018371300010002000100020001590580006217802902229222310300020001000400020002928429233116100110001000200006020000002000040012947919269743123150206403211382112485228471162851333914969200010002915829337292792936029342
63004294402271303000004702289800018237300010002000100020001590780000216942898829247310300020001000400020002932229189116100110001000200004020000002000060012895921769933166149205233161381915545228544162661338214860200010002932629503293152931729334

Test 2: throughput

Count: 8

Code:

  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  st1 { v0.1d, v1.1d, v2.1d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402068005462000000003032080028161612524011110080560160000100800001600005003679508640140080018800468004349956350012240100200160000800002003200001600008004380043118020110099100100800008000010016000004200160002002160002242005110011612800520160000800001008005380055800648005580055
2402048005462011000001905018003901652524068210080706160000100800001600005003680541640115080027800548005549968350012240100200160000800002003200001600008006680063218020110099100100800008000010016000004200160002006160002242005110011621800510160000800001008005480055800538005480055
24020480054620111000303029080028161612524062810080063160000100800001600005003679532640117080018800438004349966350001240100200160000800622003200001600008004380043118020110099100100800008000010016000004200160002002160002242005110011622800490160000800001008005580064800558005580044
2402048004362100000003096308002816012524012310080038160000100800001600005003679508640126080015800438004349956350001240267200160000800002003200001600008004380040118020110099100100800008000010016000004200160002005160002242005110011621800490160000800001008005580053800558005580053
2402048005462012110033210640080028161612524015610080029160000100800001600005003679388640131080018800438004349956350001240100200160000800002003200001600008004380043118020110099100100800008000010016000004200160002002160002248005110011612800400160000800001008021180044800488004480044
24020480045621000000030808002816161252408111008056416000010080000160000500367972464247208001880043800434995635000124010020016000080000200320000160000800498021011802011009910010080000800001001600000440016000210216000220005110021611801950160000800001008004480044802148004480044
2402048004362000000003011081415161612524071110080634160000100800001600005003679532641833080323800438004750334350001240100200160243800002003200001600008004380043118020110099100100800008000010016000004200160002002160002242005110011611800400160000800001008004480044800538004480044
24020480040622000000123031080028016125240121100805241600001008000016000050036795326420850800188004380043499553500012401002001600008000020032000016000080054800541180201100991001008000080000100160014144401160016001816000216461405110011611800400160000800001008004480044800418004380044
2402048015162000000003010470800391616525240141100800601600001008000016000050036800636400990800298005480054499653500192401002001600008006120032000016000080055800541180201100991001008000080000100160015144400160016001916006216441405110011613800490160000800001008022080065800538005580064
2402048005562110000001708541800371616325240775100800311600601008000016000050036800636400980800298005280051499673500212401002001600008000020032000016000080052802161180201100991001008000080000100160014144400160016011716000216441405110011611800490160000800001008005580065800538005580064

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 23 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400268004362000012301800281616125240672108052016000010800001600005036795326414820800158004380043499783500232400102016000080000203200001600008004380043118002110910108000080000101600000420016000200216000000050205164480040016000080000108004480044800448004480044
2400248004362100012305638002816161252409281080006160000108000016000050367950864168008001880043800434997835002224001020160000800002032000016000080043800431180021109101080000800001016000004200160002002160002042050203163380040016000080000108004480041800448004480044
24002480043621000030557800321616125240010108060316000010802361600005036795326417460800188004380043499783500232400102016000080000203200001600008004380043118002110910108000080000101600000000160000102160002044050203165480040016000080000108004480044800448004480044
2400248004362100000058002816161252400121080007160000108000016000050367953264104408001880043800434997835002324001020160000800002032000016000080043800431180021109101080000800001016000004200160002005160002242050203163380040016000080000108004480044800448004480044
240024800436200001230495800281616108252400131080000160000108000016000050367953264216018001880040800434997835002024001020160000800002032024416000080040800431180021109101080000800001016000004200160002002160002244050203165580040016000080000108004480044800448004380044
2400248004062000003028002816161252400161080761160000108000016000050367953264050308001880042800434997875002324001020160000800002032000016000080043800431180021109101080000800001016000004200160002002160002242050204163480040016000080000108004180044800448004480044
240024800436210000302378002816161252400151080932160000108000016000050367953264124508001880043800434997835002324001020160000800002032024416000080043800431180021109101080000800001016000004200160000002160002242050204165480040016000080000108004480044800448004180044
2400248004062100012003800321616125240021108000616000010800001600005036793886400160800188004380043499753500232400102016000080000203200001600008004380043118002110910108000080000101600000000160002102160002242050204164480040016000080000108004480044800448004480041
24002480043620000144302578002816161252400131080004160000108000016000050367948464216008001880043800434997835002324001020160122800002032000016000080043800431180021109101080000800001016000004200160002002160062242050203164380195016000080000108004480044800448021680044
2400248004362000012301800281616025240016108000216000010800001600005036859406400000800188004380043499783500232400102016000080000203200001600008004080043118002110910108000080000101600000420016000200016000020050204253480350016000080000108004480044800448004480044