Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 2 regs, 2D)

Test 1: uops

Code:

  st1 { v0.2d, v1.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
62005294732282100210000310465428759222411030001000200010012000500010092080160392861829294910300020005000291132919821610011000100020000602000000200006000130969288686931600522076232323798195047283101000161051359515068200010002918629369292662923729202
620042921222610000000001004689287882224193300010002000100020005000100000230160192828229464310300020005000292052918611610011000100020000602000000200006000129359290687031140512060732043805125251283861000161841349115002200010002932629397292812921129273
620042936322610000000001004645287212224223300010002000100020005000100000250160242828129278310300020005000291212917211610011000100020000602000000200006000129929092690130981492059731743793105553283931000161501370814912200010002932129261294092925329268
620042922222800000000001004701287472224199300010002000100020005000100000100160552842329245310300020005000291862918311610011000100020000402000000200000000131139253687030830492059531183812105058284411000163591371314999200010002917229463292742930829343
6200429313227000000000010145732882700241773000100020001000200050001000002016047284592935431030002000500029166292461161001100010002000060200000020000400013165928369333090057206913226380595152284411000163021362915095200010002937329394292392925929399
620042929522710000000000014676288080024181300010002000100020005000100000100160752859329393310300020005000292192914411610011000100020000002000000200000000130339304692331540472058332143808194748284681000162781361315102200010002930729430292772932629293
620042934422710000000000014651287940024167300010002000100020005000100000160160812848829261310300020005000290982907211610011000100020033012002202200020200130589464697731331532058832303808164744283731000163091364215219200010002934329407293242926829257
62004292642270000000000201463128922002425730001000200010002000500010001060160192850829247310300020005000291772921011610011000100020000002000000200000000129469335693632680442062232143801124953284051000162331354315005200010002913829315294322923029408
62004292982271000000000101467128809022423830001000200010002000500010000060160572833829317310300020005000291202900911610011000100020033622002112200020210129519308690131070472082631333811184747285071000162611340814888200010002930029325292122940229280
62004293152270000000000101459328811222420830001000200010002000500010000090160232843129449310300020005000290302911711610011000100020033012002002200020210129489231690830490522064632223811154854285601000163901356915097200010002916629278293882937729390

Test 2: throughput

Count: 8

Code:

  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  st1 { v0.2d, v1.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602058005462110011001900180039161602524010080100160000801001600009209063680004080026800548006459967360009240100200160000200400000800548005411802011009910010080000800001001600141544001600160018160002242005110116218004180000160000801008017980043800458004380043
16020480042620011000014001800391616125240100801001600008015616000092090636800520800188004280042599563600002401002001600002004000008004280042118020110099100100800008000010016000004200160002002160002242005123116118004080000160000801008004480043800448004380055
160204800426210000010180008002716161252401008010016000080100160000188035836794720800188004380042599563601052401002001600002004000008004280043118020110099100100800008000010016000004200160002002160002242005110116118004080000160000801008004680043800438004480043
16020480042621000000030008002816161512401008010016000080100160000180043036821981800188004280042599553600012401002001600002004000008017780042118020110099100100800008000010016000004200160002002160002242005110116118015980000160000801008004480044800438017880043
16020480042621000000030008002716161252401008010016000080100160000920906368486408002680063800645996836001224010020016000020040000080052800511180201100991001008000080000100160014144401160016011816000216421415110116118004980056160000801008005580055800658005380055
1602048018762011000103000800271616425240100801001600008010016000018803583679472080027800478005459966360012240100200160000200400000800488006311802011009910010080000800001001600141444011600160016160002242005110116118003980000160000801008004380043800438004480045
160204800436210000000181008016116161252401008010016000080156160000180043036794480800178017680043599553600002401002001600002004000008004280054118020110099100100800008000010016000004200160002002160002242005110116118003980000160000801008004380043800438004380175
1602048004362100000003000800271616125240100801001600008010016000018803583679448080017800438004259956116000024010020016000020040000080043801892180201100991001008000080000100160000042300160002002160002242005110116118003980000160000801008004380043800438004380043
1602048004262100000031700180039161512524010080100160000801001600009209103680004080038800548005459965360012240100200160000200400000800478006311802011009910010080000800001001600141444001600160079616000214441405110116118005080000160000801008005680048800558006580053
1602048005262111000001900180037161622524021680100160000801001600001080790367990808014880054801855996736000524010020016000020040030080123800541180201100991001008000080000100160015154401160016111716006016441405110116118016980000160000801008005580064801828004880064

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600258004262000000003018003601620252400108001016078081364160000920321368002808003080054800555998936003224001020160000204000008005280052118002110910108000080000101600151444001600140018160002164414050346168580060800000160000800108005580048800558005680053
160024800546201000011219018003716166025240010800101600008001016000092031336800280800278005480054599893600352401742016000020400000800548004711800211091010800008000010160014140001600161116160000164414050208166580044800000160000800108005580048800558004880053
160024800546211000000200180045160102524012680010160000800101600009203213680028080027800548005459987360035240010201600002040000080047800541180021109101080000800001016001414000160016011716000216014150206165680060800000160000800108005580064800548009280055
160024800476201001000220180040160702524001080010160000800101600001306581368005208002980063800636008036003624001020160000204000008004780055118002110910108000080000101600141444001600160121160002164414150204156480044800560160000800108004880065800558005680048
16002480186620100000917018056201650252400108001016000080010160000920313367969208043180047800545998936002724001020160000204000008005280052118002110910108000080000101600151444001600160118160062144414050204157480048800000160000800108005380048800528005580053
1600248005262110100001801800321616002524001080010160000800101600001399973367993308002780054800555998936003524001020160000204000008005480054118002110910108000080000101600151544011600160117161682164314050205156580060800000160000800108005580055800538005580053
16002480064621110100019018003916166025240010800101600008001016000092032136800280800298005480047599873600352400102016000020400000801958006411800211091010800008000010160015144400160014111916000216014150205168580050800000160000800108005880053800558005580055
160024800526211001001214018003901610252400108006616000080010160000920321367969208002980185800545998236003524001020160000204000008006380064118002110910108000080000101600741450011600160114160002164414150395166480049800560160000800108005580052800558005580055
160024801416201000010180180037016402524001080010160000800101600001080213367993308002780047800555998936003524001020160000204000008004780054118002110910108000080000101600151544001600160018160002164414050204164580048800000160000800108006580048800558006480055
16002480054621121000017018003716167802524001080010160000800101600001479937368000408002280051800475998936003224001020160000204000008004780054118002110910108000080000101600141444011600140014160002164414150205165780049800000160000800108018880049800558005580053