Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 4 regs, 8B)

Test 1: uops

Code:

  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f243a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5e5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
640052901023325002001000280046662847700223591300010002000100020005000100040101621928265289368103000400040002878128724116100110001000020000620000202200200260013139954269603162145219646322338122252522817210001598013043145182000200010002871928762287872886628861
640042887023119001200000500480528639011236363000100020001000200050001000005162422836328824310300040004000287922887811610011000100002000042002000020024024001310095816925319885519875316138182357572825310001574813126145182000200010002893028955289612885928864
64004288042312300180000040046232859400023727300010002000100020005000100000101622028296287813103000400040002868128614116100110001000020000420020100200040240013011935668913141105119818321038211253532832810001578512997145152000200010002883528783289042885628869
64004289352331800240000040046822855100223752300010002000100020005000100030101624128441286053103000400040002874828753116100110001000020000420020000200200040013208930469263102105319909321538101953532823310001585812941144192000200010002879428921288242895128812
64004288472312500200000000046272852700023659300010002000100020005000100000101623328432289593103000400040002877728718116100110001000020000420000000200000240013204955569773173105419899323738282050522827410001549413135141652000200010002881128853288452901328734
6400428929231230017000008004688285680202362130001000200010002000500010000091622428411288543103000400040002871328743116100110001000020000620000000200040060013076935369023192115419867338938192955592829010001569512962142852000200010002881628913290202891129020
6400428943233190010011001370046482855500023592300010002002100020005000100000101626128304288473273000400040042871428805116100110001000020000420000002200040200013143921468543143146019763328338142449592828410001584613081145742000200010002885628833289322892428933
6400428818232221023000001900481928569000235823000100020001000200050001000005162672848228883310300040004000287162881111610011000100002000002000000320000006001316592066866313454719893322038242350542823310001540712817145672000200010002896428909289022896628858
6400428743233220018000014004591285240002369030001000200010002000500010072011625628384287937103000400040002870728771116100110001000020000420020100200020040013373948168923081126019913321938142050482828510001544313142145432000200010002881829026289152883628738
6400428776232190015000000004647285050002367830001000200010002000500010000041624128447288553283000400040002874228764116100110001000020000420020000200040240012986934569233137125119784325038111850512822510001562513036144142000200010002877128890287502878628952

Test 2: throughput

Count: 8

Code:

  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22243a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
3202058004264310200100660002800262141502524010080100160078801001600004824441035378080016800418004103232402172003200002003201568004180132118020110099100100800008000010016001111401601180106116003761474010100512111711800388003999160000160000801008004280042800428004280042
32020480041652111101017900028002621515272524010080100160000801001600004804999638420800168004180041613232401002003200002003200008013180041218020110099100100800008000010016001211401601190105016003661474010100510911711800388003999160000160000801008004280042801348004280042
3202048004165212110100329000280026213140252401008010016000080100160000480499960928080016800418013209232401002003201562003200008013180041118020110099100100800008000010016001114401600481015016003661464010100510911711800388000099160000160000801008004280042800428004280042
3202048004164310100100540002800263141427742404518021716007880100160000480499963729080016800418004103232401002003200002003200008004180041118020110099100100800008000010016001212401601920214916003761464010000510911711800388000099160000160000801008004280042800428004280042
3202048004264311000100227000280026215140252401008010016000080100160000480499960879180016800418004103232401002003200002003200008004180041118020110099100100800008000010016001011401600480005416003661474010000510911711800388000099160000160000801008004280042800428004380042
320204800416431000010056000280026214140252401008010016000080100160000480499960877080016800418004103232401002003200002003200008004180041118020110099100100800008000010016001110421600460004916003761483911100510911711800388000099160000160000801008004280042800428004280042
320204800416431010010059000280026215140252401008010016000080100160000480499961160080101800418004103232401002003200002003200008004180041118020110099100100800008000010016001112431600490104616003661484211200510911711800388000099160000160000801008004280042800428004280042
320204800416431111010068000280026215140252401008010016000080100160000480499961188080016800418004103232401002003200002003200008004180041118020110099100100800008000010016001111401600480115016003961464011100510911711800388000099160000160000801008004280042800428004280042
3202048004164310100100680002800262151402524010080100160000801001600004804999608890800178004180041032324010020032000020032000080041800411180201100991001008000080000100160011114016004711049160037614740110005109117118003880000109160000160000801008004280042800428004280042
32020480041643101001006800028002611415025240100801001600008010016000048049996088408001680041800410323240100200320000200320000800418004111802011009910010080000800001001600101040160048010541600386147011100510911711800388000099160000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22243a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)67696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
320025800556431101000059010280026214140252400108001016000080010160000480049960916080016080041800410323240010203200002032000080041800411180021109101080000800001101600111040160048201116003601480110050191171180038080000990160000160000800108004280042800428004280042
32002480041642110100005700008002601414025240010800101600008001016000048004996003008001608004180041032324001020320000203200008004180041118002110910108000080000010160000025160030103216002960027000501911711800381800001000160000160000800108004280042800428006180042
32002480041642000000004700018002621212025240010800101618708071416000048004996036908001608011180041082324001020320000203200008004180041118002110910108000080000010160000025160030002116002961220000501911711800381800001060160000160000800108004280042800428006180042
320024800416420000000038000180026212120252400108001016000080010160000480049960000080016080041800410323240010203200002032000080041800411180021109101080000800000101600000331600300029160030012233000501911711800380800001000160000160000800108004280042800428004280042
3200248004164300000000150001800262121238252400108001016000080010160000480049960196080093080041800410323240010203200002032000080041800411180021109101080000800000101600000251601011022160030612125000501911711800381800001060160000160000800108004280042800428004280042
32002480041642000000003600008002621514432524001080010160000800101600004800499609040800160800418004103232400102032000020320000800418004111800211091010800008000001016008510401600480047160000014640110050191171280038080000990160000160000800108004280042805138004280042
32002480041643110000007200028002601401025240010800101600008001016000048028196002308001608004180041032324001020320000203200008004180041118002110910108000080000010160069103316003200291600246121370005019117118003808000010120160000160000800108004280042800428004280042
32002480041643000000004100018002601212050240010800101600008001016000048004996034208001608004180041233232400102032000020320000800418004111800211091010800008000001016000000160032000160021012233000501911711800381800001060160000160000800108004280046800428004280042
3200248004164300010000270000800262121202524001080010160000800101600004800499601790800160800418004103232400102032000020320000800418004111800211091010800008000001016000002516000000301600306129330005019117118003808000010120160000160000800108004280042800478004280042
3200248004164200000100360001800262121202524001080010160000800101600004800499603380800160800418004103232400102032000020320000800418004111800211091010800008000001016000000160030103016003061293300050191171180038080000000160000160000800108004280042800428004280042