Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 2S)

Test 1: uops

Code:

  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.016

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.016

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
660052939122012320171000810456628790020168056016401620004000200010000475544229842910429248310600020004000200080002904429115116100110001000020044420050022002424221274992926870308054420091308438138474528409163901315814793200040002928129213292942929829271
66004293292191191120100010104580288370001678360044004200040002000100004749832300729004291603106000200040002000800029025290441161001100010000200340200401220026462212983920868663045940199813133380812414528424163361333214680200040002917229284292762927929227
660042930421911912170000810457328733000168456016402020004000200010000475067231012910729229310600020004000200080002906229095116100110001000020022620040252000040201290792996849305964320073311738129474128423162331323714629200040002926429179291912927129265
6600429192219121111800008004635287922011683560044016200040002000100004743272300529007292583106000200040002000800029089291261161001100010000200244200401220024242112921904168413054936201403088381012373828404160731323414803200040002928729255293322927029284
6600429270218117101610002104726288260201689160164004200040002000100004756282304529150292983106000200040002000800029071291281161001100010000200446200400520024442212955926568383059740200293104381510393828378164611345514975200040002920629279292602922729189
6600429223218117111610002104560287210001681660044016200040002000100004767422302829133292203106000200040002000800029138291571161001100010000200246200400220024442212954922068363089641199873088381713444228418162961314814744200040002919829329292352926529172
660042928422012011191000800457728773012168206012401620004000200010000475005229732915729311310600020004000200080002911329106116100110001000020053620030042002444201292691946827306364020047311938128444428309162051316314890200040002919829252293182926629182
6600429240219113111310008004659288420011680560204016200040002000100174759232297729145293203106000200040002000800029120291291161001100010000200326200201420020402013111905368443085739201363082381315414128420165251327814989200040002923629280292462935229259
660042928522011300151000500458228811200168836016401220004000200010000476628229722913429232310600020004000200080002918929190116100110001000020023420030022002426221289091636870305783819993308538138444828368163331324814815200040002927029188292402916829310
660042917421912211161010800458028789002168246004401620004000200010000475643230122909529305310600020004000200080002917629221116100110001000020042020030122002040201292991736820303154420054308638146424328397162531333714860200040002921529192292692931329253

Test 2: throughput

Count: 8

Code:

  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802058007059900110102800492012025480184100320076160000100320000160000500800042960000018004508006080064034248010020016000032000020016000064000080041800591180201100991001008000080000010016000035016003600361600396000151092171180057001001600003200001008006580065800658004280065
48020480041599100000028002621212025480184100320000160000100320000160000500800853697596418002208006480064034248010020016000032000020016000064000080064800411180201100991001008000080000010016000035016003200361600006132400510911711800610141001600003200001008006580065800658004280065
480204800646000000421008004921212025480184100320084160000100320000160000500804079960000018002208006480041034648010020016000032000020016000064000080064800601180201100991001008000080000010016000035016003200361600326132005109117128004100701600003200001008004280065800658006580065
48020480041600000042002800452120025480184100320084160000100320000160000500800377960000018004108006480060034648010020016000032000020016000064000080064800411180201100991001008000080000010016000000160000000160032600005109117118006100001600003200001008006580042800658006580065
48020480064600000001028004900120254801841003200841600001003200001600005008003771088000008002208004180041034248010020016000032000020016000064000080060800411180201100991001008000080000010016000035016003200016003201360051091171180061114001600003200001008006580042800428004280065
480204800415990001000280049212120254801841003200001600001003200001600005008003759600000080045080064800640346480100200160000320000200160000640000800648006011802011009910010080000800000100160000001600360036160032613200510911711800611101001600003200001008006580061800658006580061
48020480064600001000008004901212025480100100320084160000100320000160000500800377108799961800220800658015203464801002001600003200002001600006400008004180041118020110099100100800008000001001600003501600360036160000613240051091171180061014001600003200001008004380065800658006580065
480204800646000010420028002600002548010010032008416000010032000016000050080379896000001800410800648006403464801002001600003200002001600006400008006080041118020110099100100800008000001001600003501600361036160000613640051091172280038114001600003200001008004280065801478004580042
480204800645990010420008004920002548018410032000016000010032000016000050080118710880752180045080064800640323480100200160000320000200160000640000800608004111802011009910010080000800000100160000001600360036160039613640051091171180038010001600003200001008004280065800658006580061
480204800645991000420008002601200254801001003200001600001003200001600005008011876722740080045080064800410323480100200160000320000200160000640000800608006011802011009910010080000800000100160000001600360001600396032400510911711800610141001600003200001008006580065800658006580065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4800258006060001104700080045012025480010103200761600001032000016000050800374384000080022800608006003234800102016000032000020160000640000800608006011800211091010800008000001016000035160000013716003261323500501931733800571100160000320000108006180061800618006180042
48002580060599000038000800452120254800861032000016000010320000160000508004149600000800418004180060034248001020160000320000201600006400008006080060118002110910108000080000010160000351600000991600320100005019317328005711010160000320000108006180061800618006180061
48002480060599000001008004520122548001010320076160000103200001600005080010396000008004180041800600342480010201600003200002016000064000080060800601180021109101080000800000101600003516003201711600000100005019317238005712810160000320000108006180061800618006180061
4800248006059900003810080045212122548008610320076160000103200001600005080048496000008004180060800410342480010201600003200002016000064000080060800601180021109101080000800001101600003516003201216003261035005019317468017101010160000320000108006180042800618006180061
4800248007259900003810080045000254800101032007616000010320000160000508000009600000800228006080060034248001020160000320000201600006400008004180060118002110910108000080000010160000351600320269160032613235005019217338005711410160000320000108006180061800618006180042
4800248006059900003810080026012122548008610320076160000103200001600005080037796000008004180060800600342480010201600003200002016000064000080061800441180021109101080000800000101600000160000028116003200320005019317338005701010160000320000108004280042800618006180042
4800248006060000003800080045212122548008610320076160000103200001600005080043196000008004180060800600323480010201600003200002016000064000080060800411180021109101080000800000101600000160032052160032603235005019217338003801010160000320000108006180042800618006180061
480024800696000000380008004521202548008610320076160000103200001600005080048296000008004180060800600342480010201600003200002016000064000080060800601180021109101080000800000101600000160032029516003201323500501931733800571140160000320000108004280061800618004280061
480024800695990000380008004501212254800101032007616000010320000160000508003779600000800858004180060034248001020160000320000201600006400008004180060118002110910108000080000010160000016003205116000061323500501931733800571010160000320000108006180042800618004280061
4800248006760000003810080045012122548009410320076160000103200001600005080046438400008004180060800420323480010201600003200002016000064000080060800601180021109101080000800000101600003516000002831600326032350050193173380057100160000320000108004280061800428006180061