Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 4 regs, 4S)

Test 1: uops

Code:

  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | ce | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640052947023722200000132471829218004229055000100040001001400050002070411693528481288173105000400450002884528882116100110001000140001140060040035148100133109477712132652391972731803817174139281511000151621245013661400010002876028650286022861828670
640042876022200101000948172847300422671500010004000100040005000207195169552836228741310500040005000285202863911610011000100004000840000240026148000131709499698231751341979831703824194343282311000155181242513902400010002869828694287302874228709
640042869722201000100047192862420022821500010004000100040005005207004169492825528733310500040005000285402854911610011000100004000840000040005100000131519356695632060451971532143822154043282601000155891239713715400010002868228714288032882628779
64004287462230011000084803286450002267150001000400010004000500020709516949283392870831050004000500029027289671161001100010001400004007004002616400141392710246702331650501968933173835184146283661000154531256113945400010002891128922287972889328782
640042884722300000000846832869624022575500010004000100040005000207235169572806728439310500040005000284452851011610011000100004000840000040030100000132999640713934260411964831863829163535282091000155141200013229400010002850428577285662844728400
640042860722020100000047462838720422599500010004000100040005000207146169812825628855310500040005000284612859911610011000100004000840060240006100000132809383701731741391962132193828213641281031000151001210613154400010002854328421287382870028623
640042834022200000000049532858500022502500010004000100040005000207224169452838728775310500040005000286002849411610011000100014000840000040000160000133749933712631750411979533253825174139280321000147181214913129400010002844728611287232855328570
6400428697221010000001348852839900022486500010004000100040005000207136169682826428629310500040005000284462858211610011000100004000840060040000140000131389341698130940361968331393820123939281901000155121241113668400010002867828668287462860228624
640042865022210000010848862865600022709500010004000100040005000207164169342839328766310500040005000286732852711610011000100014000040000340030140000131589287695831050362014531503829134436284281000152841252213679400010002862628710286512871228689
640042859922001100001897472928709040229515000100040001000400050002070621693328690290403105000400050002806528334116100110001000140008400203400701480001380110295720933820371965234363827123841279441000146761227813375400010002833328291284022851128318

Test 2: throughput

Count: 8

Code:

  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.3341

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020510673182700000018000021067143142125400100801003200008010032000048049447348660001067061067151067082667932671840010020032019220040000010673110672711802011009951001008000080000010032000004503200420042320042614145000051100011611106728800011010032000080100106736106736106756106732106728
3202041067358270000004800011069973442125400100801003200008010032000048049447347670001066871067311067352665632671440010020032000020040000010673110673111802011009951001008000080000010032000004503200420042320042614245000051100011711106724800031414032000080100106709106732106880106728106732
3202041067318270000006000011068750442325400100801003200008014732000048049447321830001068161067311067312668732671040010020032000020040000010673510672711802011009951001008000080000010032013205803200530054320054615544000051100011711106736800531414032000080100106732106734106732106733106713
3202041067278270000004800021068473410254001008010032000080100320000480775473467100010670610670810673526679326714400100200320192200400000106731106727118020110099510010080000800000100320522045032004220032004201424500005110541161110673280001014032000080100106736106728106728106728106884
320204106731827001100480101106869344212540010080100320000801003200004804954665926000106710106731106735266793267144001002003200002004000001067311067311180201100995100100800008000001003200000450320039003932004261045000051100011611106728800031410032000080100106732106740106732106740106732
32020410670882700000048000210703430152325400100801003200008010032000048049446552510001067061067351067352667932671440010020032000020040000010672710673711802011009951001008000080000010032000004503200421042320042614145000051100011611106732800111414032000080100106732106732106732106728106736
32020410670882900000048000210699534421254002788014732000080194320356480494468532100710684110703010702926868112701640055020032019220040048010718310703431802011009951001008000080000110032013024533320434021617320432614245000051470013411106998811031010132000080100106930107036106886107188107186
320204107185829100012325176001107170244225400100801003200008010032000048049447346710001067061067351067082667932671440010020032000020040000010673510672711802011009951001008000080000010032000004503200420042320041614245000051100011611106734800041014032000080100106732106712106736106732106713
320204106731827000000570002106869044212540010080100320130801003200004804944734479000106706106731106727266593267144001002003200002004000001067311067271180201100996100100800008000001003200000450320042004232004261384500005110001161110673280004015032000080100106732106737106732106728106713
320204106731827000000480002106848344232540010080100320000801003200004804944734479000106706106731106735266793267144001002003200002004000001067311067271180201100990100100800008000001003200000003200420042320042614245000051100011611106728800031010032000080100106737106732106732106732106732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200251067328271000100040002106717218017254000108001032000080010320000480047465505601067071067321067322667532671040001020320000204000001067321067321180021109010108000080000010320000180032000010058320000611844181050191516341067298000099132000080010106733106733106733106733106733
32002410673282810100000650021067202181517254000108001032000080010320000480026470868101067071067321067322668032671540001020320000204000001067321067321180021109010108000080000010320018193803200581115832004160180182050200316331067298000009132000080010106735106733106733106715106733
32002410673282810010000000210671720181254000108001032000080010320000480046473447901067071067131067132667632671540001020320000204000001067321067131180021109010108000080000010320000194403200420115832000061580181150190316441067118000099132000080010106733106733106715106733106733
32002410673282710010000650021067172181812540001080010320000800103200004800424680790010671010671410671426680326697400010203200002040000010673210673211800211090101080000800000103200191844032005800059320042613544010502007163310672980000010132000080010106733106733106733106739106733
320024106732827101000006500210669921518162540001080010320000800103200004800474734479010668910673210671326681326715400010203200002040000010673210673211800211090101080000800000103200001944032004210142320041615844181050200316331067298000099132000080010106733106722106733106734106737
32002410673682710000000650021067172015172540001080010320000800103200004800434706976010670210673210673226680326715400010203200002040000010673710673211800211094101080000800000103200191841032005800021473200006158441810509723254310673780000910132000080010106741106756106728106715106737
325397107454857111011006500210671731818162540001080010320000800103200004800264662214010670710673210673226680326715400010203200002040000010673210673611800211091101080000800000103200001844032005810018320041615844180050200316331067298000009132000080010106733106728106715106733106733
3200241067328271011000066003106724318182125400010800583201308001032000048032447086810107095107544106746267042526981400010203200002040000010673710674011800211090101080000800000103200191838032005910259320040001844182050190416441067298000099132000080010106715106714106733106733106733
3200241067328001001000048010106699315181625400010800103200008001032000048004747344790106695106732106732266803267154000102032000020400000106732106732118002110901010800008000001032000018003200000000320041611844181050190417431067298000099132000080010106733106715106733106733106733
320024106713799111100006400010676521801725400010800103200008001032000048004146692650106702106730106739266843267004000102032000020400000106714106732118002110901010800008000011032001818380320059000623200396156001150200216431067298000099032000080010106733106733106728106733106728