Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (single, S)

Test 1: uops

Code:

  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.004

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.004

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6500529356220210121110200004623288830001700650044004100040001000500047513000230622911329305310500010004000100080002917229354116100110001000100041100020100020201284891786943302365020159319038185383628342164831397215076100040002933329150295372921929339
6500429255219111010000263520004531287860001703350044004100040001000500047558300229402911129223310500010004000100080002919429118116100110001000100003100000100020201317791846920306974220155311838176373928297163541401514928100040002931829205292252927129262
650042929121919001800014010044892883600016930500440041000400010005007475481002296229090293033105000100040001000800029159292091161001100010001000021000001000212012823919668453060436203773135382012394028391163781395615016100040002922929293292852919329315
6500429237218140021000000004582288110101690550044004100040001000500047548000229022906229238310500010004000100080002906129105116100110001000100002100000100020301280394976843319844020211312538159373828353161991387814811100040002925929335293122933129319
6500429301219130115000200004737287170011694650044004100040001000500047538110228892903829253310500010004000100080002911829150116100110001000100003100001100020201312291576858304594520195308338168363728304159941394314774100040002924329204291852922429254
6500429248220220010000300004504287880111695850084004100040001000500047585200229162908429285310500010004000100080002910329017116100110001000100002100001100220201296090746898305183320200308938157383428467162091387314923100040002926629174292192924429275
650042925221911011800020000452828837010170035004400410004000100050004763800022874290232921331050001000400010008000292062906211610011000100010000210000110001030128289100680930291035201043183381611383728368163491389015024100040002923829207292632930429232
6500429265220170110010300004596288730011698950084004100040041000500247524105228702903729253329500010004000100080002914029137116100110001000100003100101100022301288292006858309964220122308738219383828289162671381715041100040002930729200292582925129253
65004292072201701160020000046162876301016969500840081000400010005057475261002288629044292483105000100040001000800029125291371161001100010001000021000021000202213227925668333064636202143124381410343528323161151394715056100040002923429269294042928629266
650042933321913001311062010045532881301016901500440041000400010005000475960002288229041293143105000100040001000800029110292091161001100010001000021000021001203012935941968583072740201703192382010393828377163581405514775100040002925629376292472924129239

Test 2: throughput

Count: 8

Code:

  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  ld4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205160061119811100000320000116003110679961254001161003200168000010032000080000500400008227154720160034016005316005379947038003540010020080000320000200800006400001600531600401180202100991001008000080000010080000015800110008001161015000051100117111600500066280000320000100160054160054160041160041160054
400204160053119900000000001000160025106799532540010810032000880000100320000800005004000072271547201600420160061160061799550380043400100200800003200002008000064000016006116004611802011009910010080000800000100800098080026104180018612624720051100117111600430009280000320000100160062160062160062160062160062
40020416006111991010000090010011600461067994625400116100320016800001003200008000050040003922716884016002701600611600467995503800434001002008000032000020080000640000160046160061118020110099100100800008000001008000782480008002680020612624700051100117111600581090280000320000100160062160062160047160062160062
40020416006111981000000060000116004610079946254001081003200168000010032000080000500400033227140680160027016006116006179955038004340010020080000320000200800006400001600611600611180201100991001008000080000010080008808002600298001801724610051100117111600580099080000320000100160047160062160062160062160047
4002041600611199111100007000001600311607994625400116100320016800001003200008000050040003022716884016004201601151600617995503800434001002008000032000020080000640000160061160061118020110099100100800008000001008000782380028007800200170600051100117111600580099280000320000100160062160062160062160062160047
400204160046119910100000320000116004616079961254001161003200088000010032000080000500400000227168840160042016006116006179955038004340010020080000320000200800006400001600461600611180201100991001008000080000010080008708002500258001961623710051100117111600580090280000320000100160062160062160062160062160062
400204160061119910100000701001160046166799612540011610032000880000100320000800005004000002271688401600420160061160046799550380043400100200800003200002008000064000016006116004611802011009910010080000800000100800087080029002580019612624700051100117111600580009280000320000100160062160062160062160062160062
40020416006111991101000032000011600460667994625400108100320016800001003200008000050040003322716884016004201600611600617994003800284001002008000032000020080000640000160061160061118020110099100100800008000001008000782380028002680019602523600051100117111600581099280000320000100160062160062160062160047160062
4002041600611199111010003201001160046160799612540011610032001680000100320000800005004000392271406801600420160046160061799550380043400100200800003200002008000064000016006116004611802011009910010080000800000100800077248002600268001861724711051100117111600581009080000320000100160062160047160062160062160062
40020416006111991111100060100016004616679961254001161003200168018010032000080000500400027227168841160042016004616006179955038002840010020080000320000200800006400001600611600461180201100991001008000080000010080008724800060026800000170610051100117111600580099280000320000100160062160062160047160062160062

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40002516005611990020000160044006799592540001810320008800001032000080000504000002271604401600400160040160059799530380022400010208000032000020800006400001600401600561180021109101080000800000108000000800001328001361022001050201517057160056171028000032000010160041160060160060160041160041
4000241600401199113300016002516679940254000181032000880045103200008000050400029227126881160037016005916005979953038004140001020800003200002080000640000160056160056118002110910108000080000010800000188001400800186014000005020517075160037013028000032000010160060160060160041160060160057
400024160040119900200001600251667994025400010103200088000010320000800005040002222716044116004001600401600597995303800414000102080000320000208000064000016004016004011800211091010800008000001080000022800180178000060022000050205170571600560101028000032000010160057160041160060160060160060
400024160040119800241001600441667995925400010103200088000010320000800005040000022716044116002101600591600597993403800224000102080000320000208000064000016005916005611800211091010800008000001080000008001400800006117000005020517075160056001028000032000010160041160060160060160060160057
400024160059119900240001600251667995925400018103200088000010320000800005040003722716044116004001600401600597995303800224000102080000320000208000064000016004016005611800211091010800008000011080000018800130080013011422000050208170571600561131328000032000010160041160041160057160060160060
400024160059119900200001600250667995925400018103200088000010320000800005040002922712688116004001600561600407995303800414000102080000320000208000064000016004016005611800211091010800008000001080000008001401780000601418000050207170671600560131328000032000010160060160041160041160060160041
400024160059119900231001600251607994025400018103200088000010320000800005040002422716044116002101600591600407995303800414000102080000320000208000064000016005616005611800211091010800008000001080000018800180188001461142200005020717077160037010028000032000010160057160060160041160060160041
400024160059119800010016002506079940254000181032000880000103200008000050400032227160441160021016005916004079934038004140001020800003200002080000640000160040160056118002110910108000080000010800482188001802080018011322000050205170861600560131308000032000010160060160057160041160057160041
40002416005911980031100160044066799084740011110320104800901032035280000504000262271604401600400160059160059799530380041400010208000032000020800006400001600401600561180021109101080000800000108000001880000224800006102200005041717067160056101028000032000010160060160060160060160060160041
400024160040119900320001600441007994025400103103200088000010320000800005040002922712688016004001600401600407995303800384000102080000320000208000064000016005916005611800211091010800008000001080000008001801780013611818000050205170751600561131008000032000010160060160041160041160060160057