Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 8H)

Test 1: uops

Code:

  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.014

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.014

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
720052844221118220080105114280594214234120148014400080004000203379827780024785280352817831012000400080004000200002846528213116100110001000040008400402400001480137931028072583508750181033485381416454827902140851173811930400080002825628271283812821028265
72004285352111214008000521528127401421312000800040008000400020336983219002477928044283073101200040008000400020000282332813911610011000100004000840040040000000013450961772583508838180063401381314444327878151751211911780400080002813328446282462803128087
7200428113210191400100105114281370414084120008014400080004000203419823230024720282392808331012000400080004000200002820328426116100110001000140008400400400441400133361027671663433741181093421381013424427870139981172512046400080002850228645283812840028125
72004286842122117008000516728127041417212014800040008000400020343980764002471928135280703101200040008000400020000281502829411610011000100004000840040440024048013918105157208324294018410351738059424327875141871204412299400080002814028155281682813228091
7200428193211161210801050502812104140851201480144000800040002034098247120024726280882805731012000400080004000200002831928185116100110001000040000400406400440480139541025971553501946181253577380411494427988151071177012255400080002812728213282372834128098
72004281542101619000000526527928401406212014801440008000400020347983877002471128103281823101200040008000400020000280972814711610011000100004000840000240004140013927989370373497944181983437381117455627975142031155212025400080002810428276281362817728055
7200428157211131901000051862806340140291200080144000800040002033098295600247152815328533310120004000800040002000028075280431161001100010001400084004044004404801380710298723834551137181293407380913444227958139741169612087400080002827428099284802815928290
720042854721215180080005185281894414243120148000400080004000203359828190024712281372834931012000400080004000200002808828176116100110001000040008400407400400080137801023970373337639181093402381615444627939145541190312051400080002819528292283912844328099
72004285722121916009000508428050441426512014801440008000400020340983093002470028024281543101200040008000400020000281862841111610011000100004000840030440024028014077102687008346654218011342938127434027777141671173811858400080002811528192282042807928064
720042805021116130080105237279994413992120008000400080004000203439829180024742280652809931012000400080004000200002819228063116100110001000140008400002400441480138811014872223435844181303362381712454627873140301174612223400080002804228188284342823028060

Test 2: throughput

Count: 8

Code:

  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
960205160072119910100100006301031600542660025960541100640068320000100640000320000500168123611520028116003001600691600694035196054420032000064000020032000016000001600691600491180201100991001008000080000010032001413440320053000533200006153441300051094174416006613130320000640000100160070160070160070160050160070
960204160049119910100100001301031600542665025960156100640060320000100640000320000500168123611520028016005001600491600503035196010020032000064000020032000016000001600711600491180201100991001008000080000010032001314440320053001533200406113441310051094174416006613132320000640000100160070160070160050160070160164
9602041600691199100001000013000216005400000259601681006400163200001006400003200005001680098217607520160050016006916004830331960100200320000640000200320000160000016006916006911802011009910010080000800000100320014134403200530015332003900534413200510941744160045000320000640000100160050160070160050160070160070
96020416006911991010010000130103160054260502596016010064006832013010064000032000050016815652176075201600500160049160049660351960100200320000640000200320000160000016006916004911802011009910010080000800000100320013140032005302056320000605344130005109417441600681302320000640000100160050160050160070160070160070
96020416006912021000000000590000160034206502596011610064005632000010064000032000050016812362176075201600300160049160069303319601002003200006400002003201241600000160069160069118020110099100100800008000001003200141343032005300056320040605344130005129417431600660132320000640000100160070160070160070160070160070
9602041600691199100000000059000316005426660259601681006400683200001006400003200005001681238217607521160050016004916006930273196010020032000064000020032000016000001600691600691180201100991001008000080000010032001414440320053101143200406013441310051094174416006613134320000640000100160070160052160050160070160070
96020416006911991010000000130000160054166002596015610064006032000010064000032000050016812492176075201600300160069160071003519601002003200006400002003200001600000160049160069118020110099100100800008000001003200131400320054000133200406153441310051094176416004613132320000640000100160072160050160050160070160050
96020416006911991000010000590003160034266002596011610064006832000010064000032000050016812442176075201600300160049160049303319601002003200006400002003200001600000160069160049118020110099100100800008000001003200141344032005410153320040615344131005113517421600660132320000640000100160070160070160071160070160070
960204160069119910100000005900001600542065025960156100640016320000100640000320000500168123621760752116005001600491600690033196010020032000064000020032000016000001600491600691180201100991001008000080000010032001513440320053111563200006112013000510941731116006613132320000640000100160050160070160050160070160204
960204160069119910000000001300041600542665025960168100640016320000100640000320000500168009821760752016005001600691600490035196010020032000064000020032000016000001600691600691180201100991001008000080000010032001513440320013100533200406113441320051094174416016613130320000640000100160070160070160070160070160050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
960025160059119900057400016004931200259600741064006432000010640000320000501680881185600120160029016004316006400346960010203200006400002032000016000001600431600591180021109101080000800000103200003532003710320037610400005019003170331600561010032000064000010160065160065160066160065160065
9600241600641199010981021600493012025960074106400003200001064000032000050168088121759996016004001600591600640034196001020320000640000203200001600000160043160043118002110910108000080000010320000353200321503200326132350005019003170321600611410132000064000010160060160065160065160060160044
96002416004311990001041001600493120025960074106400643200001064000032000050168041318560012016004501600641600640034696001020320000640000203200001600000160064160059118002110910108000080000010320000353200370032003761370000501901317023160061010132000064000010160065160044160921161648160044
96002416006411990001251001600283121202596006610640000320000106400003200005016800018160000116004001600641600590034196001020320000640000203200001600000160059160059118002110910108000080000010320000032003700320037603200005019003170231600611010132000064000010160044160065160065160060160060
960024160064119800043102160049312120259600101064000032000010640000320000501680890185600120160045016006416006400346960010203200006400002032000016000001600641600591180021109101080000800000103200003532003200320037013240000501900317033160061140032000064000010160044160065160066160065160142
96002416004311990006910016004431212025960449106400563200001064000032000050168088181600000160024016005916006400346960010203200006400002032000016000001600431600591180022109101080000800000103200000320037037320037610400005019003170231600611010032000064000010160065160065160060160044160060
960024160043119900043102160028301202596006610640000320000106400003200005016808812175999601600460160064160154003419600102032000064000020320000160000016004316005911800211091010800008000001032000003200006440320000613200005019003170331600561410132000064000010160065160065160065160044160044
960024160064119900038100160028312120259600741064006432000010640000320000501680881185600120160024016006416004300348960010203200006400002032000016000001600641600431180021109101080000800001103200003532003200320037613700005019403170331600561010132000064000010160067160065160065160065160065
9600241600591199000720021600283121202596006610640056320000106400003200005016808811856001201600450160064160064003259600102032000064000020320000160000016005916004311800211091010800008000001032000035320037032320037600350005019003170331600401414132000064000010160060160044160065160060160044
96002416005911980001090021600283121202596007410640056320000106400003200005016808811856001201600240160059160064003419600102032000064000020320000160000016006416005911800211091010800008000011032000003200000323200370137400005019003170331600611410132000064000010160065160065160044160061160065