Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (post-index, 4S)

Test 1: uops

Code:

  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.012

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.012

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
650052950623700201100000300470728880011170516012100040121000100040001000500050004759405002289929232294513106000100040002000400029257294421161001100010001100003100312210010100001319691326905316404820346335538121952572924410001625913211144121000400010002939629383294862962129797
6500429602237100000000001200464828806000173456012100040081000100040001000500050004757001000230213043231056471826003100040002000400029168291521161001100010000100123100101110002130011461317194526986317005020265316738131451502897410001625513273147261000400010002912529140290172979229224
650042896223200010000017132650149604528290690111686560481000400810041009404010095055519347644010002300029251296992022060471000400020084032290482924316161001100010001100003100301410013130001312094026935310105820249315737981955582841310001613413137141591000400010002919529033293342933029204
650042936622611002000000138004592288150111703260121000401210001000400410005000500647643010002286229113293247486000100040042000400029143292341161001100010001100213100401210022030001296193906877303605020168330838061155512853510001640513350140961000400010002921629251293262928029226
650042916122600010000000264004690288480011701260001000400010001000400010005000500047422010002287429135291483106000100040002000400029285291361161001100010000100223100101210020030001301991516971309205620068323038091151532849410001593813172143331000400010002925729262291992928429217
6500429221227100000000000004598289060101708560001000400010001000400010005000500047432011002286529141294053106000100040002000400029247291761161001100010001100220100301110010100001291893756851316904420259319638091255512846910001625813179142781000400010002933229191292642925229312
6500429192227010000000006004679288190101696760121000400410001000400010005000500047418010002292129134292663106000100040002000400029126290761161001100010001100124100111210003130001298990996934308605220142317138071155542841510001595312996143741000400010002934529550291802914729263
6500429260227000000000001200454328797010169266012100040121000100040001000500050004742205102287429105292163106000100040002000400029256291951161001100010001100223100100410003001101311292416930313215820244323238142057542854110001620113043144841000400010002940629338292212920529307
65004293782271000000000030046022890401117050601210004016100010004000100050005000475850500229282914629368310600010004000200040002919329152116100110001000010022010020111000300000130819228691931570512029032143812755522850010001626813283142281000400010002933529347293232926629191
650042929122710000000000300449428781011169806012100040161000100040001000500050004763005002290329213293033106000100040002000400029179292241161001100010000100220100101110020000001321492626932315415320246323838141659562848910001594713228142331000400010002927129383293662930529366

Test 2: throughput

Count: 8

Code:

  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205800546430000003400800421660634801568010032005680000801003200008000048049947999976800208003808005780058003394801002008000032000020016000032000080057801961180201100991001008000080000110080000013800131012800126091700512511611800541800009680000320000801008019780058800588005880198
400204800576430000001540080042106025480156801003200568007680100320000800004804994800107680020800380801978005500263948010020080075320000200160000320000800548005411802011009910010080000800001100800000158001310198000061121300510912511800540800009680000320000801008005880196800588004480058
4002048005764300000022008003016602548016880100320056802288010032000080000480499480019768002080038080057800572003394805562008000032000020016000032032080045800541180201100991001008000080000110080000017800134010018001261101300510911711800541800009980000320000801008005880058800588005880197
40020480057643000000228818003910602548014880100320056800008010032000080000480970480007768002080038080058800570153394801002008000032000020016000032000080057801941180201100991001008000080000110080000017800130013800136091700510911711800541800809680000320000801008005880058800588005980058
4002048005764400000034008004216622254801568010032005680000801003200008000048049948001476800208003908005780057003394805442008000032000020016000032000080057800541180201100991001008000080000010080000017800131014800096001700510911711800541800009680000320000801008005880197800588005880058
4002048004364300000035008018216006348015680100320304800008010032000080000480499480006768002080038080057800570033948055820080000320000200160150320000800428005411802011009910010080000800001100800000138000000128001261101700510911711800540800009680000320000801008004380058800588005880058
400204800576430000001900800421660254801568010032030480000801003200008000048049948001160334088003808005780057003394801002008000032000020016016232000080057800541180201100991001008000080000010080000013800000013800136101700512311711800540800009980000320000801008005880056800588005880058
400204800576440000001900800421660254801568010032005680000801003200008000048049948170142061448003808019580057002639480100200800803200002001601543203008005780057218020110099100100800008000011008000001380013008788001361017005109117118028718000091080000320000801008005880197801978005880058
40020480042643100000198808004216602548015680100320056800008010032030480000480499480011768002480038080057800590026394801002008000032000020016000032000080057800541180201100991001008000080000010080000008001300138001661101800510911711800390800009680000320000801008005580043800588005880058
40020480195643000000190080042166025480156801003203048000080100320000800004804994800116272140800380800428005700339480100200800003200002001600003200008005780055118020110099100100800008000011008000001380013008958000061101700510921711800540800009080000320000801008005880055800438005880058

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000258005862101000002101080045166025480082800103200728000080010320000800004800494816619600020008002680042800600033948001020800003200002016000032000080060800571180021109101080000800001108000001880015100800156114205019000217228005718000001080000320000800108006180063800618006180061
4000248005762000000003401080045166025480082800103200728000080010320000800004800494800218640020008004180060800600033948001020800003200002016000032000080060800601180021109101080000800000108000001880000001980015611505019000217228005718000013080000320000800108408682814804568005880058
400024800606210000000220108018016602548008280010320072800008001032032080000480049480015864002000800388006080060200339480010208000032000020160000320000800618006011800211091010800008000001080000018800150017800176114050190001172280057180000131080000320000800108006180043800618004680196
400024800606210000000340108002716602548008280010320072800008001032000080083480049480005864002000800418004280060003244800102080000320000201600003200008006080060118002110910108000080000010800000208001510148001561132050190002171280054080000131080000320000800108006180061800438004380061
4000248019562100000002101080045166025480082800103200728000080010320000800004800494799988640020008004180061800570034248049020800003200002016000032000080057800601180021109101080000800000108000002080015000800166116205019000417228005408000011080000320000800108006180061800588006180046
4000248004262100000001540108004516602548007080010320072800008001032000080000480049480014864002000800238006080060003424800102080000320000201600003200008006080060118002110910108000080000010800000208001510895800166114205019000117128005718000013080000320000800108006180061800618006180061
400024800606200000000200108004210602548008280085320072800008001032000080000480049480021864002000800418006080042003394800102080000320000201600003203088006080060118002110910108000080000010800000080015201680013611505019000217228016318000001380000320000800108006180061800438006180061
40002480057620000000022010801821660254800828001032007280000800853200008000048004948001186400200080041801958006000339480010208000032032420160000320000801488004221800211091010800008000001080158218801780017768017361142050340001251380163181440131080000320000800108019580197801988008780197
4000248035162200100114188810801791602263480898801713232248127981447324468815174890845103785416816008014680193805064016739648097020800793206242016015032064480351801973180021109101080000800000108015722080099009018009061132050190002172280057080000131580000320000800108005880046800618005880058
40002480060621000000022000800451660254800828001032006080000800103200008000048004948001586400200080041800608006000334480010208000032000020160000320000800428005711800211091010800008000001080000020800160018800166114050190002171180057180000101380000320000800108006180061800618006180061