Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (post-index, 1D)

Test 1: uops

Code:

  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.008

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6600528348213020002500008005016280820115647700010004000200010004000200050001000247452162303927999283363107000200040003000800028112281111161001100010000200004200200220020200013990998171073356125219045335238101250572788810001415012173130832000400010002851928222282022808028606
660042863021201800230001800500627891001569770081000400020001000400020005000100034754482301128246283333107000200040003000800028224282601161001100010000200004200200220002040013941961771283345145219065337938122551522798010001429511815129312000400010002844728312282972811328263
660042836121202100140000000486528079011610970001000401220001000400020005000100044754442304528262285583107000200040003000800028294281661161001100010000200306200000220044000013924977071153381114719202330738071452562782510001426211926129522000400010002847628066282162820328186
6600428456213020002100007005107282550015781700010004008200010004000200050001000447536623018281452821231070002000400030008000280902812911610011000100002002302002000200202420139141009771743276125019147325238101649452783910001465011645128532000400010002836428421284322843628323
6600428159213024002200004005093279370115728700410004012200010004000200050001000247452142304828138283883107000200040003000800028189281551161001100010000200030200200020022262113799990572223378154819032327638142149492794710001477912429125352000400010002809728282282482851928365
6600428325212022002000007004832280690015929701210004000200010004000200050001000047516112304328198280593107000200040003000800028404281411161001100010000200000200200220020060113945985670903306135119099335338101349512787710021432312060125132000400010002841428166284932857828425
6600428382212019001800005005050281140115928701210004012200010004000200050001000047554823010284362813231070002000400030008000281472833811610011000100002000002000000200042600135411017971453280134819086327738191852522787210001498812046129582000400010002822028253284452821828359
66004285312121240014000070050282803500159727000100040202000100040002000500010000474448230062828328465310700020004000300080002811228118116100110001000020000020000002002622001388898277163337994919181324738141652522784310001441611999131752000400010002845228360284092838928145
66004282602121180018000040049062802120157847004100040082000100040002000500010000474381223058282732820231070002000400030008000282902817911610011000100002000042008002200240000139061039171363319135119088330638211355512789110001430912168129802000400010002827328321283062818028203
6600428142212015002200000005130280530016095700410004000200010004000200050001000047436102304628152282573107000200040003000800028166282101161001100010000200004200600020020200013754100757225322894919018341538131344472786410001491211755131672000400010002836928359282602819528375

Test 2: throughput

Count: 8

Code:

  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48020580070620010100100035002800452121400255601008010032000016000080100320000160000480499961025832000480046800658004200342560100200160000320264200240000640000800608005611802011009910010080000800000100160000000160000014716003061033005111217218020808000096160000320000801008006180061800618022880066
48020480042620010011000052002800452121200255601728010032007216000080100320000160000480499960887544003280041800658006500253956010020016000032000020024000064000080060800561180201100991001008000080000010016000002501601620021160028614833005109217228005708000099160000320000801008006180061800608006180049
4802048004262101001100005300180050212121025560172801003202441600008010032000016000048049996036010880152800418006080065003425601002001600003200002002400006406008005680042118020110099100100800008000001001600000250160000004616003601474000510911721800531800001012160000320000801008004380043800438004380043
480204800426200000000026140002800502012002556017280100320000160000801003200001600004804999603308320004800418006580060003385601002001600003200002002400006400008006080056118020110099100100800008000001001600110250160022004916003661000051112171180062080000010160000320000801008004880061800618006180066
480204800606200100101100230018004520000255601008010032022416000080100320000160000480499960330960000080041800658004800345560100200160000320000200240000640000800628006021802011009910010080000800000100160012112501600001022160030602933100511121712800621800001010160000320000801008006180061800618006180049
480204800656210100000000110018005020010255601848010032005616000080100320000160000480499959996479911280041800428004800342560100200160000320000200240000640000800658006011802011009910010080000800000100160000112501600300022160000613043110511121721800531800001010160000320000801008006180043800618006180061
48020480065621010010100035002801802121200255601848010032007216000080100320000160000480499960358960000080041800428004200342560100200160000320264200240000640000800658006511802011009910010080000800000100160012040016000000301600360122400051111171280062080000610160000320000801008006180061802298006180048
480204800606200010000000168001802132120002556017280100320076160000801663200001600004804999603314159992800238004280228003425605692001600003200002002400006400008006080065118020110099100100800008000001001600101140016015400301600306030011051242171180062180000010160000320000801008006180061800578006180061
4802048006562010000000113501280045212120352202561080802283205801605208023232000016039849176996397859715168004180060800600034256010020016000032000020024000064000080065800421180201100991001008000080000010016000011250160029002116000060213311351092172280054180000010160000320000801008006680061800618006180061
48020480065621100000110039002800270151200255601208010032033216000080100320000160136480499960338832000480041800608006000347560100200160000320000200240000640000800488006511802011009910010080000800000100160000040016001200321600006121330051092171280053080000610160000320000801008023080066800618004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480025800696201101000043028005426700255600748001032006416000080010320000160000480048961108108807520800508006980069003525600102016000032000020240000640000800698006911800211091010800008000010160013043016005010050160039615043101501941755800660800001313160000320000800108007080070800708007080065
480024800696201011000011028005427600255600948001032006416000080010320000160132480048961098576001608005080063800694035256001020160000320000202400006400008006980069118002110910108000080000101600111143016005100250160039614943110501931633800660800001413160000320000800108007080070800708007080070
480024800716211110000056028005421212602556009480010320096160000800103200001600004800489611431088075208005080069800694035256001020160000320000202400006400008006980070118002110910108000080000101600111143016005100053160039615043110501941733800660800001313160000320000800108007080070800728007080070
480024800696211110000068008005426650255600948001032007616000080010320000160000480048961142108807520800508006980069303295600102016000032000020240000640000800638004711800211091010800008000010160011043016005000152160000614943100501931743800660800001313160000320000800108007080070800648007080070
4800248006962011000000670080054276505856008280010320076160000800103200001600004800489611011088075208004180060800693034556001020160000320000202400006400008006980069118002110910108000080000101600111135016003800013160039614943100501941633800660800001413160000320000800108007080070800708007080070
48002480069620100000005602800542126502556003480010320096160000800103200001600004800489611611088075208005080069800633035256001020160000320000202400006400008006980069118002110910108000080000101600111243016005000049160039615043110501931633800660800001713160000320000800108007080070800708007080070
480024800706200100000056038005426750255600748001032007616000080010320000160000480048960754108807520800508006980069303525600102016000032000020240000640000800698006011800211091010800008000010160010043016004900037160039613043100501931656800660800001313160000320000800108007080067800708007080070
480024800706210001000056038005426125025560078800103200761600008001032000016000048004896110810880756080050800638041730325356001020160000320000202400006400008006980069118002110910108000080000101600111043016005001049160039614943101501931644800660800001314160000320000800108007080065800708007080064
480024800696211010110056028005426760255600748001032007616000080010320000160000480049961112108808880800508006980069403525600102016000032000020240000640000800698006911800211091010800008000010160012043016004900049160039615043100501931633800660800001313160000320000800108007080070800708007380070
48002480069620100000005602800542675025560078800103200201600008001032000016000048004896069010880752080050800698006930352560010201600003200002024000064000080063800691180021109101080000800001016001110330160049010106916003961294300504852543803540801321313160000320000800108041780413802328023480417