Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, post-index, 2S)

Test 1: uops

Code:

  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.008

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
660052935922023022015104583288650101695170121000400820001000400020005000100004753692298929148292973107000200040003000800029092291871161001100010001200002000102200040001281292476908308797220194304138311256512842210001642813357143112000400010002924029363293422928629253
660042917822016024107004527288070001695670041000400020001000400020005000100004748862296229008292983107000200040003000800029149291171161001100010000200002002000200200041284193286839305775620039313438291449502841310001645413257143502000400010002923229304291652926629381
660042924922118024006004586287090001683370001000400020001000400020005000100004752092297228955292973107000200040003000800029150292081161001100010000200002002000200040201300392046802297175120057310538241657542847110001627113240143322000400010002925229191293512939829261
6600429312219220200060045152884300016894700010004000200010004000200050001000047506123000291062917031070002000400030008000290772917511610011000100002000420000002000402412749945768373158104920192308638291447522842510001647313428144362000400010002923229254292492934629234
660042926621922021000004589289330001689770001000400020001000400020005000100024752022301829138293133107000200040003000800029214291381161001100010000200042000100200000241271292746794318275820106312438311148512842610001628813162142212000400010002926429296293212933228921
660042923822016020000104758287720001690770081000400820001000400020005000100004750822299229142292503287000200040003000800029224291651161001100010000200042000000200040061288492406861305695820193315738221755472837610001617613467143562000400010002933929196292812930929298
6600429246219170200041045512882100016857700810004000200010004000200050001000047548223037292612921731070002000400030008000292332910911610011000100002000420000002000200413026919168183030115420126311938291150592860310001604113037140682000400010002938729348292582920429277
660042926822018018105004529287970111697170001000400820001000400020005000100004739232297829152292153107000200040003000800029188291451161001100010000200042000000200040041274391176893302445420094298838341754562839810001649613438144442000400010002926129372294022922829267
6600429322216280180000045702880600016916701210004008200010004000200050001000047570123103292092923831070002000400030008000290972916711610011000100002000420000002000000012933926568952970126320066319138231559502847310001610013344142352000400010002931129166292592932429186
660042925122025019106004620287430001686570081000400820001000400020005000100004742462296129135291863107000200040003000800029201291351161001100010000200042000000200020041284994166851302185219983313338271254532843010001623313288143052000400010002932029253292472928029291

Test 2: throughput

Count: 8

Code:

  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802058007060010052000800341315280255601288010032007216000080100320000160000480499960679108800001800458006480064033156010020016000032000020024000064000080064800491180201100991001008000080000010016000024916004504816004561130510911711800628000060160000320000801008005080065800508006580050
480204800645990005201180049101529025560172801003200721600008010032000016000048049996003260800201800338006480049033156010020016000032000020024000064000080064800641180201100991001008000080000010016000004916004604516004560450510911711800618000066160000320000801008006580065800658006580065
48020480064600000510118004913028025560172801003200281600008010032000016000048049996067360800201800458006480049034656010020016000032000020024000064000080064800641180202100991001008000080000010016000004916004501316004661130510911711800468000066160000320000801008006580065800658005080050
480204800496000005501180034100280255601728010032007216000080100320000160000480499960666608002008004580064800640346560100200160000320000200240000640000800648004911802011009910010080000800000100160000049160045046160013614641510911711800618000066160000320000801008006980065800658006580065
4802048004959900051011800341015290255601288010032006816000080100320000160000480499960679108800001800458004980049034656010020016000032000020024000064000080064800492180201100991001008000080000010016000004916001401316004660450510911711800618000066160000320000801008006580065800658006580065
480204800645990005100180049141429025560168801003200721600008010032000016000048049996067960800201800458006480064034656010020016000032000020024000064000080064800491180201100991001008000080000010016000044916001406016004500450510911711800618000000160000320000801008006580065800658005080065
48020480064599001520118004910029047560172801003200281600008010032000016000048049996070160800201800308004980049033156010020016000032000020024000064000080049800641180201100991001008000080000010016000000160045013160045604541510911711800618000066160000320000801008006580065800658006580050
4802048004960000051000800491115280255601288010032006816000080100320000160000480499960674608002018004580064800490346560100200160000320000200240000640000800648006411802011009910010080000800000100160000049160045113160013611341510911711800468000006160000320000801008006580050800508005080065
480204800645990001400080034110290255601728010032007216000080100320000160000480499960668108800001800308006480049034656010020016000032000020024000064000080049800491180201100991001008000080000010016000004916004604616004601130510911711800618000066160000320000801008005680065800658005080065
48020480064600011600008003410000255601728010032002816000080100320000160000480499960032608002018003080064800640331560100200160000320000200240000640000800648006411802011009910010080000800001100160000049160045045160045014541510911711800618000000160000320000801008005380065800658006580065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480025800605990013518004521212025560010800103200001600008001032000016000048004996033041599921800238004280060003245600102016000032000020240000640000800608004211800211091010800008000011016000033160030001600296103305019041764800571800551011160000320000800108006180061800438006180061
48002480060599001361800452120025560082800103200001600008001032000016000048004995999696000001800238004280060003425600102016000032000020240000640000800608006011800211091010800008000001016000033160000029160000012900501906176680039080055010160000320000800108006180061800618006180043
4800248006059900001800272121202556054180010320000160000800103200001600004800499757599600000180590800608006000342560416201600003200002024038164000080060800601180021109101080000800000101600000160029010601600306130330501903177680057080000100160000320000800108006180061800618006180061
480024800596000003508004520120255600828001032007216000080010320000160000480049960338960000018004180060800600034256001020160000320000202400006400008004280060118002110910108000080000010160000331600000016002960303305019061776800570800001010160000320000800108006180061800618004380061
4800248006059900035180045201202556001080010320072160000800103200001601524800499605944439852180041800608006000342560010201600003200002024000064000080060800601180021109101080000800001101600003316002902916002961293305019041766800571800001010160000320000800108006180048800618006080061
480024800606000003518004501212025560010800103200721600008001032000016000048004996033141599921800418006080042003425600102016000032000020240000640000800428006011800211091010800008000001016000001600300301600306129005019061776800390800001010160000320000800108004380061800618006180061
480024800606000003508002721212025560082800103200721600008001032000016000048004996033196000001800418006080060003425600102016000032000020240000640000800428006011800211091010800008000001016000033160030033160000010005019051734800421800001010160000320000800108006180061800618006180061
48002480042600000361800272121202556008280010320072160000800103200001600004800499603389600008180041800608006000324560010201600003200002024000064000080060800601180021109101080000800001101600003316003003016003001303305019061743800571800001010160000320000800108006180043800618004380061
480024800606000003518004521212025560010800103200001600008001032000016000048004995999696000001800418004580060003425600102016000032000020240000640000800608006011800211091010800008000001016000033160030130160000010005019031743800571800001010160000320000800108006180043800618006180061
4800248006060000035180045212120255600828001032007216000080010320000160000480049960330960000018004180060800600034256001020160000320000202400006404328005280060118002110910108000080000010160000331600300301600306103305019041744800571800001010160000320000800108004380061800618006180061