Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 2S, post-index)

Test 1: uops

Code:

  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.008

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.008

retire (01) | cycle (02) | 03 | 07 | 09 | 0a | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
660052935922023022015104583288650101695170121000400820001000400020005000100004753692298929148292973107000200040003000800029092291871161001100010001200002000102200040001281292476908308797220194304138311256512842210001642813357143112000400010002924029363293422928629253
660042917822016024107004527288070001695670041000400020001000400020005000100004748862296229008292983107000200040003000800029149291171161001100010000200002002000200200041284193286839305775620039313438291449502841310001645413257143502000400010002923229304291652926629381
660042924922118024006004586287090001683370001000400020001000400020005000100004752092297228955292973107000200040003000800029150292081161001100010000200002002000200040201300392046802297175120057310538241657542847110001627113240143322000400010002925229191293512939829261
6600429312219220200060045152884300016894700010004000200010004000200050001000047506123000291062917031070002000400030008000290772917511610011000100002000420000002000402412749945768373158104920192308638291447522842510001647313428144362000400010002923229254292492934629234
660042926621922021000004589289330001689770001000400020001000400020005000100024752022301829138293133107000200040003000800029214291381161001100010000200042000100200000241271292746794318275820106312438311148512842610001628813162142212000400010002926429296293212933228921
660042923822016020000104758287720001690770081000400820001000400020005000100004750822299229142292503287000200040003000800029224291651161001100010000200042000000200040061288492406861305695820193315738221755472837610001617613467143562000400010002933929196292812930929298
6600429246219170200041045512882100016857700810004000200010004000200050001000047548223037292612921731070002000400030008000292332910911610011000100002000420000002000200413026919168183030115420126311938291150592860310001604113037140682000400010002938729348292582920429277
660042926822018018105004529287970111697170001000400820001000400020005000100004739232297829152292153107000200040003000800029188291451161001100010000200042000000200040041274391176893302445420094298838341754562839810001649613438144442000400010002926129372294022922829267
6600429322216280180000045702880600016916701210004008200010004000200050001000047570123103292092923831070002000400030008000290972916711610011000100002000420000002000000012933926568952970126320066319138231559502847310001610013344142352000400010002931129166292592932429186
660042925122025019106004620287430001686570081000400820001000400020005000100004742462296129135291863107000200040003000800029201291351161001100010000200042000000200020041284994166851302185219983313338271254532843010001623313288143052000400010002932029253292472928029291

Test 2: throughput

Count: 8

Code:

  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4802058007060010052000800341315280255601288010032007216000080100320000160000480499960679108800001800458006480064033156010020016000032000020024000064000080064800491180201100991001008000080000010016000024916004504816004561130510911711800628000060160000320000801008005080065800508006580050
480204800645990005201180049101529025560172801003200721600008010032000016000048049996003260800201800338006480049033156010020016000032000020024000064000080064800641180201100991001008000080000010016000004916004604516004560450510911711800618000066160000320000801008006580065800658006580065
48020480064600000510118004913028025560172801003200281600008010032000016000048049996067360800201800458006480049034656010020016000032000020024000064000080064800641180202100991001008000080000010016000004916004501316004661130510911711800468000066160000320000801008006580065800658005080050
480204800496000005501180034100280255601728010032007216000080100320000160000480499960666608002008004580064800640346560100200160000320000200240000640000800648004911802011009910010080000800000100160000049160045046160013614641510911711800618000066160000320000801008006980065800658006580065
4802048004959900051011800341015290255601288010032006816000080100320000160000480499960679108800001800458004980049034656010020016000032000020024000064000080064800492180201100991001008000080000010016000004916001401316004660450510911711800618000066160000320000801008006580065800658006580065
480204800645990005100180049141429025560168801003200721600008010032000016000048049996067960800201800458006480064034656010020016000032000020024000064000080064800491180201100991001008000080000010016000044916001406016004500450510911711800618000000160000320000801008006580065800658005080065
48020480064599001520118004910029047560172801003200281600008010032000016000048049996070160800201800308004980049033156010020016000032000020024000064000080049800641180201100991001008000080000010016000000160045013160045604541510911711800618000066160000320000801008006580065800658006580050
4802048004960000051000800491115280255601288010032006816000080100320000160000480499960674608002018004580064800490346560100200160000320000200240000640000800648006411802011009910010080000800000100160000049160045113160013611341510911711800468000006160000320000801008006580050800508005080065
480204800645990001400080034110290255601728010032007216000080100320000160000480499960668108800001800308006480049034656010020016000032000020024000064000080049800491180201100991001008000080000010016000004916004604616004601130510911711800618000066160000320000801008005680065800658005080065
48020480064600011600008003410000255601728010032002816000080100320000160000480499960032608002018003080064800640331560100200160000320000200240000640000800648006411802011009910010080000800001100160000049160045045160045014541510911711800618000000160000320000801008005380065800658006580065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
480025800605990013518004521212025560010800103200001600008001032000016000048004996033041599921800238004280060003245600102016000032000020240000640000800608004211800211091010800008000011016000033160030001600296103305019041764800571800551011160000320000800108006180061800438006180061
48002480060599001361800452120025560082800103200001600008001032000016000048004995999696000001800238004280060003425600102016000032000020240000640000800608006011800211091010800008000001016000033160000029160000012900501906176680039080055010160000320000800108006180061800618006180043
4800248006059900001800272121202556054180010320000160000800103200001600004800499757599600000180590800608006000342560416201600003200002024038164000080060800601180021109101080000800000101600000160029010601600306130330501903177680057080000100160000320000800108006180061800618006180061
480024800596000003508004520120255600828001032007216000080010320000160000480049960338960000018004180060800600034256001020160000320000202400006400008004280060118002110910108000080000010160000331600000016002960303305019061776800570800001010160000320000800108006180061800618004380061
4800248006059900035180045201202556001080010320072160000800103200001601524800499605944439852180041800608006000342560010201600003200002024000064000080060800601180021109101080000800001101600003316002902916002961293305019041766800571800001010160000320000800108006180048800618006080061
480024800606000003518004501212025560010800103200721600008001032000016000048004996033141599921800418006080042003425600102016000032000020240000640000800428006011800211091010800008000001016000001600300301600306129005019061776800390800001010160000320000800108004380061800618006180061
480024800606000003508002721212025560082800103200721600008001032000016000048004996033196000001800418006080060003425600102016000032000020240000640000800428006011800211091010800008000001016000033160030033160000010005019051734800421800001010160000320000800108006180061800618006180061
48002480042600000361800272121202556008280010320072160000800103200001600004800499603389600008180041800608006000324560010201600003200002024000064000080060800601180021109101080000800001101600003316003003016003001303305019061743800571800001010160000320000800108006180043800618004380061
480024800606000003518004521212025560010800103200001600008001032000016000048004995999696000001800418004580060003425600102016000032000020240000640000800608006011800211091010800008000001016000033160030130160000010005019031743800571800001010160000320000800108006180043800618006180061
4800248006060000035180045212120255600828001032007216000080010320000160000480049960330960000018004180060800600034256001020160000320000202400006404328005280060118002110910108000080000010160000331600300301600306103305019041744800571800001010160000320000800108004380061800618006180061