Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, three registers, 2D)

Test 1: uops

Code:

  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 07 | 0a | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
6300529446221252500331469728849000240363000300030001500001626228702292523103000300030002908029166116100110001000030009300101300451490132459125696431634772062432253825855522839815362137031551830002937029263292742919029371
6300429402219161501614558289490032417130003000300015001316184287352926131030003000300029166291231161001100010000300063001013001514901281194796858303555520739333038191753552838816239136981569830002929029373292512929929306
6300429302220151700904557289480002400730003000300015000016173285902921331030003000300029090291191161001100010000300063001013004504601276595146827303564820602311838211655602831216398135491519330002925929274293332926429308
6300429238219221900614773289360002408630003000300015000016240285652928931030003000300029143290961161001100010000300033004013001511601294190286805307384520545312238221953512833116360137131587030002928729219292772919629254
63004292172191519009146032893000024044300030003000150010161762866229292310300030003000291672919311610011000100003000630010130055116012778908968073157115320698321238231054542875816458136241550330002931429275294122936829290
63004293202192124009150022885500024169300030003000150000162672865729170310300030003000291262909911610011000100003000630010430045119012872907568192988105620687306038231153492840316384136171573630002929329398294502931929443
6300429342219182300104878288740002406330003000300015003116232288492936531030003000300029191292431161001100010000300063003013002514901287994056791305675520588306538281457472840216195136661545530002929529219293142916629416
6300429458220211600904555288570002415230003000300015002316156286232923331030003000300029220291941161001100010000300093003043003511601311791046830315475320599305338231554552841916467137481542930002927029352292342929529349
6300429328220182000714557288490022412230003000300015000016277287052929531030003000300029214291021161001100010000300063001013001501901286691006816302685020706304138201453542835116482135341548430002920629259292382933029257
6300429451219212600104664288930002419730003000300015000016153286762932931030003000300029081292381161001100010000300093005013001501601286892326839308285220613313738151156572843016396136421553030002925729299292752922429394

Test 2: throughput

Count: 8

Code:

  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  ld1 { v0.2d, v1.2d, v2.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0009

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
240205800715991110010680102800553771925240100100240000100240000500350436108015008004880048499921550005240100200240000200240000800618006111802011009901001008000080000010024000004302400400003924003951043000511021622800620101072400001008006680066800418007280066
2402048006460000000005300018005021020252401001002400001002400005003537804080042080065800654998835001924010020024000020024000080040800631180201100990100100800008000001002400000002400000003824003860044000511021622800620141072400001008006680066800668006280041
240204800655990000000440000800252012025240100100240000100240000500349543618003708004080040499883499982401002002400002002400008006480040118020110099010010080000800000100240000043024000001038240038503944000511021622800632101002400001008006680062800668004180062
240204800645990000000000018004601120252401001002400001002400005003519717080061080070800704997135002824010020024000020024021680069800481180201100990100100800008000001002400201943024001910121240000015943190051102162280067001302400001008007080070800718007080049
24020480070600110000067000080055207202524010010024000010024000050035034290800530800488004849970350028240100200240000200240000800698006921802011009901001008000080000010024002020430240059010632400410159431900511021622800660131352400001008007080050800718004980070
24020480069600101000021010380055300202524010010024000010024000050034862891801760800508004849992350006240100200240000200240000800478007011802011009901001008000080000010024002119430240060001622400410160431920511021622800440131352400001008004980071800708004980049
2402048006960010010002100128005500720252401001002400001002400005003486289180059080069800484997035002824010020024000020024000080069800481180201100990100100800008000001002400192043024001900161240041515943191051102162280067001352400001008007180070800498007180070
2402048004859910110006700038005420720252401001002400001002400005003501095180058080069800694999235002724010020024000020024000080048800701180201100990100100800008000011002400201943024001900162240040511943191151102162280045001352400001008004980049800718007180079
2402048004859911110006600038005407019252401001002400001002400005003503429180057080069800694997035000624010020024000020024000080070800701180201100990100100800008000001002400191900240059100622400005160431800511021622800660131352400001008007080070800718007180049
240204800696001100010670000800333971925240100100240000100240000500352095318005708004880048499923500272401002002400002002400008006980069118020110099010010080000800000100240021190024006010262240000506043190051102162280067001352400001008004880070800718007080070

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d0 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
24002580398599000001008006921212195124001010240000102400005035384940800360800688007150119350021240010202400002024000080061800411180021109101080000800000102400004302400401062240040514043502000516468005801404240000108004280066800628006280061
2400248007060000004500080026212002524001010240000102400005035286090800360800618006150006350021240010202400002024000080061800611180021109101080000800001102400004302400400039240040514005020006162480058010104240000108004280065800428006280042
2400248006159900004510080046201216252400101024000010240000503523026080016080061800615000635004024001020240192202400008004180061118002110910108000080000010240000002400001040240039510435020005165280038010104240000108006280042800428006680066
2400248006860001005410180046212122025240010102400001024000050352178508003608006180065500063500412400102024000020240000800618006111800221091010800008000001024000043024000000402400395140435020002162480038014104240000108006580062800428004280042
240024803626000000460018002620116252400101024000010240000503522117080036080065800655001035004124001020240000202400008006180041118002110910108000080000010240000430240040004024003951400502000216468005801004240000108007380064800428006280042
24002480347601000045101800552121202524001010240000102400005035102270800360800618006150006350041240010202400002024000080061800411180021109101080000800000102400004302400400040240039510445020004164380057010104240000108006680042800628006280042
2400248006160000004510080046211025240010102400001024000050352238008004038006580065499863500212400102024000020240000800618006111800211091010800008000001024000043024000000402400400140435020002163480058010100240000108006280042800428006280062
2400248035860000000001800462121202524001010240000112400005035112630800160800658004150006350041240010202400002024000080061800611180021109101080000800000102400004302400390040240040614005020004165580057010100240000108006180062800628006280066
2400248037160111004500180046201216252400101024000010240000503490485080036080061800614998635004124001020240000202400008006180061118002110910108000080000010240000002400000028624004050005036006166480057010104240000108004280042800428004280062
2400248035660100010000800532121216252400101024000010240000503531036080016080061800654998635004524001020240000202400008004180041118002110910108000080000010240000431312400390040240039014043502000416428005800104240000108006280062800628006680063