Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 4S, post-index)

Test 1: uops

Code:

  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 13.018

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.018

retire (01) | cycle (02) | 03 | 04 | 07 | 09 | 0a | 0b | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
72005292822203000000000045522904804015290130001000800040001000800040005000203309844170024736291662920931013000400080005000200002931529202116100110001000040008400600640065100127859116687231190521929231303818959402850410001655713127133324000800010002928329290292572926329299
72004292352190000000120004522290602041523813020100080184000100080004000500020350984440002480429192292713101300040008000500020000291692919711610011000100014000840060064006510121274490976876308204319172307638151952502854910001639712985133984000800010002928829289292722932629211
72004292792190000000000045002913500015260130141000802040001000800040005000203299824920024722291702918431013000400080005000200002913229202116100110001000040001240060064006510121288491016870312903819315307438141057472847510001634713034132684000800010002938829237292952924429174
72004293382191000000120104663290590001526313020100080184000100080004000500020330982895002469829119292813101300040008000500020000291702927811610011000100004000114006006400401501294090696852306803919276303738251354442849810001643112937132874000800010002930529334292752935229254
72004292362200000000100004479290150001527913020100080204000100080004000500020319980045002477029147292753101300040008000500020000292432916411610011000100004000040060064006600111292790226873307304219311300238191257492861810001652113061132804000800010002932829302292512925929265
7200429276220000000012000453729054040153321301810008014400010008000400050002030598295600247452919129323310130004000800050002000029279292181161001100010000400084006006400551301282091046953305204619227304538221256532847510001652512949131484000800010002930729258293332929229210
7200429198219000000080004541290992001531213018100080144000100080004000500020334984115002476929093292063101300040008000500020000292312916111610011000100004000124000000400051681279592096863305604719297305938211457482845510001644212988131934000800010002924329322292662929929251
720042923821900000001801045762905600415321130141000801440001000800040005000203189807230024757291882926731013000400080005000200002921629178116100110001000040001240060094006513111278690836862305404219222307538161261542846810001642313024132904000800010002925929310293082920829268
72004293202190010000120004537290330001530213014100080184000100080004000500020308982963002470829204292523101300040008000500020000291782917811610011000100004000114006006400000001295490066842307904519327306738191457512848710001647112949133384000800010002928729326291772930029272
7200429316219001001010010454529070000152121300010008020400010008000400050002031198107000247622921529298310130004000800050002000029158291971161001100010000400084006003400361681287790856837308804019299309038161467542844210001648812723131794000800010002931529227292372924629268

Test 2: throughput

Count: 8

Code:

  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
960205160069119911000000580000316003421717102510401168010064005632000080100640000320000479601168098221760156101600460160065160065016434810401002003200006400002004000001600000160065160065118020110099100100800008000001003200141343032005200052320039615243131005109116111600468000009132000064000080100160066160066160066160066160050
960204160065119912000140478000011600502171710271104139280100640056320000803026416003210844796011680979217601521116039701600491600650034810401002003200006400002004000001600000160065160065118020110099100100800008000001003200131343032012110013320039615243130005109116131600468000099132000064000080100160050160050160066160050160050
9602041600651199120000001301002160181217010251040116801006400523200008010064000032000047960116809782176015210160046016004916006700332104010020032000064000020040000016000001600651600651180201100991001008000080000010032001314003200523005232003901520131005109116111600628000099132000064000080100160066160050160066160066160050
9602041600651199100100005800003160050217171025104016880100640328320000801006400003200004796011680098217601561016016401600491600490033210401002003200006400002004000001600000160065160049118020110099100100800008000001003200131343032005110052320039605243131005109116111600628000000032000064000080100160066160066160066160066160066
960204160065119810020000130000316003400171025104015680100640052320000801006400003200004796011680982217601561016004601600491600490033310401002003200006400002004000001600000160066160065118020110099100100800008000001003200141443832005212252320039611343131005109116111600628000009032000064000080100160066160050160066160052160066
96020416006511991001000013000031600502170102510401568010064006832000080100640000320000479601168098221760156101600460160065160049003481040100200320000640000200400000160000016006516004911802011009910010080000800000100320014130032005210152320000015243130005109116111600628000009132000064000080100160066160066160051160066160066
9602041600651198110100007000003160050017171025104014880100640016320000801006400003200004796011680978115200241016003001600651600650034810401002003200006400002004000001600000160065160081118020110099100100800008000001003200131443032005211252320000615243131005109116111600628000099132000064000080100160050160066160066160066160050
960204160065119911010000570000316003421717102510401528010064005232000080100640000320000479601168097921760152101600460160065160065003481040100200320000640000200400000160000016006516006511802011009910010080000800000100320013130832001310152320039615243130005109116111600628000090132000064000080100160066160066160066160066160066
960204160049119910000000580000216005030170025104015680100640016320000801006400003200004796011680046217601521016003001600651600490031201040100200320000640000200400000160000016006516006511802011009910010080000800000100320014144303200521011906320039015143131005109116111600628000009032000064000080100160066160050160050160066160066
9602041600651198100000001300002160034017170025104014880100640056320000801006400003200004796011680979217601521016004601600501600650033210401002003200006400002004000001600000160065160065118020110099100100800008000011003200141443032005210052320000011343131005109116111600628000099032000064000080100160066160066160066160066160050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d0 | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
960025160069119910200059010316003326600251040070800106400163200008001064000032000047204116811912176075210160050016004916006900332104001020320000640000204000001600000160069160069118002110910108000080000110320014144403200131005332003960134413000501915316036160066800001313032000064000080010160070160070160050160050160070
960024160069119911010059010216005426650251040078800106400683200008001064000032000047204116811882176075210160052016004916004900332104001020320000640000204000001600000160049160069118002110910108000080000010320013134403200530011332000061534413000501915316065160066800001313132000064000080010160070160070160050160049160050
96002416005111991100005900011600542065025104007080010640060320000800106400003200004720411680046217607521016005001600691600693033210400102032000064000020400000160000016006916006911800211091010800008000001032001414440320053021523200400153441300050191561603316004680000013232000064000080010160070160070160070160181160070
960024160069119911010059000316003420051251040066800106400563200008001064000032000047204116812172176075210160050016004916006930353104001020320000640000204000001600000160069160069118002110910108000080000010320013134403200531115332000061534413100501915316053160066800001313232000064000080010160070160070160070160050160070
9600241600691199110100130002160034260512510400668001064001632000080010640000320000472041168004821760752101600500160069160069303321040474203201366400002040000016000001600691600691180021109101080000800000103200151400320052101533200406053441310050191261606516006680000130032000064000080010160070160050160183160070160070
96002416006911991000005900031600342600025104002680010640016320141800106400003200004720411681342217607521016005001600491600693035210400102032000064000020400000160000016006916006911800211091010800008000001032001413440320013000623200400113441310050270316065160066800001313032000064000080010160150160071160070160070160070
9600241600691199100000590101160054266502510400708001064006032000080010640000320000472041168119111520028101600500160069160049303521040010203200006400002040000016000001600691600691180021109101080000800000103200131544032005300153320039603235000050270216053160056800001010032000064000080010160065160044160044160044160060
96002416005911990000000010016004431212002510400668001064000032000080010640000320000472041168000118560012101600450160059160059003321040010203200006400002040000016000001600691600491180021109101080000800000103200000350320032000332000061035000050270316065160056800001010032000064000080010160060160044160044160060160065
9600241600431198000000380103160054260502510400668001064005632000080010640000320000472041168004621760752101600500160069160049303521040010203200006400002040000016000001600491600701180021109101080000800000103200141444032005310153320040615244131005027051603216006680000013232000064000080010160070160070160070160070160070
96002416006911991101005900011600342600025104007080010640060320000800106400003200004720411681191217607521016005001600691600693033210400102032000064000020400000160000016006916004951800211091010800008000001032001414440320013001533200400053441310050270316063160066800001313232000064000080010160052160070160070160050160070