Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (single, B, post-index)

Test 1: uops

Code:

  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.004

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
6500528724215014001100000210478927913011162566000100040041000100040001000500050004753060022930028228281573106000100040002000800028153281411161001100010001100021001141000210013947103847087349454419108318338151841382800610001392012425132731000400010002816628137281632815728573
65004281872120140019000002005229278880111608460041000400410001000400010005000500047458300229450281312820831060001000400020008000281212849611610011000100001000210012651001110014185105467254344563819058336538191641362804810001411712280126451000400010002816028200280392816728393
65004281852130150012000007104905278730101622160001000400410001000400010005000500047452600229680280222804731060001000400020008000281092816611610011000100001000210010221000100013376100137168319194019153345938151741382780810001533611657122861000400010002867028506280342803328556
650042854221401800150000015105256280130111633260001000401110001000400010005000500047468500229900283072823531060001000400020008000281612804111610011000100001000010012801001102014080103347274345384719576348938131134382819610001375912532126701000400010002819828176284922849628201
650042863721501300150000031048222828901116099600410004004100010004000100050005000475464002294202843828059310600010004000200080002810428042116100110001000010002100127410022220139179532725734056441941632003818642402783810001399711865131901000400010002811828107280782806228331
6500428646211015001100000310518327896001162566004100040041000100040001000500050004743640023006027905285893106000100040002000800028494285041161001100010000100021001271000102013459103917232348754019057339638181038362785010001407312712132121000400010002808828130281632861328333
65004282062110130010000002004907279310111614660001000400410001000400010005000500047488900229720284502852231060001000400020008000285862851111610011000100001000210012641000000013409103977281349694119027338538201237402805610001529312607124421000400010002868328110282232863628288
65004282252150170014000000005295281100011616360081000401110001000400010005000500047534500230140285322815831060001000400020008000281512817711610011000100001000210013241001012014049103067301323764119529346438181036412782910001434711801123161000400010002858228508281472827828490
650042855121401200141100001047992799501116193600410004004100010004000100050005016475425002295802812228181310600010004000200080002814828032116100110001000010002100128110012120141189496727634976381902034543815739402779910001415911796134261000400010002806928133281142862028384
65004283442100160011000000104836283580111621760041000400410001000400010005000500047564500230120280292814931060001000400020008000279482810211610011000100001000210012571001002014109967970643466134119005343638191139412779410001380811777134471000400010002832628285282972806128368

Test 2: throughput

Count: 8

Code:

  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  ld4 { v0.b, v1.b, v2.b, v3.b }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40020516005511990100000150100160092066799553748010880100320048800008010032000080000480499400055227158081160021016004016005579949038003748010020080000320000200160000640000160040160055118020110099100100800008000011008000000800090501080000509130000511021722160052180000678000032000080100160056160056160041160056160041
40020416005511990000000150000160025066799552548010880100320000800008010032000080000480499400064227158081160021016005516004079934038003748010020080050320000200160000640000160040160055118020110099100100800008000011008000001381508002289358154000101320005439243123162198181218608000032000080100163188163128163130163119163233
40020416266712121010000180000160040066799552548010880100320000800008010032000080000480499400368227126921160036016005516005579949038003748010020080000320000200160000640000160055160055118020110099100100800008000001008000001380000000108001050900000513021722160052080000608000032000080100160056160041160056160056160056
400204160055119900000001600001600401667995525480108801003200008000080100320000800004804994000642271580811600360160055160055799130338002248010020080000320000200160000640000160055160055118020110099100100800008000001008000000800000001280009009130000511021722160132080000068000032000080100160041160041160095160056160056
4002041600551199000110015000016004010679955254801088010032000880000801003200008000048049940005822712692116002101600551600557994903800224801002008000032000020016000064000016005516005511802011009910010080000800000100800000080010000108000900000000511021722160052080000608000032000080100160056160056160056160056160041
4002041600551199000000015001016004006079940254801088010032000880000801003200008000048049940005822715808116003601600401600407994903800374801002008000032000020016000064000016005516005511802011009910010080000800000100800000138000000010800005010130000511021722160052180000608000032000080100160041160041160056160056160056
40020416004011990000000160010160025106799552548010880100320008800008010032000080000480499400064227158081160036016005516004079934038003748010020080000320000200160000640000160040160055118020110099100100800008000001008000001380010000980000501000000511011722160037080000068000032000080100160041160056160056160041160041
400204160055119900001001600101600401067995525480108801003200088000080100320000800004804994000582271269211600360160040160055799340380022480100200800003200002001600006400001600551600551180201100991001008000080000010080000008000000008000050900000511021722160037080000608000032000080100160056160041160056160056160056
40020416004011990000100300101600401667995525480108801003200488000080100320000800004804994000852271580811600360160055160040799490380022480100200800003200002001600006400001600401600551180201100991001008000080000010080000013800100009800105010130000511021732160052080000068000032000080100160098160041160056160056160056
400204160055119901000001600101600400007995525480108801003200008000080100320000800004804994000742271269201600210160040160055799490380037480100200800003200002001600006400001600551600551180201100991001008000080000010080000008005000098000050000000511021722160052180000668000032000080100160041160056160056160056160041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | da | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4000251600561199000000220100160044106799402548001880010320008800008001032000080000480049400064227160440160037016004016005679950328003848001020800003200002016000064000016005916005611800211091010800008000001080000008001400800166102000502000251704416003718000013108000032000080010160041160060160060160041160060
400024160040119900000021000016004410079959254800188001032000880000800103200008000048004940006122712692016004001600561600407995038003848001020800003200002016000064000016005616004011800211091010800008000001080000018800160158000001142000502000241703416005318000013108000032000080010160060160041160057160060160057
4000241600591199000000200100160025100799592548001880010320008800008001032000080000480049400064227160440160040016005916004079953298002248001020800003200002016000064000016005916005611800211091010800008000001080000008001301868001661000050200022170241600371800000108000032000080010160060160057160041160060160041
400024160059119900000020010016004410079959254800188001032000880000800103200008000048004940005822716044016003701600561600597995038026248001020800003203602016000064000016038916004031800211091010800008000001080000018800140158001660140005020000617024160037080000008000032000080010160060160041160060160060160060
40002416004011990000002100001600441007994025480018800103200008000080010320000800004800494000622271656001600400160056160059799533800414800102080000320000201600006400001600561600561180021109101080000800000108000001880016008001601162000502000222602416005608000010138000032000080010160060160041160041160060160041
4000241600401199000001220000160025100799592548001880010320008800008001032000080000480049400062227165600160040016005916004079934380022480010208000032000020160000640000160040160056118002110910108000080000010800000188000001680016010000502000141703416005618000010108000032000080010160060160041160060160060160041
4000241600591198000000000001600440667994025480018800103200088000080010320000800004800494000672271269201600210160059160056799343800224800102080000320000201600006400001600591600401180021109101080000800000108000001880014108000001142000502000141704416003718000010108000032000080010160060160060160060160041160041
400024160040119900000022000016004400679959254800108001032000880000800103200008000048004940006822716044016002101600591600407995038004148001020800003200002016000064000016005916005911800211091010800008000001080000018800160168000001130005020002217024160053180000008000032000080010160060160060160041160058160060
40002416005911990000000010016002506679959254800108001032000880000800103200008000048004940006322712692016004001600401600407993438004148001020800003200002016000064000016005916005611800211091010800008000001080000018800000080000612300050200024170441600561800000108000032000080010160057160057160060160060160060
4000241600401199000000575280001600251667994025480018800103200088000080010320000800004800494000682271604401600370160094160056799503800224800102080000320000201600006400001600561600401180021109101080000800000108000001880014116800166102000502000361704216005618000013108000032000080010160060160060160041160041160057