Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 2D, post-index)

Test 1: uops

Code:

  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 13.022

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.022

retire (01) | cycle (02) | 03 | 04 | 05 | 08 | 0a | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
72005292252188001000100045402904620215292130141000801440001000800040005000203279805592484429106292223881300040008000500020000292662920111610011000100014000084005000040076108001269093196861306308219289313438172053532848810001653112963133454000800010002926129280292202925729254
7200429368219200001146400455029147044153341301410008000400010008000400050002031398293924724291562919231013000400080005000200002921529203116100110001000040000114000000040065150001286992536813306114719263304038121154542847510001656113151133604000800010002932029373293512937729360
7200429285219200000010104743290481021527713018100080144000100080004000500020336983271024761292132928831013000400080005000200002913329132116100110001000040000114000000540005070001286690786837308105619306310138171551542847810001628612961132934000800010002922029255293252932829231
72004293002195110100630047902907500015253130281000802240001000800040005000203269833032477429157292073101300040008000500020000291992916111610011000100004005584006001640006068411292192656885308605519265305238211555622853010001642912930132594000800010002927929230294182920829255
72004292762193110100281045122906900015229130221000802240001000800040005000203319837732478229237293323101300040008000500020000291572920811610011000100004006684008011940020160421296091286851308804619193305938131554522853410001595212948134304000800010002925429306292012924429343
7200429429220611010016104588290630001529113026100080084000100080004000500020336980745248082925629248330130004000800050002000029139292231161001100010000400541240080024400001812401290095766881306725719246304638241355532856810001632812906133184000800010002927929257292762923329339
720042924422051001003551050022911100015374130301000802240001000800040005000203159833532476429200291953101300040008000500020000291962921411610011000100004006604008002640040188421287991666878306205219285307838181453522873510001592312851133234000800010002935229320292362925329222
72004292932198100100174104595290670001527813022100080284000100080004000500020326983880247642915729187310130004000800050002000029261292341161001100010000400658400700110400001615421332991607000302225819248322338181352542847910001628112922132424000800010002938429326292922933029197
7200429417220411010038810461829058000153101302210008030400010008000400050002033398405024830292182924331013000400080005000200002920329212116100110001000040056040070021040026198411293590926877306856019244321738181658562844710001645612813133414000800010002941129250292322942129288
72004293332196110100360104624291540001523313008100080084000100080004000500020326983480247952914529283310130004000800050002000029218292241161001100010000400640400700210400251415421273090746909314115419355306738141552502850910001644712916134084000800010002925929317291922929729453

Test 2: throughput

Count: 8

Code:

  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0009

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 50 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
960205160069119910110005180101160034200000251040168801006400683200008010064000032000047960116812362176075200160050160069160071303311040100200320000640000200400000160000016007116006911802011009910010080000800000100320014140032005310213320040611343130000510901161116004580000130232000064000080100160050160070160050160070160050
96020416004911991100000119010316005426650025104011680100640060320000801006400003200004796011680046115200280016005016006916006930332104010020032000064000020040000016000001600491600691180201100991001008000080000010032001315440320053000533200406153441300105109011611160066800001313132000064000080100160050160070160050160070160070
9602041600691199101000011700021600542065004610401568010064001632000080100640000320000479601168143621760752001600501600691600713035210401002003200006400002004000001600000160050160069118020110099100100800008000001003200131444032005310053320040615344130000510901161116006680000130232000064000080100160070160070160050160070160070
9602041600491199100000011301031600342605002510401168010064006032000080100640000320000479601168123811520024001600301600691600493035210401002003200006400002004000001600000160069160069118020110099100100800008000001003200151444032005315153320040605344132000510901161116004680000013132000064000080100160070160070160070160050160070
9602041600691199100000058000216005406050025104016080100640068320000801006400003200004796011682007217607520016005016006916006900352104010020032000064000020040000016000001600691600491180201100991001008000080000010032001315440320053000533200006113441300105109011611160066800001313232000064000080100160072160050160050160070160070
96020416006911991010000122000016003426650025104016480100640056320000801006400003200004796011680098217607520016005016006916004900352104010020032000064000020040000016000001600691600691180201100991001008000080000010032001413440320053000133200006153013000051090116101600468000000032000064000080100160050160070160070160050160070
96020416006911991000000590003160054066535025104016080100640056320000801006400003200004796011680098217607520016005016004916006930352104010020032000064000020040000016000001600691600691180201100991001008000080000010032001314440320054100533200006053441300005109011611160066800001313032000064000080100160070160070160070160070160070
9602041600691199111000011901001600342660002510401608010064006832000080100640296320000479601168004621760752001600301600491600693035210401002003200006400002004000001600000160049160069118020110099100100800008000001003200141444032005210154320000601344130000510901161116006680000130132000064000080100160050160050160050160070160070
96020416006911991010000590001160054000000251040116801006400683200008010064000032000047960116812622176075200160030160049160049003521040100200320000640000200400000160000016006916006911802011009910010080000800000100320015144403200540005332004060130130000510901171116006680000013232000064000080100160050160050160070160070160070
960204160069119911010005900021600542665002510401168010064005632000080100640000320000479601168113121760752001600501600711600693033110401002003200006400002004000001600000160071160069118020110099100100800008000001003200151344032005400053320000615344131000510901161116006680000013232000064000080100160050160070160050160070160070

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
9600251600571199001000380000116004701212251040010800106400443200008001064000032000047204116802461663998000160040160059160043003391040010203200006400002040000016000001600431600561180021109101080000800000103200002732003211024320000612435000501900816057160040800000032000064000080010160060160057160057160060160057
9600241600591199000010380000116004430122510400108001064004432000080010640000320000472041168000181600000016004016005916004300342104001020320000640000204000001600000160056160056118002110910108000080000010320000273200320323200000024270025019005160751600568000001032000064000080010160044160060160044160060160057
96002416005611990000003701001160041012025104006680010640304320130800106400003200004720411680001166399800016004016005616004300339104001020320000640000204000001600000160056160056118002110910108000080000010320000273200321272732002400243500050195061607416005380000101032000064000080010160060160044160046160057160060
9600241600561199000010122000001600440002510400668001064004432000080010640000320000472041168024216639980051600401600591600590032610400102032000064000020400000160000016005916005611800211091010800008000001032000027320024032320032610000050190081606416004080000101032000064000080010160060160044160044160057160057
960024160061119900000490000116004430122510400108001064004432000080010640000320000472041168000116639980101600401600561600430034210400102032000064000020400000160000016005616004621800211091010800008000001032000003200320333200326132350005019006160461600568000010032000064000080010160060160044160044160044160057
96002416005611990000003801000160044312122510400108001064004432000080010640000320000472041168040616639980001600241600601600590034210400102032000064000020400000160000016005916004311800211091010800008000001032012827320024116323200000024350005019005160661600408000010032000064000080010160060160060160044160060160057
960024160056119800000038000011600280121245104006680010640000320000800106400003200004720411680408816000000160037160056160043003421040010203200006400002040000016000001600591600431180021109101080000800000103200002732000003232000050035000501904616046160042800000632000064000080010160044160057160044160060160059
960024160059119800100038000001600443012251040054800106403043201308001064000032000047204116803088478696001600371600431600590032610400102032000064000020400000160000016005916004311800211091010800008000001032000003200241460320024602400005019006160661600538000010032000064000080010160060160060160057160060160057
96002416005611980010004800001160044312122510400548001064004432000080010640000320000472041168000116639980001600241600591600590034210400102032000064000020400000160000016005916005611800211091010800008000001032000027320000003200246124270005019004160571600408000010032000064000080010160044160044160044160060160057
960024160059119900100038010011600413120251040054800106400003200008001064000032000047204116800018160000001600411600591600430033910400102032067664052820400190160000016005616005611800211091010800008000011032000027320000010532008000242700050190071607616005380000101032000064000080010160060160060160044160044160057