Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 16B, post-index)

Test 1: uops

Code:

  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 13.022

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.026

retire (01) | cycle (02) | 03 | 04 | 05 | 07 | 08 | 0a | 0b | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
72005292722191131111109004714291280415285130321000802840001000800040005000203099815910024773292522934731013000400080005000200002924429201116100110001000040054040070044006616042128879106700030891511928430773812747482852810001621613042133244000800010002920629255292862922329350
720042927521901010100015004630290440415326130281000802240001000800040005000203409837012002473929199291613101300040008000500020000291642927311610011000100004007612401102940020170401279590736916309604819246299738141142422851810001646613081132754000800010002918529231293462930029299
7200429273220010101000120046002904400153061302610008008400010008000400050002030998505100024805291392932231013000400080005000200002924729320116100110001000140044840080164002501012421281192046849307603919184308738111540442852310001635213004133084000800010002924429253292712930629245
72004292012190101010001600461329060001532513022100080224000100080004000500020340985341002479829136292713101300040008000500020000292732926111610011000100014006684011004400561612411284591226851308913919244308538171148442847610001631612969134284000800010002926029335292832930429219
7200429264219010101000400457929009001530213022100080264000100080004000500020309985251100247712909329110310130004000800050002000029267292121161001100010000400668400712440036090421285391146883306004119283306838061241352848910001635213117132054000800010002923529294293312924329314
72004293252190101110001200453529069001527913022100080224000100080004000500020309983172002480529216293073101300040008000500020000292582927411610011000100004005504010026400061411421316793106848306504619274303438101042452848110031612713083131084000800010002931829359293322927929181
72004293552200101100004004668290530015311130221000802640001000800040005000203019809100024767292252934131013000400080005000200002916529202116100110001000040066124012026400201411411294594026848306204019253305338091542412846110001630613035133234000800010002921229319293422923929279
72004292422190101010001200459629131001529313022100080264000100080004000500020326984720002476429086292743101300040008000500020000292462917411610011000100004005411400800440066148421295191186858313604919235308738121342382849110001645513086133854000800010002933429315292462930629204
72004292802190101010009004576290670015363130281000802840001000800040005000203159841660024743292042932331013000400080005000200002922129226116100110001000040066040110194000611011411300592046838311904119317306338141342482862410001631012965135184000800010002933029285293632939129223
7200429323219010001000130045602912300152691302210008022400010008000400050002032098516300248072918829350310130004000800050002000029163291361161001100010001400668400912640060060421274190426858305504019342314738131743462850710001632613136133504000800010002926329345293392925429262

Test 2: throughput

Count: 8

Code:

  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
960205160072119911000000059000216005426600251040160801006400603200008010064000032000047960116812432176075200160024016004516005900326104010020032081264231220040350516093041622831624441618020110099100100800008000011003200000350320032010583200376132350000510911611160040800001010032000064000080100160060160044160044160060160065
96020416006411990000000006700001600283121200251040164801006400643200008010064000032000047960116804081856001210160024016004316006600326104010020032000064000020040000016000001600431600591180201100991001008000080000010032000003503200000000320000613240000051091161116005680000010032000064000080100160060160067160065160065160065
9602041600431199000000000430100160054266602510401688010064006832000080100640000320000479601168124411520028101600300160069160049303541040100200320000640000200400000160000016007116006911802011009910010080000800000100320013144403200531015332017260534413100510911610160051800001410032000064000080100160060160060160049160044160065
96020416004311980000000003800021600490000025104016480100640064320000801006400003200004796011680408816000010160024016006416006400347104010020032000064000020040000016000001600431600431180201100991001008000080000010032000000032000000037320037600400000510911611160041800001410032000064000080100160044160067160065160044160125
9602041600591199000000000000001600490121200251040156801006400643200008010064000032000047960116804081856001210160024016006416006400326104010020032000064000020040000016000001600641600621180201100991001008000080000010032000003503200370003732003700000000510911611160056800001010032000064000080100160065160044160065160044160065
960204160064119900000000000002160044312000251040100801006400003200008010064000032000047960116800011920001610160045016006416004300331104010020032000064000020040000016000001600591600601180201100991001008000080000010032000003503200320003320037003200000510911611160056800001410032000064000080100160065160067160065160065160065
960204160064119800000000000102160028312121025104010080100640064320000801006400003200004796011680881816000010160024016004816004300326104010020032000064000020040000016000001600641600591180201100991001008000080000010032000000032003700003200006032400000510911611160061800001410032000064000080100160224160065160067160060160044
960204160043119900000000043000016004931200025104010080100640260320000801006400003200004796011680932816000010160024016006416004300347104010020032000064000020040000016000001600431600431180201100991001008000080000010032000000032000000037320000603235000051091161116005680000010032000064000080100160044160044160067160044160060
96020416004311990000011004301021600443121200251040100801006400563200008010064000032000047960116804151856001200160024016004316006600342104010020032000064000020040000016000001600641600431180201100991001008000080000010032000003503200370003232000061360000051091161116004380000010032000064000080100160060160044160065160065160067
960204160064119900000000000102160044312120025104016480100640056320000801006400003200004796011680001185600161016004501600591600590034210401002003200006400002004000001600000160064160043118020110099100100800008000001003200000003200000003532000061374000005109116111600638000000032000064000080100160062160065160065160065160065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire (01) | cycle (02) | 03 | 08 | 09 | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cf | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
96002516006811990001800101160053128194025104006680010640024320000800106400003200004720411681228217606080116004901600511600682351104001020320000640000204000001600000160068160068118002110910108000080000010320000553200500503200506150430050190181630061716006580000010132000064000080010160069160052160069160051160052
9600241600511199000056010016005318819012510400348001064002432000080010640000320000472041168122521760608011600313160068160068035310400102032000064000020400000160000016006816006811800211091010800008000001032000055320050050320018611700050190171620081716006580000010032000064000080010160052160071160052160051160069
96002416006811990100590101160053170190025104007080010640056320000800106400003200004720411681225217606080116004901600681600680351104001020320000640000204000001600000160050160068118002110910108000080000010320000032001801732001760504400501901616200717160065800001010132000064000080010160069160069160416160226160069
9600241600681199000017010016005314004025104006680010640056320000800106400003200004720411681221217606080116004901600681600512334104001020320000640000204000001600000160068160068118002110910108000080000010320000032001805032018760504400501901716100516160065800001010132000064000080010160069160051160070160069160051
9600241600681199000056000116005413000025104006680010640056320000800106400003200004720411681228217606040116005101600681600682353104001020320000640000204000001600000160050160068118002110910108000080000010320000543200500173200500150440050190171620017816004880000010132000064000080010160069160069160052160069160052
960024160068119900001800011600531380102510400668001064005632000080010640000320000472041168009921760608011600490160145160051235110400102032000064000020400000160000016005016006811800211091010800008000001032000003200180533200506150000501901716200171516006780000100132000064000080010160069160069160052160051165158
960024160068119900001700001600531280012510400348001064006032000080010640000320000472041168122512800028011600490160068160050035110400102032000064000020400000160000016006816005311800211091010800008000001032000054320050218320050001700050190171620014616006580000010132000064000080010160052160069160052160051160069
9600241600681199000068010116005313800025104006680010640024320000800106400003200004720411681194217606080116004901600681600680613410400102032000064000020400000160000016006816063451800211091010800008000001032000055320018150320050611743005019017162001717160065800001010032000064000080010160052160051160070160069160051
9600241600681199001056010016005313001125104003480010640024320000800106400003200004720411681257134399960116004901600681600681343341040010203200006400002040000016000001600681600681180021109101080000800000103200000320050050320050601744005019017164101616160065800001010032000064000080010160052160051160052160069160069
960024160068119900001700001600361781901251040066800106400243200008001064000032000047204116812281280002801160049016006816006803341040010203200006400002040000016000001600681600681180021109101080000800001103200005432014111832005001504300501901716300616160065800001010032000064000080010160069160069160052160069160069