Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 4 regs, 16B)

Test 1: uops

Code:

  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
64005294772361901180000012000046632917034232775000100040001000400050002071270016934289142930431050004000500029267291911161001100010000400001140060000400661000131979566698231468402025932833812134345285821000160081320214509400010002936129420293862935129251
640042923123616001600000170000461529082442334550001000400010004000500020702400169332883529409310500040005000292942916611610011000100014000004006000640066160481327193896896313810412035332303808184442286351000163931320614370400010002931829336294302924329278
64004292722361400120000012000047272913440233195000100040001000400050002071880016920288522941531050004000500029118292161161001100010000400001140000007400001600130269605695531146432032531873810174442286531000160861316114579400010002937929392294492931929319
64004293602371700100000030000047052923600234105000100040001000400050002071000016927288042940531050004000500029195291621161001100010000400001140060006400761700132159423692731374382037033243815104146286021000162631321814429400010002943729347295222922829312
6400429299236120114010001300004634291464023347500010004000100040005000207020001694928797294403105000400050002934529250116100110001000040000840000102400000000131539575697431927422038932893819104643286711000160301298614501400010002947129352294132946329394
64004293992361700160000080000477129158002339750001000400010004000500020709000169592883429294310500040005000293892927411610011000100004000084002000240006038013319929569173176438203123216382194146287261000159841321214358400010002947029479294062947429364
64004292772351800190000017000047092921100233475000100040001000400050002069300016985288612935031050004000500029360292641161001100010000400008400200034000610884132069534689031407432027732563822144741286971000161831317414422400010002950729254294282938529519
640042939523616001700000500004589291450023318500010004000100040005000207002001693728816292623105000400050002931229322116100110001000040000040030002400251200131579593690131406412049232403821103737287251000163871299414629400010002931029405295092928229315
640042943123518001700000801004625292480023422500010004000100040005000207140001694528848292733105000400050002927029207116100110001000040000040000002400261080131459371691531595412040533233823133941286671000161391322214150400010002948729378294032946529313
640042947123616001600100000004568291500423424500010004000100040005000207100001690228828293813105000400050002937429207116100110001000040000040000002400061200130309376689831503382049233173820144046286641000161651330414785400010002950129287293552938729400

Test 2: throughput

Count: 8

Code:

  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202051067308571000500100106722241517514001008010032013080100320178480494473447909106702010672210672726656326710400100200320000200400000106878106721118020110099110010080000800000100320000003032004200003201646134450051101161110687080000106032000080100106722106871106722106728106738
320204106721828001048000210671921515932540027880100320130801003201784804944734479051067080106876106727267453268024001002003200002004000001068791067281180201100991100100800008000001003200000380320034000423200416134380051101251110671880000106032000080100106728106722106858106728106873
3202041067218270010180000210686421515175040010080100320000801003200004804964663797001068340106727106877266753267044001002003201922004002401067271068731180201100991100100800008000001003200002380320042000423200426134450051101161110671880000106032000080100106728106730106728106731106728
32020410672782700004800121066932915172540010080100320000801003200004804934734479051066960106727106727266693267104001002003200002004000001067211067211180201100990100100800008000001003200000380320042000343200426134450051092161110672580000126032000080100106729106729107025106728106879
3202041067358570000312000210671221518175040010080100320000801003200004804944704308001070150106727106727266753268034001002003200002004000001068791067081180201100991100100800008000001003200002380320042000423200346138382051101162110686080048106032000080100106880106722106729106879106728
320204106880827000048000010671221501725400100801473200008014732000048078046440560110687501068741067272674932680840010020032000020040000010687710672111802011009901001008000080000110032013204103201740008323201726134450051231161110672480000116032000080100106728106877106728106728106728
32020410677182710004800021067122151517254001008010032000080100320000480494473447904106711010672710672726656326710400100200320000200400000106727106727118020110099010010080000800000100320000038032004200042320042613445005110116111067188000066032000080100106722106728106728106722106722
32020410672182700004000101067122151502540010080100320000801003200004804944721339001068360106875106727266753267104003252003200002004000001067271068701180201100991100100800008000001003201300380320042000423201726134450051231161110672280000107036032000080100106744106735106756106729106736
3202041067308581110690102106721344232540010180100320130801003200004804844734672011067100106735106736266831126718400100200324608200400000106731106735118020110099510010080000800000100320000045032004201084532004261414500524611711106866800001410032000080100106735106739106764106890106736
3202041068818670000618800210672034424254001008019432000080100320178480493473486302106849010673610673526660326812400100200320000200400240106743106882118020110099510010080000800000100320130045032003401037320042613445005109117111067238000066032000080100106732106727106730106725106876

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.3341

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320025106733827001100690002106716344212540001080010320000800103200004800424734671010670610673110673526679326710400010203200002040000010672710673111800211095101080000800000103200000450320042000453200426104500050204163310673280000141032000080010106874106735106732106735106713
3200241067278270000004700021066963442125400010800103200008001032000048004447346710106706106731106731266823267144000102032000020400000106731106728118002110951010800008000001032000004503200420104532004261424500050204165510673280000141032000080010107040106861106750106709106713
32002410673282800000048000210671931402540001080010320000800103200004800444662421010670610673510673126679326710400010203200002040000010673110673111800211095101080000800000103200000450320042010453200426142000050205163310672480000141032000080010107035106745106745106709106732
3200241067358270000006000001067162442125400010800103200008001032000048004447346710106710106731106731266793267144000102032000020400000106727106727118002110911010800008000001032000023603200410104232004161424501050203163310672880000141032000080010106850106746106729106732106732
32002410673182700000048000210671631421254000108001032000080010320000480044473467201067061067311067312667932671440001020320000204000001067311067081180021109510108000080000010320000045032004201045320042610000050204163310672480000141432000080010107032106892106747106732106732
32002410676682700000060000210672034421254000108001032000080010320000480044468902901067111067351067432667932671540001020320000204000001067331067271180021109510108000080000010320000045032004200003200426104500050204165310672480000151032000080010107032106888106749106732106713
320024106708827000000480000106720344212540001080010320000800103200004800474734479010670610673110673126683326718400010203200002040000010673110673111800211091101080000800000103200000450320042000032004260424500050203164310672880000141432000080010107036106845106737106733106709
320024106731827000000480002106716344125400010800103200008001032000048004447348630106706106731106731266793267144000102032000020400000106735106731118002110951010800008000001032000004503200000000320000604245000502041644106735800000032000080010106867106752106732106732106732
32002410672782700100048000010701504417379400365801043201308005732035648060946694541106975107029107033268271926901400685203205762040024011006911019623180021109510108000080000010320780445210320780001590732095261424500050984345510765180047101432000080010107343107051107033107184107008
320024107013829100000000021067163042325400010800103200008001032000048004447346710106706106739106735266793267144000102032000020400000106735106730118002110951010800008000001032001904503200420104532003961424500050203164310673080000141032000080010106876106746106732106709106732