Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (post-index, 1D)

Test 1: uops

Code:

  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.009

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.009

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f22233a3f464951schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f6061696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
65005288032244121130110070104639283662216548601210003012200010003000200050001000035684111822956285992877131060002000300030006000287092883211610011000100002003342002011220004202101318493726919303115419657318438162165622819310001569313094148092000300010002889128868287862880428777
65004287282230120121000033000467028451001647360031000300920001000300020005000100003564061022892287312885231060002000300030006000287932868611610011000100002003342002001220004242101321194896937314506219657324238202052572827910001562212723140632000300010002885428812289002886028796
6500428955222001012100006000461128425001658760091000300920001000300020025000100003570411022929286762892931060002000300030006000287102873211610011000100002003002002101220000242001311291356944311416319711329638252155702827110001566012649140302000300010002886428856288522885128678
6500428892223012001100006000476728538001658560091000300920001000300020005000100003570270022941286272851131060002000300030006000286262855011610011000100012003242002101520024242001328895186947320006219532329138232459632825510001563212746138162000300010002862228723287582869828710
6500428850224011110000006000477228504001638960091000300920001000300020005000100003570680022969286382885131060002000300030006000286762854911610011000100002004342002001220024242201325493306889317016119480318838212462652815010001547012742135522000300010002881528804289282884928863
6500428826224012011100005435200482928517001628360091000300920001000300020005000100003574040022944285452868831060002000300030006000288072851511610011000100002003202004001220004202101323696137061317905719734330438192365682827210001545212375138892000300010002863328715287812888028805
650042867322200111110000270004776283930016378600910003009200010003000200050001000035611120822932287842886731060002000300030006000285392855111610011000100002003042004011220004242101333093336977316707119664315338212666592825710001576812775137972000300010002866528811287872871228755
65004287942230121011000144401047032848400165246009100030092000100030002000500010000357464002292828685287663106000200030003000600028681287581161001100010000200244200310322000424222161310392756949320506919730318738252362682824610001531612718136242000300010002899729018290012894928968
65004286422220111030000060005069279530015899600310003009200010003000200050001000035721700229272834628329310600020003000300060002831028106116100110001000020033420030108200042421013724103007170339705819173339438292056632786610001408912153128162000300010002832828460281302832128404
6500428363213012002100002670105159280820016171600310003009200010003000200050001000035721400229672827428368310600020003000300060002824228186116100110001000020033020030022200042020013839100737234340006019189341738242460572788010001462411969129982000300010002840528265282902827728365

Test 2: throughput

Count: 8

Code:

  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  ld3r { v0.1d, v1.1d, v2.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
4002058007062010010000230102800272151502548016980100240044160000801002400001600004804999599963121073800238004280042003244801002001600002400002002401624800008004280042118020110099100100800008000001001600000016003000090160030610000051091171180039180000106160000240000801008004380043800438004380043
40020480042621000000003500018002701200254801558010024005416000080100240000160000480499963347312106580023800428004200324480100200160000240000200240000480000801278004211802011009910010080000800000100160000001600000025716003661474010005109117118003908000099160000240000801008004380043800438004380043
4002048004262010000000540002800272151502548016980100240054160000801002400001600004804999602233121073800238004280042003244801002001600002400002002400004800008004280042118020110099100100800008000001001600000016002200069616000061223300051091171180039080000010160000240000801008015680043800438004380043
4002048004262010000000760012800273150025480172801002402101600008010024000016000048049996033831210658002380154800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016000002516003009502116003061293300051091171180039080000010160000240000801008004380043800438004380043
4002048004262100000000270001800272121202548015580100240042160000801002400001600004804999603283121073800238004280042003244801002001600002400002002400004800008004280042118020110099100100800008000001001600000016000000025160000612133000510911711800391800001010160000240000801008004380043800438004380043
40020480042621000000001200018002721212025480154801542405101602168026224048616032448146697129331445028034980341803397604221448074820016021624048620024048648097280420804154180201100991001008000080000010016010400160336100178816000000223300051091251180039180000106160000240000801008004380043800438004380043
4002048004262000000100358800180027212120254801558010024005416000080100240000160000480499960338312010680023800428004200332448010020016000024000020024000048000080042800421180201100991001008000080000010016000002516003000069716003061223300051091171180132180000106160000240000801008004380043801568004380043
400204800426200000000036000180027212120254801008010024005416000080100240000160108480499959996312007980023800428004223032448010020016000024000020024000048000080042800421180201100991001008000080000010016000002516002900030160029003033000510911711800390800001010160000240000801008004380043800438004380043
4002048004262000000000480001800270121202548015580100240054160000801002400001600004804999603383121065800238004280042071624480100200160000240000200240000480000800428004211802011009910010080000800000100160000025160134020361600206129330005109117118003908000006160000240000801008004380043800438004380043
400204800426200000010036000080027012002548010080100240055160000801002400001600004804999603283121073800238004280042003244801002001600002400002002400004800008004280042218020110099100100800008000001001600000251600290033316002961303300051091171180039180000126160000240000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22243a3f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)6061696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2cfd0d2d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
400025800556200000000540102800272121202548007380010240063160000800102400001600004800499607243123203008002308004280042032448001020160000240000202400004800008004280042118002110910108000080000010160000033160037104016000001300050190091798800390800001410160000240000800108004380043800438004380043
4000248004262000000004301028002721212025480010800102400001600008001024000016000048004996064931218570080023080042800420324480010201600002400002024000048000080042800421180021109101080000800000101600000331600370038160029613741050190082589800390800001414160000240000800108004380043800438004380043
4000248004262000000004388102800272121202548007380010240000160000800102400001600004800499606493123335008002308004280042032448001020160000240000202400004800008004280154118002110910108000080000010160000001600370040160037613041050190091789800390800001514160000240000800108004380043800438004380043
4000248004262100000004301028002721212025480073800102400631600008001024000016000048004996070331217630080023080042800420324480010201601242400002024000048000080042800421180021109101080000800000101600000331600370037160037613741050190091799800390800001410160000240000800108004380043800438004380043
4000248004262100000005500028319521212025480010800102400631600008001024000016010848004996065931218270080023080042800420324480010201600002400002024000048000080042800421180021109101080000800000101601020411600371037160037610005019005179980039080000010160000240000800108030080043800438004380043
400024800426220000000430002800272012025480073800102400541600008001024000016000048004996065031217630080023080042800420324480010201600002400002024000048000080042800421180021109101080000800000101600000411600000029160000612941050190010179980039080000150160000240000800108004380043800438004380043
400024800426200000000430102800272121202548006480010240063160000800102400001600004800499608523121776008002308004280042032448001020160000240000202400004800008004280042118002110910108000080000010160000041160000207411600376037410501900517911800390800541714160000240000800108004380043800438004380043
40002480042620000000000002800272121202548007380010240063160000800102400001600004800499606503121781008002308004280042032548001020160000240000202400004800008004280042118002110910108000080000010160000001600300030160030000005019009179980039080000014160000240000800108004380043800438004380043
40002480042620000000043001080027200025480073800102400631600008001024000016000048004995999931217760080023080042800420324480010201600002400002024000048000080042800421180021109101080000800000101600000331600370029160030613741050190091788800390800001410160000240000800108004380043800438004380043
4000248004262000000005500028002720120254800108001024006316000080010240000160000480049959996311999400800230800428004203244800102016000024000020240000480648825418278226180021109101080000800000101601040411600370037160037613941050190091798800390800001416160000240000800108004380043800438004380043