Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, 2S)

Test 1: uops

Code:

  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.009

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
650052932922011400108104543287960021699050093012200030002000100003573362292102904129247310500020003000200060002914229129116100110001000020006200200420044061332690696874309438420213319538091662571028336163961327215024200030002938529327294052939829220
65004292672203100006004568288490221698750123009200030002000100033574632289202910529375310500020003000200060002916329066116100110001000020006200000220004241287391286853307226220188305838081559611028408163151332914986200030002927729376293022936029379
65004292362204000006004556288740221703350093006200030002000100003579632295402905029358310500020003000200060002921729111116100110001000020006200000220024261296892406820308036320269313138151352531128419165971333715025200030002928229291294182925329407
6500429286220300000310459728815020171065006301220003000200010000357708228970291632939431050002000300020006000292622919711610011000100002000620000022004404128009041690231310642023730313823236766928405163951344415220200030002928529318293102937429305
65004292402198100008004576288370021694450063009200030002000100003575092290602907529316310500020003000200060002913529100116100110001000020006200200020024261282891046913306306320158311538171968661228412163251325514961200030002930429303293652936329458
65004293042197000006104466287950221707750093012200030002000100003577332292702910629257310500020003000200060002919129172116100110001000020006200400220024241285891126928306707620161312138152063561028456162781335214941200030002929829238292622924529334
65004293472195000008104577288280001692050093006200030002000100033574932290002908929363310500020003000200060002918229185116100110001000020004200400220004261300790476955309116720243310038111865671028464159961321214948200030002917429279293412935729340
65004292802196000008004599288130011700450093012200030002000100003578352287202913429324310500020003000200060002911129167116100110001000020006200000020044261277190456895312316220176311538141970701228365162391325414926200030002923929192292752922629345
65004293092205000004004606287970101696150123009200030002000100003576652290102912229168310500020003000200060002913529158116100110001000020006200200220024061285691626858308636520277308538121260621128479164111337115023200030002936329337294402916629229
65004293632205000008104544287880201706350063009200030002000100003577922290602907429253310500020003000200060002912829226116100110001000020004200000420024261307992876937316846120203308038171771661028454164941325215240200030002933629359293152930129296

Test 2: throughput

Count: 8

Code:

  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  ld3 { v0.2s, v1.2s, v2.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205800696000101103801008002621200254001571002400001600001002400001600005008003772881982180022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600003516000000016000061323505109117118003810101600002400001008004280042800428004280042
4002048004160000000038010080026212120254001581002400581600001002400001600005008003772881975080022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600000160032003516003261035051091171180038110101600002400001008004280042800428004280042
4002048004160000000038010080026012120254001571002400571600001002400001600005008003742882019180022801288004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600003516003200016003200035051091171180038110101600002400001008004280042800428004280042
40020480041599000000380100800262121202540015710024011116015210024000016000050080037428819681800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016000035160032003216000061323505109117118003810101600002400001008004280042800428004280042
4002048004160000000038010080026212120254001581002400581600001002400001600005008003772882023080022800418010503234001002001600002400002001600004800008004180041118020110099100100800008000001001600003516003200016000061324005109117118003811001600002400001008004280042800428004280042
4002048004160000000038000080026212120254001571002400571600001002400001600005008003772880000080022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600003516003200321600006100051091171180038010101600002400001008004280042800428004280042
4002048004159900000031117610080026212120254001571002400581600001002400001600005008008532880000180022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600003516000000016003200324005109117118003811401600002400001008004280042800428004280042
4002048004159900000000000800262121202540010010024000016000010024000016000050080000028800001800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016000035160036000160032613240051091171180038110141600002400001008004280042800428004280042
4002048004160000000000000800260121204640010010024005716000010024000016000050080000028800000800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016000035160032014116003261035051091171180038114101600002400001008004280042800428004280042
400204800415990100004200108002601200254001571002400631600001002400001600005008003742881968180022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600003516003200016003261035051091171180038110101600002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800556000000005401028002621212254000681024006316000010240000160000508000002881982018002280041800410032340001020160000240000201600004800008004180041118002110910108000080000110160000351600320351600366132000502231112217002318800381010160000240000108004280042800428004280042
4000248004159910100054010080026212122540001010240063160000102400001600005080096028897570080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600003516003603616003660035005020000123170018238003801410160000240000108004280042800428004280042
4000248004160010100000100800262121225400010102400631600001024000016000050800886288341600800228004180041003234000102016000024000020160000480000800418004111800211091010800008000001016000001600000016003661323500502030012317002212800381010160000240000108004280042800428004280042
400024800415991010004201028002621212254000731024006316000010240000160000508003772881982008002280041800410032340001020160000240000201600004800008004180041118002110910108000080000110160000016003600160036003200050223001131700231280038000160000240000108004280042800428004280042
400024800416001010003801028002620124740044110240000160000102400001600005080007528819770080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600003516003603616003261360005020000123170019238003801414160000240000108004280042800428004280042
400024800415991010004201028002621202540007310240000160000102400001600005080037728833270080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600003516003603616000060324000502000012317002322800380010160000240000108004280042800428004280042
4000248004160000000042010080026012122540006710240063160000102400001600005080000028820210080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600003516000003616003261000050203001231700182280038000160000240000108004280042800428004280042
400024800415991010004201008002601202540001010240063160000102400001600005080000028833270080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600003516000003616003261035005020300115170022138003801410160000240000108004280042800428004280042
40002480041600101000420100800260121225400010102400581600001024000016000050800000288197500800228004180041003234000102016000024000020160000480276800418004111800211091010800008000001016000035160000039160049610000502030012317002223800380140160000240000108004280042800428004280042
400024800416001010013801028002621202540007310240063160000102400001600005080375128819750080022800418004100323400010201600002400002016000048000080041800411180021109101080000800000101600000160000236160032613635005020000123170013238003801014160000240000108004280042800428004280042