Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (post-index, 1D)

Test 1: uops

Code:

  ld1r { v0.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.002

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.002

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
620052930722926040000404625289420017198300310001002100010001000100050005000119461227052918529306310300010001000200010002919529269116100110001000100001100000001000202001304293816891317005920765320838232058642858710001626813473148941000100010002940929465293692957629461
6200429510227020400002046672892000174003003100010001000100010001000500050001193672270329092293213103000100010002000100029234292191161001100010001000021000054031000203001294792536905305715820806326138161564602855110001618313331145901000100010002927829313293482934529377
620042930722701030000204601287970017252300210001002100010001000100050005000119397226112916729261310300010001000200010002927829288116100110001000100002100001001000200001308993346881307316820792326638262259622852810001631513325143431000100010002937429306292842928829406
620042936722801020000304652289260017326300310001003100010001000100050005000119317226422915229412310300010001000200010002923129395116100110001000100003100000001000212001296792826887310014920744318038202162642850610001625113323145391000100010002936829388294152931829383
6200429181227040100002047042883610172943003100010031000100010001000500050001190610225552919129380310300010001000200010002922529211116100110001000100003100100011001000001321494196877309616320878325838241762642850510001622313032145481000100010002932329357294152947329338
6200429358228040401001604592287891117312300210001000100010001000100050005000119404226092916429265310300010001000200010002922829195116100110001000100000100100001001213001304194566936309316520716327838111864612845110001593013304146971000100010002952529344293102944829371
620042927322704030000004655288720117317300210001003100010001000100050005000119358225992921729319310300010001000200010002923429102116100110001000100003100102011000000001322494016954312206620651317738231362582858910001618113319145741000100010002926229451292842936229325
620042928722602020000304599288970117325300310001003100010001000100050005001119422226472916029256310300010001000200010002927729176116100110001000100001100100001001213001304692836888310805620737326138091867692850410001622013386145831000100010002928329327293592936629458
620042936822703020000404577288471017331300310001003100010001000100050005000119277226642903429269310300010001000200010002917729226116100110001000100003100000011001213001299891976925312216220715318638142165612867410001606013244145511000100010002938929221294082927129395
620042930222604020000404660288571117316300310001003100010001000100050005000119458226282912729366310300010001000200010002932029154116100210001000100001100000001001213001277494296858309806420638321838111859612846210001608813110145271000100010002925629391294112926129320

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.1d }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60205140051108501000001000014003913957525801005010020002100004010020000100001245718530431910710598140030014005414005413196831324417010030200100002000060200200002000014005414005111502011009910040100100001000001001000000100000031000011000032126936613971850000131013100001000050100140055140055140055140055140055
602041400511125000000010000140039139556258010250100200021000040100200001000012456735303927107105221400320140056141136133314313243570100302001000020000602002000020000140057140054115020110099100401001000010000110010002211000023091000211011132205165213983350000969100001000050100140051140051140051140052140051
602041400351085000000010000140035139571258010250100200021000040106200111000612457515303984107132081400110140067140050132036613250070123302191000720014602382001420014140050140047115020110099100401001000010000010010000011000000010001110111322063245613985850000969100001000050100140051140051140051140051140051
6020414005010850000072649901668800014003613957125801025010020000100004010020000100001245637530448710710210140011014003514003513196431324317010030200100002000060200200002000014005014004711502011009910040100100001000001001000001100000001000010000032126936613969950000969100001000050100140051140051140051140051140051
602041400501049000000028000014003813957125801005010020002100004010020000100001245607530452510710210140029014005014003513196431324347010030200100002000060478200002000014005214004911502011009910040100100001000001001000000100000031000011000032126936613979150000969100001000050100140051140051140051140051140036
60204140047104900000001000014003613957125801025010020002100004010020000100001245637530448710710210140026014005014005013196431324317010030200100002000060200200002009814008214005411502011009910040100100001000001001000001100001001000011000032123936313976550000969100001000050100140036140051140051140036140037
60204140035108500001000000014003913957125801025010020002100004010020000100001245637530448710710210140027014005014005013196531324317010030200100002000060200200002000014005014004711502011009910040100100001000011001000001100001061000011000032126936613974250000999100001000050100140051140048140048140051140036
602041400551086000000010000140035139571258010250100200021000040100200001000012456075304487107102101400110140050140096131965313243170100302001000020000602002000020000140052140049115020110099100401001000010000010010000011000000910000110000321269336139773500006179100001000050100140051140051140054140051140051
602041400561086000000013000014003513957125801025010020000100004010020000100001245637530448710710210140023014003514005013196431324317010030200100002000060200200002000014003514004711502011009910040100100001000001001000001100001001000010000032126937713972650000660100001000050100140051140036140036140051140051
60204140050108500000001000014003513957125801025010020002100004010020000100001245637530392710710210140026014005014005113196431324317010030200100002000060200200002000014005114004711502011009910040100100001000001001000001100000031000011000032126933613980550000600100001000050100140036140048140048140048140051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
600251400531049110000014000014002613966025800145001020002100004001020000100001245842530750410717454140032014005014005613200403132467700103002010000200006002020000200001400411400531150021109104001010000100001101000111100020011000011110314003883313972750000767100001000050010140062140060140054140042140057
6002414004110861100000140000140035139645258001250010200041000040010214181031712458425307618107171091400323140056140056132004013132458700103002010000200006002020000200001400501401411150021109104001010000100001101000241100000041000110110314003883313972850000999100001000050010140057140057140051140057140057
60024140053108610100002000014004113965825800145002020002100004001020000100001245842530773210717109140032014005614005613199803132464700103002010000200006002020000200001400561400561150021109104001010000100004101000011100010032451000010111314002882213972750000061100001000050010140358144113140329140054140054
60024140056108510100002000014004113965725800145001020004100004001020000100001245851530777010717454140026014013714005613199803132464700103014010000200006026220000200001400561400561150021109104001010000100000101000021100011061000001101314003883313971250000999100001000050010140057140054140054140042140052
600241400561085100000020000140074139660258001250010200081000040010200771000012458395307732107174541400320140056140056131990031324647001030020100002000060020200002000014005614015311500211091040010100001000001010002211000010110000111113140038822139727500006610100001000050010140135140057140130140060140060
60024140056108611010002000014013613962925800145001020002100004001020000100001245842530773210721850140032014005314005613200103132467705343050410202203226100220322204081405891405476150021109104001010000100000101000611100001044861000010110323202883313972750000999100001000050010140060140142140052140057140057
600241400531085111000014000014004613964525800145001020000100004001020000100001245821530773210718987140017014014514005613199803132464703023002010000200006002020000200001400561400551150021109104001010000100000101000021100021001000010111314003882313972450000666100001000050010140057140057140051140057140042
60024140056108610100002000014004213964525800145001020004100004001020000100001245842530773210717811140011014005014005613199803132464700103002010000200006002020000200001400561400561150021109104001010000100000101000021100011001000011110316402873313972450000969100001000050010140057140057140057140057140057
600241400411086100000014000014014613966025800145001020004100004001020000100001245842530773210717454140032014005614005613198503132443700103002010000200006002020000200001400501400531150021109104001010000100000101000221100000011000011101314003882213972750000966100001000050010140059140051140054140141140051
600241400561085100000020000140035139660258001250010200041000140010200001000012458425307732107171091400320140056140056131990031324587001030020100002000060020200002000014031314005321500211091040010100001000001010004101000100630610003101133163031143213987950040999100001000050010140244140329140242140320140236

Test 3: throughput

Count: 8

Code:

  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  ld1r { v0.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205800416200100000120008002616662524011980100800188000080100801408000043590023758824491866610800228004180041599813599992401002008000080000200160000800008004180041118020110099100100800008000001008000021480010009800136113170005110116011800380800001068000080000801008004280042800428004280042
1602048004162000000001800080026106425240118801008001980000801008000080000435900237588244918666008002280041800415992435999924010020080000801402001600008000080041800411180201100991001008000080000010080000008001200124380000611317000511011601180038080000998000080000801008004280174800428004280042
160204800416200000000190008002616652524011980100800188000080100800008013443590023758824491867700800228004180041599243599992401002008000080000200160000800008004180041118020110099100100800008000011008009001780013101380013611317000511011601180038080000988000080000801008004280042800428004280042
16020480041620000001000008002616652524011980100800198000080100800008000043590063758824491867700800228004180041599243599992405252008000080000200160000800008004180041118020110099100100800008000001008000001780013002880012611317000511011601180038080000668000080000801008004280042800428004280042
160204800416210000000000080026166425240118801008001980000801008000080000435900237588244918666008002280041800415992435999924010020080000801442001600008000080041800411180201100991001008000080000010080000017800130008001261120010511011602180038180000968000080000801008004280042800428004280042
160204800416200001000200008002616648252401188010080019800008010080000800004359010376288149186220080022800418004159924359999240100200800008014420016000080151800418004111802011009910010080000800000100800000148001300116380012611017000511011601180038080000908000080000801008004280042800428004280042
1602048004162000000001900080026166425240389801008001980000801008000080000435901437588244918666008002280041800415992435999924010020080000800002001600008000080041801761180201100991001008000080000010080000014800121012078001261017000511011601180038080000968000080000801008004280042800428017780042
160204800416210000000210008002616692524011880100800198000080100800008000043590023758824491866300800228004180041599243600882401002008000080000200160000800008004180041118020110099100100800008000001008000001480015001380009611217000511011601180038080000998000080000801008004280042800428004280042
16020480041620000000019000800261664252401198010080020800008010080000800004359002375882449186660080022800418004159924359999240100200800008000020016000080000800418004111802011009910010080000800000100800000178001300138001361017000511011601180038080000998000080000801008004280042800428004280042
160204800416210000000280018002616656024011980100800008000080100800008000043590023758824491867700801288004180041599243599992401002008000080000200160280800008004180041118020110099100100800008000001008000001780013101280015611017000511011601180038180000908000080000801008004280173800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002580041621000011001900080026000525240029800108001980093800108000080000435843337588244918542108002208017280041599463600212400102080000800002016000080000800418004111800211091010800008000001080008823800260012580000612623705020502162280038180000998000080000800108004280042800428004280042
1600248004162111011100500008002616692524001780010800328000080010800008000043583973758824491842700800220800418004159946360021240010208000080000201600008000080041800411180021109101080000800000108000772380027000268001860623605020002162280038080000998000080000800108004280042800428004280042
1600248004162111121100310108002616612524004180010801208000080010800008000043583973758823491842200800220800418004159946360021240010208000080000201600008000080041801731180021109101080000800000108000672380027000108000061260715020502162280038080000998000080000800108004280042800428004280042
160024800416201000110044000800260661025240017800108003280000800108000080000435838137588234918770158002208004180041599463600212400102080000800002016000080000800418004121800211091010800008000001080006723800260102580019612623705020502163280038180000998000080000800108004280042800428004280042
160024800416201110100032000800260061425240017800108003280000800108000080000435840137588244918884008002208004180041599463600212400102080000800002016000080000800418004111800211091010800008000001080007723800070012880019612523705020002162280038180000998000080000800108004280042800428004280042
160024800416201010110031001800260662612400428001080031800008001080000800004358429375882449187620080022080041800415994636002124001020800008000020160288800008004180041118002110910108000080000010800086238002501029800186125236050200031622801450800001098000080000800108004280042801768004280042
160024800416201000110032000801601661125240041800108003180000801548000080000435838937588234918758158002208017280041599463600212400102080000800002016000080000800418004111800211091010800008000001080008708000802028800196170705020002163380038180000998000080000800108004280042800428004280042
160024800416201101110043000801601661225240041800108003180000800108000080000435839737588234946577008002208004180041599463600212404352080000800002016000080000800418004111800211091010800008000001080006823800280002680022612423605020502163280038180000908000080000800108004280042800428004280042
1600248004162010011100163001800260668252400418001080030800008001080000800004358429375882349187630080022080041800415994636002124001020800008014420160000800008004180041118002110910108000080000010800087268002600011768001961707150205121622800381800901198000080000800108004280042800428004280042
16002480041620100011003100080026006825240041801038003280000800108000080000435843337588244918758158002208004180041599463600212400102080000800002016000080000800418004111800211091010800008000001080006823800070103780019602623715020012163380146180000998000080000800108004280042801788004280042