Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (single, B)

Test 1: uops

Code:

  ld1 { v0.b }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.001

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.001

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200528445212023030011410105009281250011641120001002100010001000500011901622668281402820231020001000100010002000283912823811610011000100001000021000000100001200140109892715134191067198323450381618566327897144481313413409100010002835228426283442822328201
620042833521401902101030105147281510101626120011001100010001000500011926622655282632823831020001000100010002000280472825311610011000100001000001001000100121300138139764729333631143197563366381611585928049140661252513351100010002846828239282912832028439
620042826621302702900000005008280960001617120011000100010001000500011926522694281812825131020001000100010002000282622821311610011000100001000021000000100001000138169825728334571152196093454381914556928038143261245013176100010002826028223283432817928119
6200428211215021023011435010513027959000162842000100110001000100050001192752270028057282193102000100010001000200028173282401161001100010000100002100010010002120013581995670803365855196243321382225575627849149131268113281100010002828428394284552830928253
6200428289212021019000001050792811901016286200010011000100010005000119044227162827328192310200010001000100020002813428213116100110001000010000010000001000202001368210176724134451056197103434382120565927899144791243013976100010002851128320284742827728243
62004282822130250280001780004919281060101628120011001100010001000500011929222657284752854431020001000100010002000284202826611610011000100001000021000000100000300135551023072123270953196153382382013575528000148251234113049100010002819928143285232834028326
62004283902120240250002701049242816600016252200110011000100010005000119320226522841828296310200010001000100020002813128080116100210001000010000010000001001200001387310409723833871259196223350381912555527888146571236213108100010002849928546284532837028485
6200428233211025016000310004976280510001650620011001100010001000500011922322729282012857231020001000100010002000282452827611610011000100001000021000000100020000135669842727434191253194963392382314515127929148871242013346100010002842928314281512815728187
620042837421202502400030005080283100001642520001000100010001000500011926322696281782825431020001000100010002000283692820611610011000100001000001001000100021200135561012971623423961197963378381716555227870145281241813294100010002859228152281452832528237
620042814821102302301130004885280290101634420011001100010001000500011929122722284412822731020001000100010002000281402813611610011000100001000021000000100120200140979797721433361253196083318381617555728029142411253313322100010002830428234282062849428235

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.b }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400531049100000042501011400431394561293672570102401002000410000301002000010000126397666939741431222910140032014004114005613181031323916010030200100002000060200100003000014042914004511502011009910040100100001000001001000121100020011100001111000321011391113956640000966100001000040100140057140054140054140054140042
6020414005310481100000700001400261394561293642570102401002000410000301002000010000126381466938301431222910140017014005614005313179931323916010030200100002000060200100003000014004114004111502011009910040100100001000001001000211100020001100001101100321011391113955040000600100001000040100140042140057140057140057140042
60204140056104910100006500001400261394881293642570104401002000410000301002000010000126397666932441431222910140032014005614004313179931323916010030200100002000060200100003000014004114005311502011009910040100100001000001001000111100020001100000101010321011391113955040000999100001000040100140057140057140057140054140042
6020414005610491110004201011400261394291293642570104401002000210000301002000010000126397666932441430966710140029014004114005613187631324016010030200100002000060200100003000014005614004111502011009910040100100001000001001000111100030011100001111000321011141113956740000069100001000040100140057140042140057140042140042
60204140041104910010005601001400381394291293642570104401002000410000301002000010000126400366932441430966710140017014005614005613179931323916010030200100002000060200100633000014005614005311502011009910040100100001000001001000240100020011100001111000321011141113955040033090100001000040100140057140057140057140057140057
60204140056104910100003400011400381394561293642570104401002000410000301002000010000126381466946691430966710140032014005614005613179931323916010030200100002042360200100003000014005314005311502011009910040100100001000001001000221100020004100031101000321011391113956740000996100001000040100140054140057140042140042140054
6020414004110491010000893520001400381394131293532570104401002000210000301002041810000126381466932441431222910140029014044114005913187231324016010030200102152000060200100003000014005614004111502011009910040100100001000001001000111100020011100001111100321011391113956240000699100001000040100140042140042140057140045140055
6020414005610491110000101011400261394301293672570104401002000410000301002041810000126396366932441431222910140017014005614005313184031323916010030200100002000060200100003000014005314004111502011009910040100100001000001001000210100010101100000101000321011141113956740000990100001000040100140042140057140042140046140054
60204140041104910111002010014004113942912935325701044010020004100003010020000100001263963669324414312487001400320140042140059131907313240160100302001000020000602001000030000140056140053115020110099100401001000010000110010001311000110116505100001111100321011141113955040000966100001000040100140054140054140057140042140042
6020414005610491010000200001400261394291293672570104401002000210000301002021210000126397666932441431248710140017014007614005513179931323916010030200100002000060200100003000014004214004111502011009910040100100001000001001000231100020011100000101200321011391113956740000099100001000040100140042140057140054140057140057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6002514004710490010110010000014002013939712935925700124001020002100003001020000100001264534669324414325254311400171400411400411318240313243960010300201000020000600201000030000140041140041115002110910400101000010000010100022010002011100001111003140081133313957540000069100001000040010140196140056140060140057140042
6002414005310491100010020000014004113938812936725700144001020006100003001020000100001264507669383014326437411400321400561400531318240313242460010300201000020000600201000030000140056140041115002110910400101000010000010100011110002021100000111103178031133713956040000969100001000040010140057140057140057140054140057
6002414005610491111000020000114003813940012936425700144001020002100003001020000100001264534669397414325254311400321400531400531318210313243960010300201000020000600201000030000140056140041115002110910400101000010000010100021110001021100001101003140031133713957240000960100001000040010140057140042140054140062140057
6002414005310491011000010000014002613940312936725700144001020004100003001020000100001264507669397414326437311400321400561400561318240313244460010300201000020000600201000030000140125140035115002110910400101000010000010100000010000000100001010013140041134713957540000990100001000040010140057140042140057140057140057
6002414004110491101000020100014002613940312936425700144001020004100003001020000100001264534669397414326716311400171400411400411318240313243960010300201000020000600201000030000140056140053115002110910400101000010000110100011110001001100001111103140041134713956040000999100001000040010140051140051140036140051140036
6002414004710690000000060000014003513939712936225700124001020002100003001020000100001264507669397414326716411400171400411400411318240313243960010300201000020000600201000030000140056140053115002110910400101000010000010100011010001001100001111003140031133313957540000900100001000040010140042140042140054140042140062
600241400561049110101001010001400201393971293622570010400102000010000300102000010054126478466939741432643741140029140041140056131809031324246001030020100002000060020100003000014005614004111500211091040010100001000001010000011000000258100000010003140031133313956640000969100001000040010140036140051140051140051140051
6002414003510480000000010000014003513939412936225700124001020004100003001020000100001264370669397414325254411400171400561400561318240313242460010300201000020000600201000030000140053140053115002110910400101000010000010100013110003000100000010203140031133313957540000900100001000040010140042140042140057140057140057
6002414004110491001110020000014004113940312935325700144001020004100003001020000100001264534669324414325254311400321400561400531318240313243960010300201000020000600201000030000140056140053115002110910400101000010000010100011110002001100001111003140031134313957540000090100001000040010140042140057140042140057140054
6002414004110491001100020000014003513939412935925700124001020004100003001020000100001264534669397414326437411400321400561400531318097313243660010300201000020000600201000030000140056140053115002110910400101000010000110100023110002012100001111003140071133313957540000699100001000040010140057140057140057140057140042

Test 3: throughput

Count: 8

Code:

  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  ld1 { v0.b }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0006

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020516006111990000000000016002521801596900251601001008000280000100800008000050042181022939240116003301600521600401396873140019160100200800008000020080000160000160052160052118020110099100100800008000001008000003908000000358003560353900051101161116004906628000080000100160041160053160041160053160053
16020416005211990000041010001600252001596860251601021008000080000100800008000050042128622939203116003301600401600401396753140019160100200800008000020080000160000160052160052118020110099100100800008000011008000003908016500080035000000051102431116004900008000080000100160041160041160053160041160053
160204160052119900000001001160037218181596861225160102100800028000010080000800005004210172293924011600330160040160052139675314001916010020080000800002008000016033616005216004011802011009910010080000800000100800000390800350035800006103900051101161116004900608000080000100160053160041160041160053160041
1602041600401199000004100100160041201815969011251601001008000080000100800008000050042181022939240116002101600521600521396873140019160100200800008000020080000160000160040160040118020110099100100800008000001008000003908000000080000003539003511601611160053010048000080000100160041160057160057160057160057
160204160056119800000450001116002501212159690162516010010080002800001008000080000500421998229395481160037016005616004013969131400231601002008000080000200800001600001600651600461180201100991001008000080000010080000043080039003980039010430005110116111600370101008000080000100160057160041160057160057160089
1602041600601199000000000101600412121215968402516010010080002800001008000080000500422041229395751160037016005616005613967531400141601002008000080000200800001600001600561600561180201100991001008000080000010080000043080039003980039013943000511011611160053010048000080000100160041160057160041160057160057
16020416005611990000045000111600413120159688025160102100800028000010080000800005004218102293924001600330160052160040139687314001916010020280000800002008000016000016004016005211802011009910010080000800000100800000008003500358003501350000511011611160049010608000080000100160041160053160053160041160041
160204160040119900000000001160037218015969002516010210080002800001008000080000500421810229381471160021016004016004013968731400101601002008000080000200800001600001600521600521180202100991001008000080000010080000000800350035800356003900051101161116004906628000080000100160053160041160041160053160041
1602041600521199000000000111600370181815969012251601021008000280000100800008000050042181022939240116003301600521600401396873140010160100200800008000020080000160000160052160052118020110099100100800008000001008000003908000000358003560353901051101161116004900008000080000100160041160053160041160041160053
1602041600521199011000001011600372181815968616251601021008000280000100800008000050042181522939240116003301600521600521396753140010160100200804948000020080000161314160052160052118020110099100100800008000011008000003908003510478003561353900051101161116004906028000080000100160041160041160053160053160053

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160025160060119900004500116004121115968502516001210800028000010800008000050422600229395591160041016006016004013969731400401603222080000800002080000160000160040160040118002110910108000080000110800000438000003880039613944000502081655160037000800008000010160061160041160061160041160061
1600241600601199010100001600450101596840251600101080000800001080000800005042154522939547116004101600601600601397173140040160010208000080000208000016000016004016004011800211091010800008000001080000008003800800000039440005020616781600370140800008000010160061160090160061160061160061
1600251600601198011144100160025210159684192516001210800008000010800008000050422643229381471160021016006016006013969731400201600102080000800002080000160000160060160040118002110910108000080000010800000438003900804286139000050207165516005710140800008000010160041160041160061160057160057
16002416004011990100114001160045201159684025160012108000280000108000080000504210852293814711600410160060160040139717314004316001020800008000020800001600001600561600561180021109101080000800000108000000800380388000060384400050205165616005714140800008000010160061160061160061160041160061
160024160056119800000001160045001159685192516001210800028000010800008000050421564229381471160037016006016006013971731400401600102080000800002080000160000160060160040118002110910108000080000010800000080038138800000038430005020616651600371407800008000010160109160061160061160061160041
1600241600401199010044001160025200159684162516001210800028000010800008000050421638229381471160041016006016006013969731400401600102080000800002080000160000160040160056118002110910108000080000010800000438003800800006138440005020616751600571007800008000010160041160041160061160041160057
16002416005611980100440011600252111596901625160010108000280000108000080000504227412293954711600410160040160060139717314003616001020800008000020800001600001600561600401180021109101080000800000108000004380054039800006104400050208166516005710100800008000010160061160041160061160061160041
1600241600601199000001011600410101596840251600121080002800001080000800005042163022939975116002101600601600401397173140040160010208000080000208000016000016006016005611800211091010800008000001080000043800000428003960394300050208166516005310140800008000010160041160057160041160061160057
16002416006011980000561011600452101596901925160012108000080000108000080000504225172293997511600410160040160040139697314002016001020800008000020800001600001600601600561180021109101080000800000108000004380038138800006004300050205168816005310144800008000010160057160041160041160041160041
1600241600601199000044001160045211159684192516001210800028000010800008000050422540229396751160041016010916006013971731400361600102080000800002080000160000160060160056118002110910108000080000110801310438000000800396139000150207165616005310140800008000010160041160061160061160061160041