Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (post-index, 8H)

Test 1: uops

Code:

  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.006

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6400529753239202003011004000471629002011174255009100030091000100030001000500050003571810002289429252296093105000100030002000300329440293651161001100010000100020100000110000130001317193086900311113620610337838122336372884910001624213368144571000300010002965529701297362951429552
640042953923800000301100400046602903500117511500910003009100010013000100050005000357818002285229441295903105000100030002000300029452293551161001100010000100003100210310000000001310593386892317003520529341838042137382885510001621813625147321000300010002968129617295642962129601
64004296392380031010000088800464729017001173895009100030001000100030001000500050003578710002282629422296283105000100030002000300029416294791161001100010000100300100100210003000001315494566901313013520648334638112635362881210001644113228145091000300010002957529591295762962229485
640042947623800320000010100047572905800017391500910003009100010003000100050005000357476092279129546296823105000100030002000300029487295751161001100010000100230100030310002031001330493516968319414020602338738132537352882110001645413332145991000300010002964829735296532956329684
640042960323900200100000500046292905800017461500910003000100010003000100050005000357895002287829345296123105000100030002000300029414295621161001100010000100320100101610000001301312093416980318624020684327338082534342880810001664313534145631000300010002963629639296332965029660
64004295492380021030000114800046642906700017426500910003006100010003000100050005000357185002288329429296103105000100030002000300029502294991161001100010000100023100000310000120001322593816958313503520649324338162831302874210001646213696145701000300010002956829717295112951729608
640042965423800300210000110004667290170001740650061000301210001000300010005000500035643900229083022730870310500010003000200030002930129387116100110001000010000210000043210002120201312394636929323303620468327038142636372866210001620313442145451000300010002933929467294732942029309
64004294892360110010110010004762288140001724650091000300910001000300010005000500035725100228282913229391310500010003000200030002927129274116100110001000010000010000040610002130001320096107002309203720435327838172031372866410001616013364143241000300010002933829384293962943429628
640042946123801200301100100046562890500017295500010003006100010003000100050005000356924002282929277294923305000100030032000300029331292391161001100010000100003100000010002120001329893976965318014020431327338142430402856510001630613665145071000300010002947229416295232942229408
640042961823700100206000300045722887400017310500610013006100010003000100050005000356381002287229256294753105005100030002000300029313293181161001100010000100003100000010002020001329595366982316423720511322638142134372857410001639113204143001000300010002939829486294992945629513

Test 2: throughput

Count: 8

Code:

  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld3r { v0.8h, v1.8h, v2.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202058006962100000161000800275600025400148801002400158000080100240240800004804994800063120178800858011880119795240030020080000240000200160000240000800428004211802011009910010080000800001100800000138001001880005601000510912511800390800006680000240000801008004380043800438004380043
3202048004262001110028012800275610002540017380140240070800008010024000080000480499480027312234280023800428004203244001002008000024000020016000024000080042801181180201100991001008000080000010080000013800130012800090010130510911711800391800006680000240000801008004380043800438004380043
32020480042620000000350008002756100025400170801002400158000080100240000800004804994800233122342800238004280042082440010020080000240000200160000240000800428004211802011009910010080000800000100800000080022002380022509220510921711800390800006680000240000801008004380043800438004380043
320204800426200000001400108002756100025400170801002400708000080100240000800004804994800063120172800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800000258000900575800056021210510911711800391800000680000240000801008004380043800438004380043
32020480042620000000260108002750100025400170801002401868000080100240000800004804994800353122342800238004280042032440010020080000240000200160000240000801188004211802011009910010080000800000100800000080021012180020612100510911711800390800006680000240000801008004380043800438004380043
3202048004262100010016010800275010002540012180100240070800008010024000080000480737480027312234280023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080000426800210124800080022220510921711800391800000780000240000801008004380043800438004380043
320204800426200000001601080027561011025400172801002400708000080100240000800004804994800273120349800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800000258002200345800216121210510911711800391800000680000240000801008004380043800438004380043
3202048004262100000016010800277610002540036480100240072800008010024000080000480499480002312234280023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080000026800130020800076022220510911711801020800006080000240000801008004380115800438004380114
320204800426200011002001080095561000254001218010024001580000801002400008000048049948002731223428008580042800420324400100200800002400002001600802400008004280115118020110099100100800008000001008000002680010002080042605130510911711800390800006680000240000801008004380043800438004380043
32020480042621000000260108002756600254001158010024001580040801002400008000048049948002331208528002380042800420324400100200800002400002001600002400008004280049118020110099100100800008000011008000000800200121800100120220510911711800390800006080000240000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002580056620000000034000800270660025400263800102400598000080010240000800004800494800153121493800238004280042003244000102080000240000201600002400008011880042118002110910108000080000010800000188001300178001401132000501907177580039080000010080000240000800108004380043800438004380119
320024800426200001000340008002706600254000698001024005980000800102400008000048004948001531199948002380042800420032440021020800002400002016000024000080042800421180021109101080000800000108000001880016001480000611422005019041775800391800001310080000240000800108004380043800438004380043
3200248004262100000003400080027166002540006980010240000800008001024000080000480049480015312005080023800428004200324400010208000024000020160000240000800428004211800211091010800008000011080000018800160018800000100005019051785800391800001410080000240000800108004380043800438004380043
32002480042620000000035000800271660025400069800102400658000080010240120800004800494800213121469800238022380042003244000102080000240000201600002400008004280042118002110910108000080000010800000080000101480013611320005019071777800390800001310080000240000800108004380043800438004380043
320024800426200000000340008002716600254000698001024000080000800102400008000048004947999831199948002380042800420032440001020800002400002016000024000080042800421180021109101080000800000108000000800160008001561141800501905177580039180000130080000240000800108004380043800438004380043
320024800426200001000340108002716600254000698001024005180000800102400008000048004948000731199948002380042800420032440001020800002400002016000024000080042800421180021109101080000800000108000001880016000800146100005019071777800391800001310080000240000800108004380043800438004380043
32002480042620111000035000800271600025400061800102401148000080010240000800004800494800193121542800238004280042003244000102080000240000201600002400008004280042118002110910108000080000010800000188000000178001361132000501907175780039180000130080000240000800108004380043800438004380043
320024800426200000000340008002706600254000698001024005180000800102400008000048004948163231214938002380042800420032440001020800002400002016000024000080042800421180021109101080000800000108003701880016000800006002000501906178880039080040010080000240000800108004380043800438004380043
3200248004262000000016010800271600025400069800102400598000080010240000800004800494800153119994800958004280042003244002102080000240000201600002400008004280042118002110910108000080000010800000080016001680014611620005019052557800391800001313080000240000800108004380043800438004380043
320024800426200000000166000800271660025400010800102400598000080010240000800004800494800153121493800238004280042003244000102080000240000201600002400008004280042118002110910108000080000010800000188001520553800006116000501907176480039180000130080000240000800108004380043800438004380043