Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (post-index, 8B)

Test 1: uops

Code:

  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.008

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 1e | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
65005293212211032012200304562288550111693860001000401610001000400010005000500047420172282129101293293106000100040002000400029235291641161001100010001000021000000100020300012797910368033033126020245307138152158572842610001627413291142341000400010002935129326292402927029362
650042937022000260023002046812886610016939600810004012100010004000100050005000475513229702921929388310600010004000200040002924229112116100110001000100122100301110012101135113366922668913080106720294333638151863622801910001631813115144621000400010002929929363292362930729300
6500429488228012310241030452228838000169936008100040081000100040001000500050004742032289329126293963106000100040002000400029189292851161001100010001000001000000100020200013083924868653092135620282309438211958592840610001633413239142611000400010002929429242294642932029545
650042935622000220021004046672886000117030600810004008100010004000100050005000475523228642902729394310600010004000200040002919629188116100110001000100002100000010002020001283192636825314665820201307838162358572850610001640413204145091000400010002928329335293912942429342
6500429324219002600220000454728860000169956016100040081000100040001000500050004758402288329146292493106000100040002000400029169291821161001100010001000021000303100000000012940923068613064115820106304838131755572846910001623313116143281000400010002935029352292822939229371
6500429289219002400250030458128886011169586008100040161000100040001000500050004759732285129054292943106000100040002000400029156292001161001100010001000001000000100000200012998928168823078115920197306638151572612848910001642013336143421000400010002934329295293182927329282
650042935922000230022002046832883701017030600010004016100010004000100050005000475480227062921629359310600010004000200040002926229194116100110001000100003100260010002100001278990626836303496620273304938192050582852210001637113220142561000400010002933329402293172932929343
650042947424200220024000045992880400016963600810004008100010004000100050005000475526228872910429403310600010004000200040002922329266116100110001000100003100000210000220001276691906842307246120342310538172053542842410001643813199145351000400010002929129336294332941329261
650042929922001211021105046992880810017132600410004004100010004000100050005000474363228812908529324310600010004000200040002922829147116100110001000100143100302110010231301284291306833304497120303310838111959592840610001606713256144321000400010002924129311293252943529439
6500429232220012200221040457828831000169086008100040081000100040001000500050004758222287429132293093106000100040002000400029259292311161001100010001000021000000100020000012946922268313066116820186310538152360622853910001625013089142301000400010002938429291292622933329278

Test 2: throughput

Count: 8

Code:

  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020580069599000100019000800421660254801568010032005280000801003200008000048049948000576800200800388005780057003394801002008000032000020016000032000080054800541180201100991001008000080000010080000138000000128001361100000510911721800551800004680000320000801008005880058800588005880058
400204800546200000000520008004216602548010080100320048800008010032000080000480499480005768002008003880057800570033948010020080000320324200160000320000800578005411802011009910010080000800000100800001380013101980087611017000510911711800540800009680000320000801008005880058800588004680058
400204800576200000000200008003016602548010080100320048800008010032000080000480499480005415999208003880057800570038948010020080000320000200160000320000800428005411802011009910010080000800000100800001780090101380012611017000510912511800540800000680000320000801008004380043800588005880058
400204800576200000000190008004210602548056480100320056800008010032000080000480499480007768002008003880057800570033948010020080000320000200160000320000801968005411802011009910010080000800000100800001780013001178001361917000510911711800540800009780000320000801008005880058800588005880195
40020480047621000000018000800421660254801568010032004880062801003200008000048049948000576800200800388005780057003394944962008000032000020016000032000080057800541180201100991001008000080000010080000080000032580013611017000510911711800541800009080000320000801008005880058800438005580043
4002048005762000100001900080042066025480156801803200248000080100320000800004804994800068960020080038800578005700339480100200800003200002001600003200008005780054118020110099100100800008000001008000008001301138001361017000510911711800541800009980000320000801008005580058800588005880196
40020480057620000000019000800420660254801568010032005680000801003200008000048049948001376800200800238004280057003394801002008000032000020016000032000080057800571180201100991001008000080000010080000138001310188001261100000510911711800540800009980000320000801008005880058800588005880058
4002048005762000000002300080042066025480156801003200488000080100320000800004804994799987680020080038800578005700324480100200800003200002001601503200008004580057118020110099100100800008000001008000013800130012800100100000510911711800391800000980000320000801008005880058800588005880058
400204800436200000000000080039106025480164801003200568000080100320000800004804994816954159992080038800428004200324480100200800003200002001601083200008005780057118020110099100100800008000001008000013800120019800000000010510911711800541800009980000320000801008005880058800588005880058
40020480057621000000019000800420060254801568010032005680000801003200008000048049948000686399880800388005780057003364801002008000032000020016000032000080057800571180201100991001008000080000010080000138001301188001261130000510911711801661800009980000320000801008005880058800588019780058

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000258007062200000001320100080194166025480082800103200608000080010320000800004800494800198960020080038080057800570033948001020800003202802016000032000080057800571180021109101080000800001108000001880014102080014611418050191171180055180000101080000320000800108006180061801978005980058
40002480057621000000002201000801811660254800708001032024080000800103203048000048004948000786400200800380800578005720034248001020800003200002016000032000080057800571180021109101080000800001108000001880014001480014611418050191171180054080000101180000320000800108005880058800588005880058
4000248019762000000001200100080042166025480070800103200608000080010320000800004800494800118640020080038080057800570033948001020800003200002016000032000080057800421180021109101080000800000108000001880014101480013611418050191171280054180000101080000320000800108005880058800588005880058
4000248005762000000000320000080042166025480070800103200608000080010320000800004800494800118640020080038080057800450033948001020800003270042216447633024080451800571180021109101080000800001108000001880014001380014611318050191171180054180000101080000320000800108005880058800588005880058
4000248006062000000000200100080042166025480070800103200608000080010320000800804800494800128640020080038080057800570033948001020800003200002016000032000080057800571180021109101080000800000108000001880000001480013611418050191171180054180000131080000320000800108005880058800588005880059
40002480057620000011102000010800421660254800708001032006080000800103200008000048004948000786400200800380800578005700253948001020800003200002016000032000080057800571180021109101080000800000108000001880013001380013611418050191171180054080000101080000320000800108005880058800588005980058
4000248005762100000000200001080042166025480082800103200728000080010320000800004800494800118640020080038080057800570033948001020800003200002016000032000080057800571180021109101080000800000108000001880014101380013611418050191171180054180000101080000320000800108005880058800588005980058
400024800426200000110020000108002716606248007080010320060800008001032000080000480049480007864002008003808005780057003394800102080000320000201601503200008005780057118002110910108000080000010800000188001400148001361141805019117118005418000001380000320000800108034880058800588005880058
4000248005762000001000200001080042166025480070800913200608000080010320000800004800494800078640020080038080057800570034248001020800003200002016000032000080057801961180021109101080000800000108000001880000101380014611418050191171180054180075101080000320000800108006180061800588019780058
40002480058621001010221528800008079616622102480480801723203208008080010320000800804805264833817735888080276080348801954027541411344809702080076320644201603203203208035080349218002110910108000080000010800690188009200265480169611418050491252182271081285101080000320000800108035180203801998019780504