Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (post-index, 4H)

Test 1: uops

Code:

  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.008

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6500529382228029123111005000463228686001170576012100040191000100040001000500050004765324228562911029329310600010004000200040002921829316116100110001000010000310030002100001400012841930768443037116220287320838061474692845710001621413155144801000400010002940829341294002935329369
650042936522902802000001000045662884801017087601610004000100010004000100050005000475601522937292242937431060001000400020004000291682918411610011000100001000001001000210012200001309792416873313867020355326638082371732853210001612413308144271000400010002934129538294172943329303
650042937522802601700000000046002883100017088601210004012100010004000100050005000476117229192905929502310600010004000200040002923329320116100110001000010000310000103100001300013142939769213076147520260313638121975752848510001633413017143131000400010002939829425294282946529336
650042935722902901700000000046662892501117026601210004012100010004000100050005000476139229392924829442310600010004000200040002926229188116100110001000010000310010000100200300013014934668323143126820392319038071866672856710001647013247143171000400010002925429409295152943329258
650042929522702403000000300046242887301017017601210004000100010004000100050005000475496229102921329463310600010004000200040002916929254116100110001000010000310010101100220300013096931268623090106720295313138041665642855210001644213194143571000400010002939229503294692931629377
6500429365227023124011001200046912884901117197600010004016100010004000100050005000475564228532922929259310600010004000200040002929629157116100110001000010000310000000100001300013117919768953080106920137329838032570682848210001649313051145561000400010002948229353294002940829403
65004293402270240200110040004603288900111719160161000400010001000400010005000500047426622897291992944231060001000400020004000291992921011610011000100001000001000000010002030001297094426856314196620274319738022066752856010001619613362144871000400010002941929349294322930729374
6500429442227026025000004000461928845000170396012100040121000100040001000500050004759515228422919729273310600010004000200040002923029208116100110001000010000310010000100100300012848938869343114117120403315138081372702864810001603713193142261000400010002946329390293392939329381
65004293962270280261000030004712289570011710160121000400010001000400010005000500047400422858290662938931060001000400020024000292432936411610011000100001000001001000310012000001308793306863307397120347312238102071772854710001649213303144371000400010002936129451293422930129345
650042920122702302200000000046402887100117128601210004000100010004000100050005000476214229242915329362310600010004000200040002921729234116100110001000010000310010005100122300013087926369353100127420226318038121768662850710001630613358145341000400010002935429383294172934629368

Test 2: throughput

Count: 8

Code:

  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020580057620000100018000080042166025480156801003200008000080100320000800004804994800207680020080038800578005700339480100200800003200002001600003200008005780047118020110099100100800008000001008000001380009101680009611000510911711800390800000680000320000801008004380043800438004680058
400204800546200100100310000800300600254801568010032005680000801003200008000048049947999876800200800388005780057003364801002008000032000020016000032000080057800541180201100991001008000080000010080000013800000012800006010170510911712800540800000080000320000801008006380058800438004380055
4002048004362000001001900008004210002548010080100320056800008010032000080000480499480014768002008003880057800420032448010020080000320000200160000320000800578005411802011009910010080000800000100800000080012100800126110170513924401804530802300080000320000801008042980266803458044480433
40020480484624100000090100800270600254801568010032005680000801003200008000048049948000676800200800388004280057003244801002008000032000020016000032000080042800541180201100991001008000080000110080000008001300080013619170510911711800541800009080000320000801008005880058800438005880046
40020480042621000000000000800271660254801568010032005680000801003200008000048049947999847970160800388005780057003394801002008000032000020016000032000080057800541180201100991001008000080000010080000017800000098001360000510911711800391800009980000320000801008004380056800588005880055
400204800576491000001148010080180160025480156801803200568006880100320308800004804994800115021508180038800428004200339480100200800003200002001600003200008005780194118020110099100100800008000001008000001780000001380037611002510912511800391800000080000320000801008008780058800438004380196
40020480196620000110013201008004216002548015680100320304800008010032000080080480499479998768002008003580042800420032448010020080000320000200160000320000800578019611802011009910010080000800000100800000138007600080009610170510911711801630800009680000320000801008005880058800588019680043
4002048004262010001100000080182166025480156801813200568000080100320320800004804994799984197432180023800578019501338948010020080000320000200160000320324800458005411802011009910010080000800000100800000080013001280010611000510911711800390800009680000320000801008004380058801968005880111
400204800426210000000180000800390060254801008010032000080000801003200008000048049948001476800200800238005780054003394801002008000032000020016000032000080042800541180201100991001008000080000010080000008001300080013610005109117118004208000010680000320000801008004580043800588005880043
4002048004262000010000010080027006025480100801003200568000080100320000800004804994800058639988180038800578005700339480100200800003200002001600003200008005780054118020110099100100800008000001008000001380000000800126110170510911712800540800000080000320000801008005880058800588005880451

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40002580066621100000000350100080051166225480058800103200488000080010320000800004800494800401088030408004780066800660034848001020800003200002016000032000080066800661180021109101080000800000108000752780031002880023012927625019001317161180044180000131380000320000800108006780067800678007380067
4000248019662210000000035000008003216622548006680010320048800008001032000080000480049480040108803040800298006680066003484800102080000320000201600003200008006680066118002110910108000080000110800076278002900318002361292660501900131713680063180000131380000320000800108004980067800678006780067
40002480066620100001100590000080051166225480058800103200488000080010320000800004800494800071088030408004780066800660032948001020800003200002016000032000080066800661180021109101080000800000108000672780029002880023612906050190051751480063180000131380000320000800108006780067800678004980067
4000248006662010000000035010018005110022548006280010320048800008008732064080160480049480038108803040800478006680066003484800102080000320000201600003200008006680066118002110910108000080000110800077080029002880023616276150190051714780063180000131380000320000800108006780067800678006780067
40002480066621100011000501000800510002254800588001032002480000800103200008000048004848004510880304080047800668006600348480010208000032000020160000320000800668006611800211091010800008000001080005626800280128800236129266150190051713780044180000131380000320000800108006780067800678006780067
4000248006662110101010035000008005106622548006680010320048800008001032000080000480049480040108803040800478004880066003484800102080000320000201600003200008006680066118002110910108000080000010800085278000600308002161282752501900151714138004518000001380000320000800108006780049800678006780067
400024800666201010011003500100800511062254800588001032005680000800103200008000048004948004510880304080047800668006600348480010208000032000020160000320000800668006611800211091010800008000001080005627800291168002661626625019001117151380044180000131380000320000800108006780067800678006780067
400024800666201000111003300000800521662254800668001032004880000800103200008000048004948004010880304080047800668006600348480010208000032000020160000320000800668006611800211091010800008000001080006727800281128800006152750501900517514800630800000080000320000800108006780067800678006780067
4000248006662110110000055001008005116622548006680010320048800008001032032480000480049480040108803040800478006680066003484800102080000320000201600003200008006680066118002110910108000080000010800056278003000288000061627515019001517141480063180000131380000320000800108006780067800678004980067
4000248006662110101000034000008005110622548005880010320048800008001032034880000480049480045108803040800478004880066003534800102080000320000201600003200008006680066118002110910108000080000010800056278003000328002100282661501900141714780063180000141380000320000800108006780067800678006780067