Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (post-index, 2S)

Test 1: uops

Code:

  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.008

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
65005294772282020140110013200004656287540117051601210004012100010004000100050005000475581522909029083292743106000100040002000400029219291341161001100010001001231001001210010100001298692696941316405920206316538071753522850510001629913270142771000400010002930729319293482932329224
650042932322600100000000550100462028675011702460001000400010001000400010005000500047596922832029127293553106000100040002000400029220292041161001100010001000331002000110000100001308392626951314215820176319637971656572854510001586613342141361000400010002936829223293712930429304
6500429289227012002100004501004649288190117063600410004004100010004000100050005000476211322882029098293463106000100040002000400029192292021161001100010001002001003000210003130001294991896872313705620236323038081561602853410001617713352142831000400010002922429261292412920329204
6500429307227001002000003600004625287490017006600410004000100010004000100050005000474661222972029153293713106000100140002000400029100292001161001100010001001201003011110000231001317294546928311435620252315938061857562841110001625913206141971000400010002922429288293652929529321
650042931822700000100000460100462128907101708260121000401210001000400010005000500047428022903029103291873106000100040002000400029203292251161001100010001001331002002110013130001309490856980310505920286315738051556612846510001624113231145551000400010002926429262292642927929201
650042941922700200201000510100471328782011691860161000400010001000400010005000500047576922872029138293523106000100040002000400029227291741161001100010001000231001022410000000001324991136903308705620249331838071663562842910001626313156143571000400010002938429293292522936629226
6500429310226003000000005401004624287630017077601910004012100010004000100050005000476081022872029120293233106000100040002000400029210291031161001100010001001231002001110002130101316391576880315016420260310538121557632857810001639513138141761000400010002941029348293492929029335
650042926822800200200000510100454128795001702160081000400810001000400010005000500047554422915029249293713106000100040002000400029252293211161001100010001002121002000110000120001305992876891311415820258312238131852572854410001627413206143401000400010002932229389293382930729421
650042920622801210200000600000468828950001700560081000400810001000400010005000500047554122891029115293573106000100040002000400029227292671161001100010001002221003002110002020001316691466821312736420326317938071657552854510001640513419143531000400010002928029334293732935029440
650042936922700200200000500000469028906001699860081000400810001000400010005000500047398922907029135293163106000100040002000400029342292261161001100010001000201003000110003020001314792216872318125920210318238141557532857710001637613007143621000400010002927429347294212928929468

Test 2: throughput

Count: 8

Code:

  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205800716211000000001801008004216602548015680100320000800008010032000080000480499480010768002008002380059800420033648010020080000320000200160000320000800548005411802011009910010080000800001100800000138001000080010501317000510911711800540800006680000320000801008005880058800588004380058
400204800576210000001001800008004206002548015680100320048800008010032000080000480499480005768002008003880057800420032748010020080000320000200160000320000800548005411802011009910010080000800000100800000178001310168001250013000510911711800521800009680000320000801008005880055800588005880058
4002048005762000000000018000080039166025480156801003200568000080100320000800004804994799987680020080038800578004200339480100200800003200002001600003200008005780054118020110099100100800008000001008000001380009001080009511217000510911711800540800009680000320000801008005880058800588005880058
400204800546200000001005500008004216602548014880100320056800008010032000080000480499480006768002008003880054800570033948010020080000320000200160000320000800578005411802011009910010080000800000100800000138001310980010001217000510911711800541800009680000320000801008005880055800588005880058
4002048005762100000000022000080042166025480100801003200568000080100320000800004804994800137680020080038800578005700339480100200800003200002001600003200008005780054118020110099100100800008000001008000001380012001080009001214000510911711800541800009080000320000801008005880058800588005880058
40020480057620000000000310000800421660254801568010032005680000801003200008000048049948001376800201800388004280042003244801002008000032000020016000032000080057800571180201100991001008000080000010080000015800130012800106190000510911711800391800009980000320000801008004380055800588005880058
400205800546200000000002200008004216602548010080100320056800008010032000080000480499480005768002008002380054800570033948010020080000320000200160000320000800578004211802011009910010080000800000100800000080012001080010511017000510911711800541800000680000320000801008005880055800558005880055
4002048005762100000100018000080042166025480100801003200568000080100320000800004804994800057680020180038800578005700339480100200800003200002001600003200008005780054118020110099100100800008000001008000001380013101280013611017000510911711800511800009680000320000801008005880058800588005880058
4002048005762000000000015000080042166025480156801003200568000080100320000800004804994800147680020080038800578005700339480100200800003200002001600003200008004280054118020110099100100800008000001008000001680010001380012611018000510911711800540800009980000320000801008005880058800588005880058
4002048005762100000000018001080042166025480156801003200008000080100320000800004804994800231024002008004380062800620034448010020080000320000200160000320000800628006211802011009910010080000800000100800847238002500238001960230610510911711800591800000980000320000801008004980049800498006380049

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000258006262010010000000008004716602548005880010320056800008001032000080000480049481658102400240800430800628006200344480010208000032000020160000320000800628006211800211091010800008000011080005723800241124800180123000501941745800591800009680000320000800108006380058800638006380063
40002480048620000000000300008004716602548005880010320048800008001032000080000480049480029102400200800430800628006200344480544208000032000020160000320000800628005411800211091010800008000011080007623800250190780018612523505019417548005908000091180000320000800108004880058800488004880063
40002480047621001120000410008004716602548006680010320056800008001032000080000480049480029576001608004308006280054003464800102080000320000201600003200008006280062118002110910108000080000110800076238001302278001861252250501941734800450800009980000320000800108006380063800638005880063
4000248006262110101000031000800421660254800588001032005680000800103200008000048004948002376800200800230800628006200344480010208000032000020160000320000800628006211800211091010800008000011080006508002400258001861232260501951745800590800009980000320000800108006380063800638006480063
400024800626201000000004100080137106025480066800103200488000080010320000800004800494800051024002008004308004880047003394800102080000320000201600003200008005780062118002110910108000080000110800066238002601238001860232361501961733805581800009080000320000800108019780350803498019780196
40002480062623100200019329588008033516606448003480089320304800008016032000080000480497480033552066408003808019580196202134448001020800793203002016047032032480196803472180021109101080000800001108019892380159009088009461242460503431745800591800779080000320000800108019780049800488034980196
4000248019462210000001116200080047166025480066800103200568000080010320000800004800494800231024002008004308006280057003444800102080000320000201600003200008005780062118002110910108000080000110800060238002620278001261252350501941744800590800009980000320000800108005880063800498006380063
40002480062621101100000300008004716602548003480010320048800008001032000080000480049480034102400200800440800578020900344480010208000032000020160000320000800628006211800211091010800008000011080005508002400288001861251701501941733800591800009680000320000800108006380063800588006380058
40002480062621100110000300008004716602548006680010320024800008001032000080000480049480011544002408004308006280062003444800102080000320000201600003200008006280062118002110910108000080000110800065238002500248001961242361501941734800441800009980000320000800108006380048800488006380058
400024800626201001100002900080047066025480058800103200488000080010320000800004800494800231088001608004308006280057003444800102080000320000201600003200008006280062118002110910108000080000110800076238001301248001861242360501931735800451800009980000320000800108006380063800638006380063