Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (16B)

Test 1: uops

Code:

  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.012

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.012

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
65005294892290180019000300046812890500016980500840121000400010005000475774229032908929285310500010004000100040002911429119116100110001000010000210000000100020000012882925569113038105120326308638179554928359164921351115083100040002939029334293832943529333
65004292452200170013010310045682899800016963500040121000400010005000475182229362917129372310500010004000100040002915829174116100110001000010000210000003100020200012859913468183252651202363114382116565428338160141402314476100040002936629254292832934729333
650042937622001200170003100460028793000169375000400810004000100050004754602288029198293153105000100040001000400029268292001161001100010000100000100000001000200000134779194690130631448202333099381713564628294162501411515165100040002931729382293402937129312
650042925222001500150003100464828698000170865008401210004000100050004750402286229153293413105000100040001000400029227292261161001100010000100002100000001000202000128749201700330251048202483072382411534928511163811373614999100040002934529464293552942329324
6500429217220017001600001004562287810001699450084008100040001000500047514522918290622927331050001000400010004000291292923711610011000100001000021001000010002020001292491716821325074820169312038158524828369163801381414488100040002928529316292462936229278
65004293252200130011000310045732877701017032501240081000400010005000474387228882907829370310500010004000100040002920829202116100110001000010000210000000100020200012912916368383069748203083040381312535628599157121396315134100040002941129324292692936029265
65004292902200130011000310046462873800016966500840081000400010005000475263228542911729384310500010004000100040002915329273116100110001000010000210000000100020200012833964368213270843202563081381814545328340161221407514861100040002926829331292462933829313
65004293122200160020000310049192899600017007500840081000400010005000474065228832912429259310500010004000100040002920829206116100110001000010000210000000100020300012837970570623045104620342311338208514928431163921403714539100040002925029351292602936129295
65004294692380180013000000045452873100017024500040121000400010005000473966228732910329360310500010004000100040002926529174116100110001000010000210000000100120200013499905070383025844202463105381817525828670162941384014580100040002932729312292882935829225
65004293252200160014000310048702881700016973500840081000400010005000476035228432907129286310500010004000100040002910429205116100110001000010000310000000100020200012784999467993078847202983177381612634528383165501397414440100040002923229264292862930029313

Test 2: throughput

Count: 8

Code:

  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205800696000000018000800391662540015210032005280000100320000800005004000107680008180035080054800560338400100200800003200002008000032000080054800551180201100991001008000080000010080000158001111080011601115000051092171180051066800003200001008005580042800558005580055
400204800545990000017010800391062540015210032005280000100320000800005004000007680008180035080054800540336400100200800003200002008000032000080054800541180201100991001008000080000010080000158001101180011501115000051091171180051066800003200001008005580055800558004280055
40020480041600000001701080039166254001001003200528000010032000080000500400008768000808003508004180054033640010020080000320000200800003200008004180054118020110099100100800008000001008000015800110080011601115000051091171180051166800003200001008005580055800558005580055
40020480054599000001701080039166254001521003200528000010032018080000500400021768000808003508005480041033640010020080000320000200800003200008004180054118020110099100100800008000001008000015800110080011001015000051091171180051006800003200001008004280055800558004280042
4002048005460000000001080026166254001521003200528000010032000080000500400007768000808003508004180054033640010020080000320000200800003200008005680041118020110099100100800008000001008000008001001380010601115010051091171180208060800003200001008077280045800428004280042
4002048005459900000160108002600625400152100320052800001003200008000050040000776800080800353800468005403364001002008000032000020080000320000800548005411802011009910010080000800000100800001580000011800110000000051091171180051166800003200001008006080055800428005580055
40020480054599010001701080039166254001521003200008000010032000080000500400000768000808003508005480054033640010020080000320000200800003200008005480054118020110099100100800008000001008000015800111080000601115000051091171180053106800003200001008005580055800558004280057
400204800416000000000108004116615340010010032005280000100320000800005004000007680008080035080041800540323400100200800003200002008000032000080054800541180201100991001008000080000010080000158001411080011601115000051091171180051066800003200001008005580055800558005780055
40020480054599000003701080039106254001521003200528000010032000080000500400007384000008003508005480054033640010020080000320000200800003200008004180054118020110099100100800008000001008000015800110080000601115000051091171180051060800003200001008005780055800558004280055
40020480054600000000010800391602540015210032000080000100320000800005004000117680008080022080054800540323400100200800003200002008000032000080054800541180201100991001008000080000110080000158001101080011601115000051091171180123066800003200001008005580055800558005580055

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40002580460602101101010031176100080047006025400062103200528000010320000800005040006510240016080043800628004703444000102080000320000208000032000080062800621180021109101080000800001108000762380027001268002001250610501902117516800591000080000320000108006380289803318004880063
40002480062599101001000070100180032000025400062103200528000010320000800005040003354400240800988006280100034440001020800003200002080000320000800628006211800211091010800008000001080007723800280116800180125061050190817817800590009280000320000108006380063800638004880063
40002480062600101101000032010008004706002540006210320052800001032000080000504000331024001608004380062800620329400010208000032000020800003200008006280062118002110910108000080000010800068238000700178001901260710501901817817800590099280000320000108004880048800638006380063
4000248006259910110000006010008003210602540005810320052800001032000080000504000391024001608002880047800620329400200208000032000020800003200008006280062118002110910108000080000010800067080028022268002001723710501901717186800590099080000320000108004880063800638006380063
400024800626001011010000950100180047066025400062103200488000010320000800005040000210240016080043800478006203444000102080000320000208000032000080062800621180021109101080000800000108000872380027001388002060272370050190617178800440099280000320000108004880048800638006380048
40002480062599100000000031010008003210602540003410320048800001032000080000504000035440028080028800628004703294000102080000320000208000032000080062800621180021109101080000800001108000782480026000268001861707205019012171717800441099080000320000108004880063800638004880048
4000248006260110110100007000008004700002540005810320048800001032000080000504000395440028080043800628006203294000102080000320000208000032000080062800481180021109101080000800001108000772480028000298002061262470050190617617800440099080000320000108006380063800638006380048
4000248006260010110100003101001800470600254000341032005280000103200008000050400039102400160800438006280062034440001020800003200002080000320000800628006211800211091010800008000001080007824800260002680018012506005019017171717800590090280000320000108004880063800638006380063
400024800475991011010000320100080032166025400058103200528000010320000800005040004010240016080043800628006203444000102080000320000208000032000080062800621180021109101080000800000108000882380058100268002001262371050190617178800440099280000320000108006380048800488006380063
40002480047600101100100070100080032166025400062103200248000010320000800005040003310240016080043800628004703294000102080000320000208000032000080062800621180021109101080000800000108000972480025000680018612624710501901617617800591090280000320000108006380063800638006380063