Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4R (8H)

Test 1: uops

Code:

  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 4.012

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6500528785215550130002105140280880011607250124012100040001000500047520822938028363284543105000100040001000400028225282811161001100010000100002100006100022201357710244706032012741930932153811145356227944152761260513553100040002834628175285442843328411
6500428423213450060004005138281350111632750084012100040001000500047533322940028314282233105000100040001000400028269283601161001100010001100002100103100122301390410020716533992571923832563814145250227895144011276713055100040002839627991286292841528455
650042835721422005000410509327947000160925012400810004000100050004753402294102824828426310500010004000100040002823228287116100110001000010000210000310002220133659738711234222541929632193802146756227956147381277013999100040002830928424286522839928423
650042863321545005000210511228077000161285008401210004000100050004753812294202835928530310500010004000100040002828828249116100110001000010000210010310002121133831005171023191158194353378380795853227985143141284113317100040002828928405284022819228289
650042862921244015000310482128254100160075008401210004000100050014751832299102823128239310500010004000100040002830428353116100110001000010000210000410001130138319604721333866611939732643810185852228011145571316013889100040002854528453283632827528590
6500428322213550050003105015281091001595150084012100040001000501047558162289902824428291310500010004000100040002825828174116100110001000010000210000310002120133049912708933186561926432493809115957227998146221275613400100040002845628425282902848628539
6500428490213560070002105068280241001614050084012100040001000500047534132294802835728667310500010004000100040002829628260116100110001000010000210000310002220139059913706932687541926732913812176063227882145991254013182100040002820028325283252849328268
6500428380213530050002005062282591001611650084012100040001000500147563622976028153283083105000100040001000400028236281971161001100010000100002100103100020201394410206717133344551924933533799165853227864150361237413427100040002844928248283642826228366
6500428599214540040003105026279661001618350084012100040001000500047559522966028225284833105000100040001000400028309282871161001100010000100002100119410002030135819973711633617581928532903808105960227896147031254713765100040002841728270285852846528582
650042833121274004000210495128049100159195008401210004000100050024756252295402821528171310500010004000100040002805428609116100110001000010000210000610002020136399987709433002601926332733806195661227955147911276313139100040002834428437283882844028474

Test 2: throughput

Count: 8

Code:

  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002058006659911111060100804321062254001521003200248000010032000080000500400054108803040800478004780047003484001002008000032000020080000320000800718007111802011009910010080000800000100800078288003001178002361310610051090117118004410132800003200001008054780048800678006780048
400204800665991112003701008003210602540010010032007680000100320000800005004000294403096180027800608004100342400100200800003200002008000032000080060800411180201100991001008000080000010080000008001300014800006114180000510901171180038010100800003200001008004780061800618006180061
4002048004159900000000100800420660254001641003200648000010032000080000500400009864002018003880057800570034240010020080000320000200800003200008005780057118020110099100100800008000001008000001880018010080000000220000510901171180057110100800003200001008005780061800588005880061
4002048005760000000024000080045166025400100100320076800001003200008000050040002986400201800418005780041003424001002008000032000020080000320000800578005711802011009910010080000800000100800000188001400016800140113180000510901171180054010100800003200001008005880045800588005880061
4002048004159900000000000800451060254001001003200648000010032000080000500400000960002018002280041800600034240010020080000320000200800003200008006080057118020110099100100800008000001008000001880018010080018001418000051090117118005410100800003200001008005880045800588005880058
400204800415990000000000080045066025400176100320076800001003200008000050040002138400001800228009280057003394001002008000032000020080000320000800578005711802011009910010080000800000100800000188001300008001460130000051090117118003800100800003200001008005880058800588034980042
400204800576000000001900008002616602540017610032006480000100320000800005004000008640020080022800608006000339400100200800003200002008000032000080060800571180201100991001008000080000010080000018800180001780018011822000051090117118005400100800003200001008006180045800618004280042
400204800605990000002400008002616602540017610032000080000100320000800005004000008640020180038800578005700342400100200800003200002008000032000080060800411180201100991001008000080000010080000018800130000800186114220000510901171180057010100800003200001008006180058800588004280042
40020480060599000000000008002606602540017610032007680000100320000800005004000249600020180041800448005700342400100200800003200002008000032000080060800571180201100991001008000080000010080000008001400008001861000000510901171180041113130800003200001008006780048800678006780067
4002048004760011110137010080051006025400148100320072800001003200008000050040005454400281800418004180041003234001002008000032000020080000320000800608005711802011009910010080000800000100800000188000000017800186014220000510901171180054010100800003200001008005880058800588005880058

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800626001100103110080047100254000341032005280000103200008000050400038102400168004380062800470344400010208000032000020800003200008004780047118002110910108000080000010800078238000801129800196025237050190051705580059009280000320000108004880063800638006380063
4000248006260011000071008004700625400062103200248000010320000800005040000310240016800438006280047034440001020800003200002080000320000800628006211800211091010800008000011080006608002700025800190126247050190041705580059090280000320000108004880063800488004880063
4000248006259911000032000800321062540006210320024800001032000080000504000031024001680028800478006203444000102080000320000208000032000080047800621180021109101080000800001108000870800280027800006025236050190041746580044099080000320000108006380048800488004880063
40002480062599110000310008003206025400062103200528000010320000800005040000354400248004380062800620329400010208000032000020800003200008004780047118002110910108000080000110800086248002600026800206125246150191041705480059090280000320000108006380063800488006380063
40002480062599100000321008004706625400062103200528000010320000800005040003810240016800288006280062034440001020800003200002080000320000800628006211800211091010800008000011080007724800250112580000602607150190051705680059199280000320000108006380048800638004880048
4000248006259910010031001800471662540003410320052800001032000080000504000385440028800438006280047034440001020800003200002080000320000800628006211800211091010800008000011080008824800260002680000607246150190051708580059009080000320000108006380048800488006380063
4000248004760010000032000800471662540006210320024800001032000080000504000381024001680043800628006203294002752080000320000208000032000080062800621180021109101080000800001108000672480026000268002061706150190041706680059009280000320000108004880063800638004880048
400024800476001001003300080047166254000621032005280000103200008000050400004102400168002880062800620329400010208000032000020800003200008006280062118002110910108000080000110800076248002600026800206125236250190051705580044199080000320000108006380063800638006380063
400024800625991101003200080047106254000621032005280000103200008000050400033544002480043800628004703444000102080000320000208000032000080047800471180021109101080000800000108000770800270017800206126247050190051705680044000280000320000108004880063800638004880063
4000248006259911000132100800470062540006210320052800001032000080000504000035440024800438006280062034440001020800003200002080000320000800478006211800211091010800008000011080008824800080012680018007237150190051705580044109080000320000108004880063800638004880063