Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 3 regs, 8H)

Test 1: uops

Code:

  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.000

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6300529088233212111100001701004747283920002369440001000300010003000500015003916151282792879031040003000400028810288031161001100010003004493009002830045179310130509454696231660682020432183811146667283041000157811301214391300010002890928918289742892428853
6300428926233004004000001101004646285830232365040001000300010003000500015000916149283502893831040003000400028746287471161001100010003000063004000430045136000130729369696431111622029331433814247172283451000156661311714650300010002887828909289622885428849
6300428898232012112100001301004617285930232373840001000300010003000500015000316148284282897831040003000400028945287921161001100010003000063001000330043146000132519263695531412712024232213807297374282251000155291293014567300010002895528885289712891528870
630042884223300200200000901004805285030232372040001000300010003000500015000816127283862894231040003000400028824287511161001100010003000063004000430045139000131219114696331143742013532183814166166283661000157281269814430300010002892228911288352877728890
630042890323300200300000901004816285730002364440001000300010003000500015000316135283362886931040003000400028755287751161001100010003000083004000430045039000133809199694831251642030131463817196665282651000157801298614546300010002901128872289952889228946
6300428858232001003000001500004778286370332359940001000300010003000500015000316148284882900231040003000400028805288451161001100010003000063004000430045139000132309599695530942702015532543813176770283091000157001297014540300010002880128855288842897128928
6300429000232002002000001801004720284930322368840001000300010003000500015000216134283872894631040003000400028705287521161001100010003000003003000330035009000132949266700530900642032332143811266565281751000158241314314717300010002897328870289462876828851
6300428835232002002000009010046482845402023764400010003000100030005000150021016151283322877731040003000400028734287681161001100010003000063004000330035039000132069293690631801692030931703814266979282611000155581296014802300010002895228874288392890628734
63004289102310020020000090100472528511030237144000100030001000300050001500071616128379289263104000300040002873928850116100110001000300559300901383004514931114133219299690031161702034232033809227271282641000156351298114477300010002888428763288872891028965
630042874323200100200000901004678285011332373640001000300010003000500015002716115283472906231040003000400028703287431161001100010003000063004000430044139000130919304694131360682025632883811247161282111000156911287814512300010002879128919288902896828904

Test 2: throughput

Count: 8

Code:

  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  ld1 { v0.8h, v1.8h, v2.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402058023164412100011350100080051219191053320100801002401308010024000067872535040088003080207800554997814501563201002002400002003202578021080069118020110099100100800008000001002400000363324017100362400335133360051101162180056080000106240000801008006280060800618023380073
24020480055621000110148000008004221918922532010080161240000801002400006375613510413800308005580208500153501383201002002400002003200008005580055118020110099100100800008000001002401300360240040081052240164513336025110216218005908008366240000801008005680059800618023080061
24020480055621000001038010028019621719112532030680100240000801822400006489373515413801678021180057504003500133203522002400002003202578005580055218020110099100100800008000001002400000360240163013724003351333500511011611800510800001011240000801008010680056800618005680056
240204800556210000000590000080040217181025320100801002400008010024000064114135128678003080055800555026635001332010020024000020032000080060800551180201100991001008000080000010024000003602400370040240033513336005110116118005208000066240000801008005580061800568005680056
24020480055620000000038001008004021917112532010080100240000801002400006511443500724800308005580055502723500133201002002400002003200008005580057118020110099100100800008000001002400000362824003300402401635133000511011611800580800021311240000801008006980065800728006980066
24020480065652000000066000128005321717182532010080101240000801082400006557063500736800438036380865499893500223201002002400002003200008006480064118020110099100100800008000001002400000430240042014424004051404300511011611800680800001011240000801008006580065800658006580065
2402048006864900011001920010080044219191325320100801002400008010024017856394935152698003480176800864998635001732010020024000020032000080063800591180201100991001008000080000010024000003602400340136240033613136005110116118005508000067240000801008005980223800658006080063
2402048005865300010203500010180049219198625320100801002400008010024000086558235071618003480195800714998435001732010020024000020032000080075800621180201100991001008000080000010024000003602400330249240033513336005124116118019908000066240000801008007380060800638006280160
2424208008064300000005900012800432181712253203138010024000080100240000646736347922780034802458006049982350150320100200240000200320000802288005911802011009910010080000800000100240000036024003301877240033513336005110125118005808000066240000801008005680060802188006080063
240204800656440000000508800008020121919125332010080100240130801002400006235283507921800348019280069499823500173201002002400002003200008005980219118020110099100100800008000001002400000363324004002853240130513336005110116118005508000066240000801008005780063800608006080057

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4c | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400258005962100101100450000180040217190162532001080010240000800102400006636933496713010080038800618005850000350045320010202400002032000080069800662180021109010108000080000010240015154302400540005924004151000050200351634158004280000106240000800108006980109800618006180048
240024800606210000000045000028004020170192532001080010240130800102400006755473520969010080024802028004550004350040320010202400002032000080053800621180021109010108000080000010240000036024003300040240033513343005020034163434800428000006240000800108006180065800618005680056
2400248006062000000000000001800402171700253202018001024000080010240000666680351940901008004280060800555001035004032001020240000203200008006080062118002110901010800008000001024000003502400330004024004050036005034015163634800608000066240000800108005680056800468005680056
24002480210621000000004500002800493141709225320010800102400008001024000067554735089950100800308006380060500053500353200102024000020320257800608005511800211090101080000800000102400000360240040010432400005100005020034163434800588000006240000800108005680056800568005680056
24002480060620000010006200000800452170020253200108001024000080010240000666680350116101008003980055800604999035004032001020240000203200008006080059118002110901010800008000041024000003602400370004224003351036005020034153416800528000066240000800108005680056802038005680046
24002480061621000010004500002800402017016253200108001024000080010240000612348351940901008003580062800605001035003932001020240000203200008005680056118002110901010800008000001024000003633240040010024004051036005020034163213800578000066240000800108005680056800608004680049
2400248006062000001100450000080030217170162532001080010240000800102400006666803501161010080030800558005950006350064320010202400002032000080060800551180021109010108000080000010240000036024003300044240033504000050340341634348005780000106240000800108005680061800468005680057
2400248022362000001000380100080030019190925320010800102400008001024000065370135499990100800358006580060500013500413200102024019320320000800558005511800211090101080000800001102400000360240040010332400365132360050200351634358019380000106240000800108006280064800628006180055
2400248006162000000000450100280040319190025320010800102400008001024000064473834871200100800308006080066500003501713200102024000020320000800658004511800211090101080000800000102400000430240033000324004051360005020017153415800428000006240000800108004680218800468006280056
24002480059620000011003801000800401019016253200108001024000080010240000633977354980701008003580045800625001235002432001020240000203200008006080055118002110901010800008000001024000003931240040010402400406133430050200341635168006180000106240000800108006180056800678004680046