Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 3 regs, 16B)

Test 1: uops

Code:

  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.000

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6300529465237413100011002801046012903110324015400010003000100030005000150000161016166288032948331040003000400029319293101161001100010001300440300800830075149320132569441693231530542071133623807115658286841000165421358315148300010002938929478293632937629331
630042946523711010101000160004787289930002419840001000300010003000500015000010001614128828293723104000300040002920829296116100110001000030044930120111300700312310132539574687431361542074332893809124857286181000161821364814888300010002942629345294422941829359
6300429391236111001011001600046832896303124017400010003000100030005000150000900161662874829570310400030004000292192926411610011000100003003303003017300000312320131489398693531431552075131633803185954285561000162211331214986300010002929829403293912933629359
630042937623601110011000160004642290041302420140001000300010003000500015003070016182288802940531040003000400029271291581161001100010000300440301201153004511112310129939595694931762472080931963810194956286071000162341339615026300010002945829549294462944029497
630042950023621110011100280004629289781312420840001000300010003000500015004040016152288762945631040003000400029183293211161001100010000300359300300103000011112310132149305690130880532070433113812225251286241000163611365714938300010002933029447293712947829491
630042949723621000111100130004502289400032419940001000300010003000500015004050016140288702936931040003000400029016293822161001100010000300330301010530040079310131719501694331500592089132643811185153286131000162081333115014300010002942929445294442948829320
6300429490237011100111001600046832893810124232400010003000100030005000150000600161662887829384310400030004000291832936411610011000100003005412300300430005130320131299371695731350592073832983810135655286191000161551337215081300010002955629592294292932529384
630042944423601110111100260004747289760002424240001000300010003000500015000050016147287122947231040003000400029334293121161001100010000300349301201730015149310130979262694731400572070332323808174962286681000162341346614833300010002946029424294892945429538
630042942623701000110100250004679290300002426540001000300010003000500015003060016155288082940731040003000400029394293401161001100010000300449301100430075136300132359249693031450562082732743811185352287001000162091340314839300010002944329483293752943929388
63004295072361110000100010000464328991010241884000100030001000300050001500006101616728850294313104000300040002926029256116100110001000030033123005008300751312310132869407697931290632067832223815135658286391000161011350515194300010002944229552294142950329433

Test 2: throughput

Count: 8

Code:

  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  ld1 { v0.16b, v1.16b, v2.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240205800796200101100460002800492601925320100801002400008010024000066139834907731080042800648007249991350025320100200240000200320000800698006711802011009901001008000080000010024000004302400410041240041014143051101161180061800001410240000801008006580065800658006580066
240204800646200101100460002800492017202532010080100240000801002400006784703522214008003980201800644998935002232010020024000020032025680045800622180201100990100100800008000001002400002433424000000024004051400051101251180061800001414240000801008005780202804568004680888
2402048004862101000001780002800492662052320100801002401308010024000010136983501497008004080064800645009035002232036220024000020032000080064802171180201100991100100800008000001002400000450240132004124004051400051101161180205800001415240000801008006880065802248006780065
2402048006462101000001770002800492661925320100801002400008010024017867847035222140180039800648004549987350031320100200240000200320000802218004511802011009901001008000080000010024000024530240041008502400410140450512412411800638000000240000801008022180046800468004680201
24020480203621000100046000280209266025320291801002400008016124000067834835073750080171802208006449987350022320100200240000200320000800458006511802011009901001008000080000010024000004302400410041240042614244051101161180061800001416240000801008006580066800658006580065
240204800646200101000460002800492062025320100801002400008010024000066732335222140080039800658006549987350023320100200240000200320000800648006111802011009901001008000080000010024000004302400000103240041014044051101161180062800001514240000801008006580065800658004680222
2402048020462001110011790002800452361954320100801002400008017924000062242835243260080039802288006449987115040132035220024000020032000080157800601180201100990100100800008000001002400000430240041024424004050404505110116118007180000010240000801008006980065800718007280070
240204800696430000000670000800382171728253201008010024013080100240000632150347922600800398004580062499873500223201002002455662003200008004980061118020110099110010080000800000100240000036024004002839240040513300511011611800568000096240000801008022780065800618004980049
240204800496430000001570002800510171715253202918010024000080100240000610121349233300800398006480063499873500253201002002400002003200008005280218118020110099010010080000800000100240130036024004001324004051334305110116118006280000106240000801008004980050800688022280065
240204800656440000073057000280049000182532029480100240000801702401996626443538167108003580061800684997435002232010020024000020032000080221800591180201100990100100800008000001002400000360240130014024004151404305124116118004580000010240000801008005080069802278005080049

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240025800706201000000001600003800492181716253200108001024000080010240000676539351917580040800668006550011350045320010202400002032000080066800561180021109101080000800000102400000360240054000332400405133430005020516055800638000096240000800108006680067800628006180061
240024800616210000000004500002800513191718253200108001024000080010240000671069350116180030800558006050010350026320010202400002032000080079800661180021109101080000800000102400000002400400004024004101544200050205160458005780000109240000800108006180063800678006280046
240024800606200010000000000008003001717162532001080010240000800102400006384623522027800358006580045499933500353200102024000020320000800598005511800211091010800008000001024001416430240054010572400395040430005020417054800678000066240000800108006180063800618006780062
24002480060620100000000450000280049217016253200108001024000080010240000778485350708980040800698006150006350041320010202400002032000080061800591180021109101080000800000102400001436024004000033240040513243140050204160448005280000100240000800108006780061800628005680066
2400248006662110000000073010028004631817182532001080010240000800102400006765393486748800358006580065500063500453200102024000020320000800658004911800211091010800008000001024000003502400540004324004151144401050205150768005880000910240000800108006580067800628006780056
2400248004562100000000046000028003231717162532001080010240000800102400008608343520776800358006080065500063500403200102024000020320000800558006511800211091010800008000001024001414430240054000332400405154440005020416044800578000060240000800108006880061800668006180062
2400248006162010100000045000008004020170253200108001024000080010240000653701348712080044800458006450011350040320010202400002032000080060800601180021109101080000800000102400140430240000010562400405033361410502051604480066800001010240000800108006880061800718007080062
240025800506211000000003800012800462181815253200108001024000080010240000671880350708980040800708006050000350046320010202400002032000080060800491180021109101080000800000102400001435024005400034240000513343140050209160548005880000100240000800108006680062800618006180066
240024800656200000000006200013800492181716533200108001024000080010240000647117348925480030800608004749990350046320010202400002032000080069800591180021109101080000800000102400000430240054010392400406132430005020316054800528000096240000800108006380067800628006180061
240024800616210000100005700002800542141715253200108001024000080010240000979245348712080036800608006150006350044320010202400002032000080061800651180021109101080000800000102400000430240040200402400405154440015020516055800578000099240000800108007080067800678006580061