Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, post-index, 8B)

Test 1: uops

Code:

  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.006

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6500528959230270025000007010047892829500166406000100030092000100030002000500010002356201522925287082877031060002000300030006000287232875411610011000100002000042002000200200200013150927968943105125119568318438081152542835710001551312819138952000300010002889228894289362900328715
6500428774231180023000007000046702844800166356009100030092000100030002000500010000357391522966286052878931060002000300030006000287702882311610011000100002000062002002200240240013104950669053187165319655320938141752542823410001559612952138652000300010002878328835288522881228947
650042905723122002000000700004853284681016437600910003009200010003000200050001000035721122300428693288353106000200030003000600028656287991161001100010000200006200200020000006001318695036980322095319726326338081454482817210001545412860138562000300010002879728669286902876028809
65004287892322100210000010000046722846500164396009100030092000100030002000500010002356151922976285652868831060002000300030006000287582868711610011000100002000042002102200240240013118943869713131114619778324138071860482819510001543612780139662000300010002896528692288032891128894
6500428910231240030000007010046492837111165296006100030002000100030002000500010002356981722916286922884431060002000300030006000287302863211610011000100002000062000005200240260013028948369233075135419807315438131253532826010001560912576139692000300010002883928747287352884928800
6500428749232240018000013100004694284211216485600910003000200010003000200050001000335591142290428762288483106000200030003000600028680287191161001100010000200000200000020004026001323792206860311395119665316738111656542819810001562812695136802000300010002880028990288262890328902
6500428851231230025000001900004666285001116594600010003009200010003000200050001000235698522960286922888831060002000300030006000287132877811610011000100002000002000002200000060013259930768573108125719716320338171456552828910001543212738138502000300010002883428786288732898128774
6500428939232180021000008000047132847721164396009100030092000100030002000500010000357181222950287232877731060002000300030006000286632876511610011000100002000002000000200200260013210905569433115185419880329838091446502833110011559612750138822000300010002882928689288402894328885
650042889323122002600000600004734285040116684600610013009200010003000200250051000035724622944287402888910466006200030033003600028712286932161001100010000200020200004465200440260012890947569193129125419823327738211952512817010011578112820139672000300010002889428984287272879028995
650042881623214102710010700004724283891016528600910003009200010003000200050001000235682822942287452874231060002000300030006000287172878111610011000100002000002002025200200200013303950869863226156019767318838212556502832410001571412942140602000300010002899528844289332893228818

Test 2: throughput

Count: 8

Code:

  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002058007464300011015400028013921212025480155801002400541601088010024000016000048082196005231219381080115800428004203744801002001600002400002022400004800008004280042118020110099100100800008000001001614302331601380105616003061294002000512212611800390800001610160000240000801008015680043800438004380043
4002048016564200011002140000800272121202548048180100240057160000801542400001600004804999637633121838008002380042800420324480100200160108240000200240000480324800428004211802011009910010080000800000100160011114316004902050160039612841110000510912611800390800001414160000240000801008004380155800438004380043
40020480042643000110070880028002701212272548017180100240219160000801002400001601084804999609193121763008011880156800420172448010020016000024016220024000048000080155800421180201100991001008000080000010016000011351600360203716002961041100000510911711800390800001414160000240000801008004380043800438004380043
40020480042620000100055000280027212120254801738010024000016000080100240000160000480499960331312187700800238004280042032448010020016000024000020024000048000080042800421180201100991001008000080000010016000004316003701037160039613743000005109117118003908000000160000240000801008004380043800438004380043
4002048004262000010000000280027377025480172801002400711600008010024000016000048049996128231245490080023800458004203244801002001600002400002002400004800008004280042118020110099100100800008000001001601211101600100205316014161494311100151170160080040080006106160000240000801008012980045800458004580044
400204800436520001000480000800290121202548017480106240060160008801082400241600164805389604203121397008002380044800440614480148200160016240024200240024480048800438004411802011009910010080000800000100160002025160032010251600326122390011151160160080040080006106160000240000801008004580045800448004580045
40020480044644000000054000180028212120264801628016224006016000880108240024160016480538960499312136001800238004480044071448014820016001624002420024018648004880044800421180201100991001008000080000010016000000160030020321602346122330000051091341280039080000109160000240000801008004380043800438004380043
4002048004264410000006500028002731514025480169801542400641600008015424000016000048049996093431207550080023800428015503244801002001600002400002002400004800008004280042118020110099100100800008000001001600111140160150010511600370111401100005109017118003908000009160000240000801008004380043800438004380043
400204801576431101010668800080027301408348017280100240015160000801002400001600004804999609203123963008002380154800420324480100200160000240000200240162480000800428004211802011009910010080000800000100160011104016011311152160037614901000005109125128003908005499160000240000801008004380156800438004380043
4002048015664311000007300028002720150254801698010024001816010880154240000160000480499961206312408600800238004280042031234801002001601082400002002400004858508331983455311802011009910010080000800000100160011114016004801051160037011101000005109117118003908000009160000240000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40002580057620000000043000280027012120254800738001024006316000080010240000160000480049959996312176300080023800428004203244800102016000024000020240000480000800428004211800211091010800008000001016010203301600370037160038013040015019481794800390800001410160000240000800108004380043800438049780043
400024800426200000000000028002721200254800658001024006316000080010240000160000480049960650311999400080023800428004203244800102016000024000020240000480000800428004211800211091010800008000001016000004101600300030160032613041005019051788800390800001414160000240000800108004380043800438004380043
4000248004262001000004801028002701212025480073800102400631600008001024000016000048004996065231217760008002380042800420152448001020160000240000202400004800008004280042118002110910108000080000010160000033016003820016000061304100501908175980039080000010160000240000800108004380043800438004380043
40002480042621010100000002800272121202548007380010240000160000800102400001600004800499599963121776000800238004280042032448001020160124240000202400004800008004280042118002110910108000080000010160000041016003700697160037613041005019081785800390800001410160000240000800108004380043800438004380043
40002480042621000000043000080027212120254800738001024021916000080010240000160000480049960684311999400080023800428004203244800102016000024000020240000480000800428004211800211091010800008000001016000003701600370029160037613041005019051774800390800001410160000240000800108004380043800438004380043
400024800426200001100460002800272121202548007380010240000160000800102400001600004800499646103121885000800238004280042032448001020160000240000202400004800008004280042118002110910108000080000010160000033016003710301600376130410050190101785800390800541410160000240000800108004380043800438004380043
4000248015562100001004300028002721212050480073800102400541600008001024000016000048004996065031217630008002380042800420324480010201600002400002024000048000080042800421180021109101080000800000101600000410160039003716003701374100501908178580039080000140160000240000800108004380043800438004380043
40002480042620010000054000280027212120254800738001024000016000080010240000160000480049960685312177600080023800428004203244800102016000024000020240000480000800428004211800211091010800008000001016000003301600370037160030613741005019081758800390800001410160000240000800108004380043800438004380043
40002480042621010000012000080027212120254800108001024000016000080010240000160000480049960683311999400080023800428004203244803822016000024000020240000480000800428004211800211091010800008000001016000004101600000037160037613941005019081785800390800001414160000240000800108004380043800438004380043
4000248004262000000005000028002721212025480010800102400631600008001024000016000048004995999631200770008002380042800420324480010201600002400002024000048000080042800421180021109101080000800001101600000330160000000160000613741005019081759800390800001011160000240000800108004380043800438004380043