Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (multiple, post-index, 16B)

Test 1: uops

Code:

  st2 { v0.16b, v1.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640072931923419112103126417610475828934221804850001000200220021000200020025005216191601622217902905329332142750002000200450004000293152923311610011000100020000002002002000000001312992676859307297620043337437964271642840010001550112941138172000200010002866028626287402889628959
640042888623220002400018010475828608021773650001000200020001000200020005000216191600028216892858029003310500020002000500040002865428581116100110001000200006020001220020002234913155930068473143127319876327137963767682838510001562712894141432000200010002909129089290452902529018
640042914723320102012128819300459628952221844050451008200820161009201420125035220661612825220862933029726842905041202220065035402629235294401616100110001000200820420101556820120602012793879166933018156219931318437993865672847010001589912667140172000200010002901429025290642930228900
64004288402241700170000100466028720221790050001000200020001000200020005000216171600019217532876028962310500020002000500040002903029078216100110001000200400020000220000600013021906868363122157320297321038013572642841110001616312867143432000200010002916129193291482903029025
64004290242342300210000100461328913221797450001000200020001000200020005000216271600021217622881129288628500020002000500040002898129108216100110001000200006020000020002022012982926868643118136419905315037983971672870110061580612785140212000200010002953929610296792936029639
640042950922921001910115961056004661294302217874500010002000200010072011201850402215516096152180829114294243105000200020005000400028972295311416100110001000200000020000020080600013037929169223109107520034310138035169682907810001646313449147512000200010002999530090300953017729584
6400429601239120019000147101461629235201848550001000200020001000200820105060220361609632184129767301343105000200020005030402430018300341516100110001000200000020000020000000013265932768473088125620589328238183462592877410001637613229146382000200010002967729685296522945729912
640042958922820011800000014744292260018382500010002000200010002000200050002161416000321794291812962931050002000200050004000294222946811610011000100020000202000002000040001326093356915308776720706335738193766652877510001650913360145932000200010002957629512295392958729545
640042965222923101900000004663292720018463500010002000200010002000200050052161116000221827291692955371050002000200050004000294302949111610011000100020002402000002000000001317893416907312496320632334038183463652886910001634213338147232000200010002954229681295132966429610
64004295542302200160000001464029190001859750001000200020001000200020005000216051600062181829108296093105000200020005000400029470295431161001100010002000040200011502000040001318894336892312696520431328138172963662859510001650013216145692000200010002960229543295202955929488

Test 2: throughput

Count: 8

Code:

  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  st2 { v0.16b, v1.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020780071621000000015100210128003316160254037668010016100316000080100160000160000480499239853212877500800258006180059033240010020016000016012020040000032000080049800491180201100991001008000080000010016001213380016001410151600021201205109117118004780000160000160000801008006080063800508005280052
32020480061620100000012100388728003401602540172580100164666160000801001600001600004804993679188130473708003480052800510314240666320016000016000020040000032000080051800511180201100991001008000080000010016000003200160002048160002232005109117118004380000160000160000801008007180062800528005180051
3202048006162010020001500043332800411616025404902801001665931600008010016000016000048049920795011305868080029802118004933233140010020016000016000020040000032000080044800451180201100991001008000080000010016000003200160002005160002232005122117118004280000160000160000801008004880046800518004680046
32020480045621000010300004259080031000254065388010016425616000080100160000160000480499207887413018990800238005080050032640010020016000016000020040000032000080049800491180201100991001008000080000010016000003200160000002160002232005109117118004780000160000160000801008006180046800478005080046
320204800456200000000000059710800290160254046728010016474616000080100160000160000480499207891912872630800238005080049032740010020016000016000020040000032000080049800451180201100991001008000080000010016000003200160002005160002232005109117118004780000160000160000801008004580051800458004580045
32020480050621000000030015907080029161502540192380100165152160000801001600001600004804992399916128003508002580057800500334400100200160000160000200400000320000800498004911802011009910010080000800000100160013133800160014001416000214381215109117118004680000160000160000801008005180051800508005280050
32020480061621100000015000563628004416002540523180100166145160000801001600001600004804992399741131642608002580061800610331400100200160000160000200400000320000800508005711802011009910010080000800000100160012133800160012011516000212381215109117118004980000160000160000801008006080051800518004980051
320204800506211100000150004700280044161602540614580100164706160067801001600001600004804992319855129612108003480052800480332400100200160000160000200400000320000800508005011802011009910010080000800000100160012133801160014001416000214381205109117118004780000160000160000801008007180052800518006280062
320204800516201001000160014700280046161602540278180100162681160000801001600001600004804993679174130822708003680051800500343400100200160000160000200400000320000800618005011802011009910010080000800000100160012133701160014001716000214361205109117118004880000160000160000801008006080051800638006280061
320204800506201001000180014975280035161602540253480100164728160000801001600001600004804992319939129924108002580050800610331400100200160000160000200400000320000800618005011802011009910010080000800000100160012123800160014011416000012371235109117118004880000160000160000801008005880062800528005180050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320027800456210010003000413980030016025402061800101627891600008001016000016000048040121459101300400800238004480044032840001020160000160000204000003200008004580045118002110910108000080000101600000001600020021600020424501912176148004180000160000160000800108004680045800458004680046
3200248004562101000132951003996800291600254039168001016512116000080010160000160000480049207941413031958040780045800440326400010201600001600002040000032000080044800441180021109101080000800001016000042001600020221600022400501912176138019780000160000160000800108004780045802148004580046
32002480044620000000300043298002901602540206180010165626160000800101600001600004800492079721130315180025800458004403264002952016000016000020400000320272800458004511800211091010800008000010160000400216000200216000224005019131713128004080000160000160000800108004580046800468004680046
3200248004562100000030003968800290160254042888001016661116000080010160000160000480049209973912953168002480045800448532640001020160000160000204000003200008004480046118002110910108000080000101600004000160002009221600022400501913178148004180000160000160000800108004580046802138004780047
3200248004662000000213000536282693160025402352800101658131600008001016000016010848004921553121303217800308004480044843274000102016000016000020400000320000800458004511800211091010800008000010160000000160000102160000104205019131714148004180000160000160000800108004680045800458004680046
32002480045620000000010047528002901602540225980010166158160000800101600001600004800492237556130448080023800448005403274002952016000016000020400000320000800458004511800211091010800008000010160000400016006200216000224005019122513138004280000160000160000800108004680045800478004780046
320024800526200000009110039418003016160254038588001016446316000080010160000160000480049223652912937458048780044800540327400010201600001601202040000032000080044800471180021109101080000800001016000040001600020001600626400501913171268005180000160000160000800108021180045800458004580045
32002480046620000000300064768019816160254038468001016378316000080069160000160000480049207967112929888002480045802110327400010201600001600002040000032000080046800441180021109101080000800001016000040001600021021600026005019131714158004180000160000160000800108004680045800558004680046
320024800456210000003100616880028161602540379280010163782160000800101600001600004800492234587130094780024800458004603274000102016000016000020400000320000800458004511800211091010800008000010160000406201600000051600002400501912171058004180000160000160000800108004780045800468005580046
320024802126200000103100570480029016025404277800101676641600008001016000016000048004922284471292886800238004580044832740001020160000160000204000003200008004580045118002110910108000080000101600004000160000009251600022440501912176138004180000160000160000800108004580055800468004680046