Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (multiple, post-index, 2D)

Test 1: uops

Code:

  st2 { v0.2d, v1.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6400728903232210010011230004688287522217877500010002000200210002000200050002160916000010021828287152891031050002000200050004000288712872811610011000100020000602002002200224001336692896850319003419828328037721436382832110001557212605140112000200010002879828878288842872028865
64004291152320000001003100469628755221778350001000200020001000200020005000216001600005121911286392894631050002000200050004000289462882311610011000100020040602000040200204001320793106970314304120001324438211035402856410011600412861139652000200010002908528902290872888628984
640042909523400100014132177000468028877201795950051000200220041000200620005015216061603217021861287952894334950052006200250054004289352890231610011000100020000402002000200004001291693316924311503420142318638172533382862310011591412895142082000200010002920529044291822911929289
64004291852330010012227317700046222905822179835010100020022000100020002000500021613160000302187828896289983105000200020005000400029268291071161001100010002004060200002220000405081325693276948310802620315328138241338322850510001581312669139042000200010002877928828288612895028972
640052876723200000000151000476528544221771550001000200020021000200020005000216011600005021811286802904231050002000200050004000285432864711610011000100020000402000000200004025861325394726973316303319741316138251333282828010001537312504136092000200010002876128742287852875928970
64004288402310000000090000474028531221808850401006201020161008201020205045221981614400021970291392930574290505520142020503540362940029218171610011000100020182202016005670201224001306988676645298803220643314638174335412839310091559212786134902000200010002873829529298362977129840
6400429520234001001109158417600047312911300180195010100420132006100920102006504021979161280302180028889292743179503520122008500040242935029103111610011000100020102602000000200004001324697627007320103819755324338231139352828910001542812649137772000200010002865828847286092866428963
64004288952240000000001000475328512201757950001000200020001000200020005000216151600000021832284582881731050002000200050004000286532868111610011000100020000602000002200000001316595667038314703519543314638281636372831210001560912567135112000200010002885928651285662870828692
64004286932230000000001000464528703221778850001000200020001000200020005000216151600005021820287052888733050002000200050004000287612877311610011000100020000602000000200006001309792416928314203819916331238212038382840210001545612586138922000200010002899228967289132897028936
640042887823300000100132100046352881800178055000100020002000100020022000500021613160000102185728620288393105000200020005000400028797288371161001100010002000040200000165520002000133349381694831600341970232933826537362825310001541212781137982000200010002865828760287412877328778

Test 2: throughput

Count: 8

Code:

  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  st2 { v0.2d, v1.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320207800526210000001233734080029161602540390880100162587160000801001600001600004804992319215129561108002380044800440334400100200160000160000200400000320000800458004511802011009910010080000800001001600000400016000200516000224000051098215878004280000160000160000801008004780046800548004680045
32020480045620000000123411608002916160254038828010016540316000080100160000160000480499215786112951390800238004580045032740010020016000016000020040000032000080045800452180201100991001008000080000100160000040001600021051600022400005109817848004180000160000160000801008004680046800468004680046
3202048004562000000007447708003016160254063518010016378216000080100160000160000480499215804413020870800238004580045032640010020016000016000020040000032000080045800451180201100991001008000080000100160000040001600000051600022400005109817888004180000160000160000801008004680046800458004680046
3202048004562000000012040370800300160254067808010016589916000080100160000160000480499223635912971991800238004580043032740010020016000016000020040000032000080045800441180201100991001008000080000100160000040001600021021600022400005109817888004180000160000160000801008004680046800468004680046
3202048004562100000003523408003016160254047428010016480216000080100160000160000480499207411613001570801798004580045032840010020016000016000020040000032000080214800441180201100991001008000080000100160000040001600021021600022400005109717888019580000160000160000801008071480046800478021580378
320204800436210000003720415308003116160254041928010016361316000080100160000160000480851207927312977220800238004580045032640010020016000016000020040000032000080044800441180201100991001008000080000100160000040001600020001600022400005109717788004280000160000160000801008004780046800488004680046
3202048004462000001003474408003016160254059488010016393716000080100160000160000480499207886913016280800238004580044032640010020016000016000020040000032000080044800441180201100991001008000080000100160000040031600020051600022400005109817998035380000160000160000801008013680214802148004580045
3202048021162100102113226762051806971616182734035478015916660416006080159160118160216480851233159213042730803318021080377849226400670200160120160120200400600320240802148037721802011009910010080000800001001600620406221600620293516078224000051091034948035280059160000160000801008054980217803798037980213
3202048054862200001113288251908002916160254044618010016436916000080100160000160000480499207917013142320800248004480044032840010020016000016000020040000032000080054800451180201100991001008000080000100160000040001600021021600022400005109817888004280000160000160000801008004780045800468004680045
3202048004462000000003387508003016160254029678010016420316000080100160000160000480499207966313059170800248004580045032740010020016000016000020040000032000080044800451180201100991001008000080000100160000042001600020061600022400005109717888004280000160000160000801008072280049800478004680046

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002780057621000008439110454008002916160254049638001016610516000080010160000160000480049207734512950060800298004480045033440001020160000160000204000003200008021580045118002110910108000080000101600000000160000102160002240050190016177158004180000160000160000800108004680044800468004580045
3200248004462000000792310626808002916160254039868001016440616000080010160000160000480049207966512937451800248004480045032740001020160000160000204000003200008004580045118002110910108000080000101600000000160062102160002240050190061717158004280000160000160000800108004680045800458004680045
3200248004562000000909310544408003016160494038218001016615616000080010160000160000480049215900212951390800248004480045032540001020160000160000204000003200008021280045118002110910108000080000101600000400016006210216000224005019008171568019680000160000160000800108004580046800888004580046
3200248004462000000177310367008002916160254051348001016432516000080010160000160000480401207932113086500800238004680045032740001020160000160000204000003200008004580045118002110910108000080000101600000400016000200216000220050190018178198004280059160000160000800108004580046800468004580046
320024800446200000141101041700800300160254058728001016450916000080010160000160000480049207945112940900800248004580044032740001020160000160000204000003200008004480045118002110910108000080000101600000400016000002516000224005019007171678004280000160000160000800108004680046800458004580046
32002480045620010009273103909080028161602540648080010166295160060800101600001600004800492157526130492708002480287800450333400010201600001600002040000032000080044800452180021109101080000800001016000004000160002003160002240050190015178168004480000160000160000800108004680046800458004680045
3200248004562000000903310226808002900025402391800101638751600008001016000016000048004921593421298585080023800458004403354000102016000016000020400000320000800458004411800211091010800008000010160000040001600020021600022400501900151716148004180000160000160000800108004680045800468004680046
3200248004562000000873310413908003016160254065088001016562616000080010160000160000480049191953413041300800238005480045033440001020160000160000204000003200008004580045118002110910108000080000101600000400016000000016000000050190081716168004180000160000160000800108004680046800468004680045
320024800446210000094831040500800311616025404348800691637381600008001016000016000048004920794971306533080023800458004403274000102016000016000020400000320240800458004511800211091010800008000010160000040001600000001600022420501900161717158004280000160000160000800108004680046800468004680046
32002480044621000004083103826080030161602540357880010164363160000800101600001601084800492158675129865908002480044800440334400010201600001600002040000032000080045800451180021109101080000800001016006204000160002000160002040050330016176178004280000160000160000800108004580046800468004580047