Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (multiple, 4S)

Test 1: uops

Code:

  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.000

Integer unit issues: 0.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6600829250233220015000000000476328741031662060003000300030003000330452400013227092868928854310600030003000600090002865228582216100110001000300397003002104023000000013278945169393125737195543286381919343328114152671225614190300030002893428892289452901628841
6600428923231800171011132100047722848712165976000300330003009300933104240967228812932529584331216018300630126024905429429296087161001100010003000070030000003000190013302942469913175639196123166381715374028378152991346515753300030002906929171292362909129147
660042905523313101800010890004740289910216966600030003003300030033305924000022771288932906512106000300030006000900929077289172161001100010003000200030000043000060013067947969023129736199523205381315383828570160681269314572300030002903729057290242914929161
66004291112341510150011121000476529249001709760003000300030033000339332400082275728797290303106000300030006000900028880288671161001100010003000000030000003000060013122943369743109737196663307382019394128382158211244214399300030002890128890291092886728882
660042893623112001210001321000476028921001677760003000300030003000329982400052414828633289133106000300030006000900028886289051161001100010003000000030000013000000013078943669443160741196313256381010383628353157701247014512300030002890828933288482884428809
66004288002321500110000001004740288710016722600030033000300030003303024000102281028695289163106000300030006000900029306292151161001100010003000090030001013001000013120924169073119538199403327381610424028656161261246014398300030002921728960289472889728915
6600429042234150090001241000471128939121683660003000300030003000330102402422276928653290113106000300030006000900028742288261161001100010003000000030000013004060013171930868893112742196893274381511443428417156281241614275300030002897728889289332889128979
660042885823218001300000100047872881400168387621300030003000300033039240005227982869429031328600630003000600090002893628809216100110001000300026003000000300007001317795506951310493519774331038149353528440156621250214422300030002892828942288902895628982
66004288102311000900000000047422895102167926006300030003000300033040240244226842867428904310600030003000600690002895728856116100110001000300000003000000300006001317594636922304634119765324938178373928473154151242914470300030002892228958289222887228896
6600428969233180017100001000473828916001683760003000300030003000330092400042271728638290533106000300030006000900029145288821161001100010003000260030010213000160013441935569803132739196343180381914414128326155461238613911300030002873228749288212867228576

Test 2: throughput

Count: 8

Code:

  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  st3 { v0.4s, v1.4s, v2.4s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802081200529311001103021000812911200431616002548890610024560224000010024001524001050055198781948561012002612005212005839999740032480128200240018240138200480036720054120058120051118020110099100100800008000010024001414360024001600158240002034021115117116001200460240000240000100120044120050120043120043120044
480204120057930111100919000233411200331616002548543810024540124000010024001824001050055257401957246012002412004812004339987740014480124200240138240018200480036720054120048120042118020110099100100800008000010024000003400240002002624006016361411115117016001200390240000240000100120043120043120044120044120043
4802041200589311010000190006007112003300002548144410024156724000010024001524001050055197291929272012002312004912004239987740015480128200240018240018200480036720054120049120043118020110099100100800008000010024000003400240000001824000216361411115117016001200400240000240000100120043120043120049120044120043
480204120058931100100132180007219112004416160025486844100244766240000100240018240010500551961519417330120021120043120042399877400144801282002400182400182004800367200541201931200481180201100991001008000080000100240000034002400001018240002160001115117016001200390240000240000100120206120043120043120049120049
4802041200599301100000180004146112003516164025486765100246516240000100240015240010500551961519390630120024120202120043399877400144801282002401202400002004800007200001200421200421180201100991001008000080000100240000034002400020018240122234000005110117111200400240000240000100120043120043120043120043120043
480204120066932101000019000720711200351616302548676610024513724000010024000024000050055195671956111012002112004312004239984340129480100200240000240000200480000720000120042120049118020110099100100800008000010024000003400240002002324000014361410005110117111200470240000240000100120059120052120052120050120059
4802041200489310000000300022450120027016002548890710024580224000010024000024000050055196921947314012002612005012005339996340039480100200240000240000200480000720000120050120060118020110099100100800008000010024000003400240002002124000216361410005109117111200560240000240000100120050120059120053120049120050
48020412004893100000063000481101200281616002548942410024300024000010024000024000050055197641940325012003312005812006139986340136480100200240000240000200480000720000120058120057118020110099100100800008000010024001514360024001610224000000000005110117111200390240000240000100120043120044120049120043120044
4802041200519301100006180006797112004316164025482432100248796240000100240000240000500551956719528351120164120196120049399849401324803252002402582400002004800007200001201971200421180201100991001008000080000100240062001020240120002336424012014361400005134226211203391240000240000100123588123872124205124206120464
4802041203539330111312642670005267012033516162025483683100244581240000100240000240000500551961819328030120035120060120052399973400414801002002400002400002004800007200001200501200591180201100991001008000080000100240014153610240014015240002234000005110117111200400240000240000100120043120043120043120050120043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480028120058964100100001908076112004501642548747310248885240000102400002400005055201701931680012002401200491200503999634003348001020240000240000204800007204141200491200521180021109101080000800001024001414001240016001724000216361405019117111200565524000024000010120060120051120061120059120051
4800241200579641100000615072001120044161632548500210242136240000102400002400005055201481932619012003431200501200583999934003348001020240000240000204800007200001200581200601180021109101080000800001024001417000240014001824000014361405020117111200472024000024000010120059120052120062120051120060
48002412005896411000003210466511200441616025483916102463852400001024000024000050552024419483650120035012004812005939996340042480010202400002400002048000072000012005012005811800211091010800008000010240014144001240016012024000214361405019117121200491724000024000010120051120060120053120050120052
48002412005296411000003190118701120037160025483743102469902400001024000024000050551981219476680120033012021112006139988340032480010202400002400002048000072000012005812020211800211091010800008000010240014143600240016102324000216401405019126111201931824000024000010120052120060120359120050120208
48002412005296610000100140334811200431616647487556102439972400001024011724010850551986019542870120034012016812004739989740041480235202401202400002048000072000012020312005011800211091010800008000010240074143601240016002024000216361435032117111200581724000024000010120059120053120216120059120059
4800241200599661001031132109041441120200161627692484277102475252401201024023424010850552627919615690120182012005712020539988740147480235202401202401202048000072072012004712035821800211091010800008000010240794144310222401360389024018214361435058144121203421824000024000010120051120052120214120051120051
4800241201479641100000019060011120037151602548567910245600240000102400002400005055197641957334012003501200581200523999634003248001020240000240000204800007200001200501200581180021109101080000800001024001514000240016101824006214361405019117111200471724000024000010120051120054120049120060120051
4800241200509641000000010604905112018716092254823121024826524006010240000240108505538207194120601200250120080120703399943400214800102024000024000020480000720000120048120050118002110910108000080000102400141444012400141019240002140141502011711120047024000024000010120055120053120056120054120055
4800241200559641011001211702933112004716162254866711024479724000010240000240000505519908195090611200360120050120052399913400364800102024000024000020480000720000120210120053118002110910108000080000102400151444032400141020240002160141502011711120050024000024000010120053120210120051120052120052
4800241202089641010001121706007112004716161254855711024420924006010240000240000505519764195907101201850120052120050399903400334800102024000024000020480000720720120208120053118002110910108000080000102400141544022400141020240002140140501911711120189024000024000010120209120054120053120051120211