Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (multiple, 8H)

Test 1: uops

Code:

  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.000

Integer unit issues: 0.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6600829283228240031000000004666292561724860003000300030003000330222400080227592901029242310600030003000600090002932029409116100110001000300000300000300060129649300687531151449199513210381114555728569163501261614188300030002929229313293942930529406
6600429369227210023000010004789291821713560003000300030003000330322400090227842902229377310600030003000600090002923329215116100110001000300006300000300060131749280694230551452200353193380712505128692162301259314976300030002943629286293312933729349
660042937722721001700001000462829195171796000462830003000300033024240003022776289642931831060003000300060009000292572934211610011000100030000630000030000013354923268983049114920144318638158485128644160221276714579300030002937029366293302939529171
6600429342227250023000010004690292801727160003000300030003000330662400048227572894629370328600030003000600090002925029322116100110001000300000300000300060130209267691231561157201463188381212535828703162221294214694300030002927229247293412921429301
6600429502228240025000000004581292701713360003000300030003000330302400060227292901129299310600030003000600090002933229193116100110001000300006300000300060130109281691531261446200683224381016484728663160961270414856300030002936929385293532938629393
6600429370236260022000000004760292251728360003000300030003000330392400050227772901829422310600030003000600090002939429277116100110001000300006300000300360129579260685930971350201963244381611614828605162291276614645300030002942729380292482937629427
6600429264227190019000089000455329213170957625300030003000300033027288210022780289992935331060003000300060009000292922941411610011000100030000630001030003012943922768673148124920093325938079575228675161721290014842300030002931929284293272934929251
6600429336227270020000010004575293071724260003000300030003000330562400050227302909629290310600030003000600090002928529410116100110001000300000300000300060130569231687430801160202053214380911565528569161001281414821300030002914829365292942924329293
6600429278226240023000000004672292311721160003000300030003000330632400000226942902729440310600030003000600090002938629412116100110001000300006300000300060129269244688530981348202103114381616485028751164201271514754300030002942729304290912936529357
660042924722724002600060100474529217173426000300030003000300033027288270022781290862927631060003000300060009000294072923811610011000100030000030001030006013014917069303116647201933182380814504728634159811261714768300030002939529280293892929129355

Test 2: throughput

Count: 8

Code:

  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  st3 { v0.8h, v1.8h, v2.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4802081200539301000100019000507511200481616125487164100241337240000100240000240000500551983519750360120028120053120054399903400344801002002400002400002004800007200001200511200531180201100991001008000080000100240014154402240016011624000216441410051103170351201920240000240000100120359120208120520120203120208
48020412020693410200314201050004388112050316161827148249210024428624018010224315924291652256829361969628012045812021012066540351104035048077520024036024024020248048072108012036912051141802011009910010080000800001002404371444152324013600261124000214441410051092170351200510240000240000100120054120052120053120054120055
48020412005293010001000180005194112003601602548570710024333924000010024000024000050055199271935330012002812005112005439991340033480100200240000240000200480000720000120049120053118020110099100100800008000010024001414440124001600172400021601400051093170331200480240000240000100120054120053120053120055120054
48020412005493110100001217000679111200381616025486002100243110240000100240000240000500552034019273340120026120052120050399923400334801002002400002400002004800007200001200531200521180201100991001008000080000100240014144400240016011824000216441400051104170331200450240000240000100120052120063120055120054120054
48020412005493110101001217000500311200391616025484765100249186240000100240000240000500551981019405690120023120052120050399903400344801002002400002400002004800007200001200531200631180201100991001008000080000100240015144400240016001724000016441410051093170331200480240000240000100120054120053120054120054120055
4802041200539301000000020000719711200401616725483101100245066240000100240000240000500551990819320020120028120054120051399913400354801002002400002400002004800007200001200521200531180201100991001008000080000100240014144400240016011624000216441400051093170331200510240000240000100120055120054120053120054120055
4802041200559311000000017000533411200471616125484896100247200240000100240000240000500551993219476470120027120052120053399923400444801002002400002400002004800007200001200511200541180201100991001008000080000100240254154400240016001724000216441400051105170231200490240000240000100120052120054120054120052120052
48020412005193110000002121000680011200391616125486765100245329240000100240000240000500551988419374710120027120062120052399913400364801002002400002400002004800007200001200511200531180201100991001008000080000100240014144401240014102124000216441400051093170231200480240000240000100120054120048120053120055120054
4802041200549311000000014000640011200351616125487022100249053240000100240000240000500551993219403300120037120062120060399913400344801002002400002400002004800007200001200511200531180201100991001008000080000100240015144400240016111824000216441410051093170331200510240000240000100120063120055120052120054120063
4802041200529301000000018000680011200321616125483321100242677240000100240000240000500552034019543930120027120051120053399923400444801002002400002400002004800007200001200531200541180201100991001008000080000100240014144600240014011724000214441400051093170231451340240000240000100120052120064120055120053120052

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48002812005093000000010119100546911200271616025488581102466592400001024011724000050551956719506911012002112004312004239986340042480010202400002400002048000072000012004212004211800211091010800008000010240000034002400020082400022340502071771412103924000024000010120199120050120043120043120050
480024120049930000000001002707012002701641254850321024733424000010240000240108505519567194810100120169120049120043399843400344800102024000024000020480000720000120042120048118002110910108000080000102400000340024000210024000220050207178812003924000024000010120050120043120043120050120043
48002412004393000000012000088640120118161602548470710244557240000102400002400005055195671965563001200211200421200493998734002548001020240000240000204800007200001201981200421180021109101080000800001024006003803240002105240002034050207178812003924000024000010120043120043120133120050120043
480024120042930000000132000028460120027161602548867610244763240000102400002400005055195671951435001200211200421200493998634002448001020240000240000204802407200001210871200421180021109101080000800001024006003600240002002240000234050339176812003924000024000010120043120043120049120044120043
4800241200439310000000300085140120033161602548595910248581240000102401172400005055195671945606001200211200421200433998434002548001020240000240000204800007200001200431200421180021109101080000800001024000003400240000005240060234050318177812018824000024000010120044120050120043120043120050
480024120042930000000498500040000120027161602548588210247173240000102400002400005055196631936182001204831200431200423998434002448001020240000240000204800007203601200421200431180021109101080000800001024000023400240002102240002234050208179912004024000024000010120050120044120200120043120044
48002412004293000000012200057200120028160025487628102450272400001024000024010850551956719414740012002112013212004239984340024480010202400002400002048000072000012004312004211800211091010800008000010240000000024000210824000223405020101761112003924000024000010120044120043120043120049120143
4800241200429310000000911003590012017916160254867921024714124000010240000240000505519687195714500120021120042120049399903400254800102024000024000020480000720000120042120202118002110910108000080000102400000345202400020011240002234050198178812004524000024000010120044120044120043120043120050
4800241200499310000006310064290120027161602548501510246663240000102401172400005055195671955356001200211200491200423998416400304802352024000024000020480000720000120043120042118002110910108000080000102400000340024000200870240062034052519446912046524000024000010124487123727123266146320120512
48002412034993300013113217600065640121711161617894486331102460582401201024023424021650553743219355910012016412035212019740076840275480460202402402401202048048072072012019512035721800211091010800008000010240000004702400641022400022340502010177712003924000024000010120043120050120044120044120043