Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STEORH

Test 1: uops

Code:

  steorh w0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.003

Integer unit issues: 1.003

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f1e1f2223243a3f464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f696a6d6emap rewind (75)map stall (76)dispatch uop (78)79map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
73005331472484060700110061100596432834002095430041004200010002000109501684644929650329063306031330000100020002000400032970261611710011000100002000022100310000022002213221159281178218378400111112337238094448196468324091003168901459515509200010003291532857328223291733063
73004330942472030301110200100600432730012076730001003200010002000109501685624930052328213289831030000100020002000400033278259611710011000100002003302100410010262002312221160531175118322400341032338338574437166770324481004164241512814932200010003273332908328013278732812
7300432904247304060001003010060313272700208253004100420001000200010964168630492975532569327083103000010002000200040003317326091171001100010000200002010021000002200100220016266111890840137191962336139274441216566324061004166341525115630200010003283133177326633284433027
73004329162472151411010021100588632769112100830031003200010002000109641684834930037329433312631030000100020002000400032979258811710011000100002004422100410010142002212221160391203908400391321002355838454439276366324131002171831502915610200010003277232844329673280732907
7300432952249303040001004010061013273500207273004100520001000200010959168682492977533013327413103000110002000200040003288025941171001100010000200002210021000002200110220016234118500840540492962314738294438185561324081006165601496215379200010003292232797328413278932765
7300432949246414041001002100059613270211209873004100320001000200010974168491492973932836328833103000010002000200040003302725861171001100010000200343210061001014200231322216309119350838638621932316537884437146766323921000162541424016043200010003280732803327503288633007
73004328652482040500010040100619632788002084930041004200010002000110021686914929926327093294631030000100020002000400032815256611710011000100002000022100210000022003202200162221189808401390821012330438474437206561324311006165551522615288200010003292232914329473290032987
73004330692463121400010071100595032635112114630031005200010002000109751687644929892327543284631030000100020002000400033197256111710011000100002003322100510010142002313222164531202908394402731002298537964436246162324461003164141428315542200010003313132806329033282233043
7300432866246205030001004010059323278111208973004100320001000200010947168725492989632821329843103000010002000200040003298426221171001100010000200433210031001024200251322216367118941824740172872320538834440195653324991000167921461315474200010003305432963328093295732882
7300432924248305030001003010058423290400207923002100420001000200010969168195492989632727332653103000010002000200040003315326341171001100010000200002010021000301200220020015912117970845039952992329737764442266570324321003172421476116042200010003290733267330433308232913

Test 2: throughput

Code:

  steorh w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0075

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f181e1f20222324293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
402063007522600000341943710032140300374718231912902254046120448200002010820000912776141296202414926995300723007524823625028401042020820008302124001630075571120201100991001000010100100000100200530129571007610004414224732010214134417611113160160030072204080620000201003007630076300763005330076
40204300752250010034274381000220300503018221912920254046820431200002010420000912638141298402744926995300753006524825725027401002020820008302064001630075611120201100991001000010100100000100200490289681009510006314228562009705044523011113160160030072203806020000201003007630076300763007630053
402043007522500000341933910001520300605926193112916254052820496200002010420000912758141304912424926972300723006524825725027401042020820008302064000830065611120201100991001000010100100000100200390273861008710005216416602011126225420111113170160030072203646620000201003007630076300663007630076
40204300752250200034154381000170300604019182312924254047820446200002010420000912756141294712494926995300723007524816625027401002020820008302124000830075611120201100991001000010100100000100200440147601010210005014936722012205355024811113160160030072203436620000201003007630076300763007630076
402043007522500000341664110001732300604023213212969254048320472200002010420000912612141299012714926985300723005224818325033401002020020000302004000030075611120201100991001000010100100000100200470229511009610005216542552008747046014200013101161130062203426620000201003006630076300763005330076
40204300752250000034074411000140300603222202712961254048220491200002010020000912248141190912404926995300753007524818325023401002020020000302004000030075611120201100991001000010100100000100200540446961008310005213212772007904956519700013101161130072203749620000201003007630076300763007630076
40204300752250000034413361000160300604914292112919254055720413200002010020000912512141303812594926995300753007524819325022401002020020000302004000030075611120201100991001000010100100000100200450223861008410002014754572011715744819500013101161130072204120620000201003007630076300763007630076
4020430075225200003437250100242603006049162223129322540457204782000020100200009127711412913127049269853007530065248163250334010020200200003020040000300756111202011009910010000101001000001002006204061071009210013205120622008726643817000013101161130072203459620000201003007630076300763007630076
40204300522250000034132401000240300605520191512931254044620452200002010020000912542141305712494926995300523007524818325009401002020020000302004000030075611120201100991001000010100100000100200560150711007410006214032532009603336719700013101161130072203716620000201003007630053300763007630076
40204300752250000034216361000100300604119212712890254044920446200002010020000912733141295512544926995300723007524817325022401002020020000302004000030075611120201100991001000010100100000100200540161731008610003214838702007413327322000013101171130049203179620000201003006630076300763007630076

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0075

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f202223293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2cfd0l1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
400263007122600000034004321002503005428117151291925403362034020000200102000091191414124440225492698530062300752482903250554001020020200003002040000300656111200211090101000010010100000102003701736501006110002313022432007112248314801270002162230072203186620000200103006630076300763005330066
400243005222500000034135371001003006026115251292425403302028520000200102000091194414129810232492698530062300652483403250554001020020200003002040000300756211200211090101000010010100000102002001415501005910003415416302004703233716501270002162230062203346020000200103006630076300663006630076
40024300752250001003400425100120300502201191291525403192029420000200102000091204714129921263492697230049300752483203250454001020020200003002040000300706111200211090101000010010100000102003901255201006710006212428482007302542919801270002162230072203516620000200103006630066300763006630066
400243006522500000033851261001803005033119161291725403412030120000200102000091202614129680254492698530062300752483303250454001020020200003002040000300656111200211090101000010010100000102002101195701005110004414016422005501446526701270002162230062203456020000200103007630076300763007630076
40024300752250000003398228000240300502062317129002540352203052000020010200009120281412483026149269853006530075248410325045400102002020000300204000030065611120021109010100001001010000010200190142490100561000711220382004701934111701270002163230049202949620000200103007630066300763005330066
400243007522500100033833241001403005035117121293325403582032320000200102000091175514130050288492698530062300652484003250554001020020200003002040000300916111200211090101000010010100000102004301646101005010003213734512005902338318301270002163230062203239920000200103007630076300763007630076
400243007522500000033957351004143006035115191290125403422032820000200102000091139714129690268492698530072300652484303250554001020020200003002040000300526111200211090101000010010100000102004301945701008310005515136422005814676224801270002162230062202930920000200103007630053300763006630076
400243007522500000033903201001403006028118221290325403512029820000200102000091206614125290236492698530062300522483803250454001020020200003002040000300526111200211090101000010010100000102002501724501005810007113232252005613213013101270002162230049203376620000200103006630076300663005330076
4002430065225000000339613110015030050172151412933254028320344200002001020000911446141263002594926995300723007524839032505640010200202000030020400003007561112002110969101000010010100001102004001485801005010007204436202004602224020301270002162230072203396620000200103006630076300763006630066
400243005222510000033835161001403005025213231291525403102033020000200102000091194014124820228492698530072300752483903250554001020020200003002040000300636111200211090101000010010100000102002801902901007110007313824432005912054310001270002162230060203116020000200103007630066300663007630076

Test 3: throughput

Code:

  steorh w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.3453

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss instruction (0a)0e0f181e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)67696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafbbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
30206111019818001102178710011039582908509825397801965920329101002000046832904965551520491029231020021031649663969738630100113887410200200002020040000104299553111020110099010010000100100000100200000021149100010108373068392241001117221901600100884201981410002000010100102679102044101666100905102343
302041016067680000019716100110309400887864254061520789204371010020000452499749674471934910082610388510297699293797775301001248867102032000520206400091038965741110201100990100100001001000001002000034019772100010967630943889302933111716001600107135624374141002000010100104592104954104673105593100596
30204104232783000001837110011025980611849632540626195902019210100200004588596490298111449986761030591030319925861003853010013831301102032000520206400091004945891110201100990100100001001000001002000035302241610000110397302539837030351117170016001019341629938141002000010100104301105710105853106165103682
3020410386777300000198250001101546189892492547076215652042010100200004536627484851422349951021004841005579911671026833010357651610203200052020640009104965605111020110099010010000100100000100200003429201251000101033330119901902234111716001600103484721047141002000010100103892104786103055104017104037
302041030777780010020930110010167001088480725407502026120721101002000048764124874312164499955910119198477954597987363010083987610203200052020640009104542524111020110099010010000100100000100200003530206191000109243294431127702931111717001600101957192110514100200001010010119810371810335910337499127
302049898972900000189770100102635199843572541339207442025310100200004536036491854712149992921034211032909910781002613010011279281020320005202064000910052051411102011009901001000010010000010020000029200521000001000927847108311273411171630016001032851020125141402000010100101896101279102880103092102090
30204998717570000019400100110211319118347125405602107320665101002000037606924547133137499685010191110206698375699040301008306171020320005202064000910423762411102011009901001000010010000010020000332719446100000985930600959110341117162001600102690261826701002000010100104736103838103050103497103157
302041018787690000020136010199936007818132540552192852085710100200004243158478746820949100778103525103402987787101304301005708841020320005202064000910251555511102011009901001000010010000010020000026203081000101003229910109751034111716160160010400527194861401200001010010069710150710159799927104385
30204101997756000001745500029564509081908254018119149206871010020000397790847327641654994673985459630493141690870301002117177410203200052020640009938846001110201100990100100001001000011002000035021106100000120683043993470313311171621016001034206207501410020000101009832798080987829872298567
30204100201735000101952900009893230085255254157221182207791010020000454111148917901894999547102295102290996197968733010010189701020320005202064000910419434411102011009901001000010010000110020000312920006100000118603052710986000111718210160010265531930014002000010100102984103356102860102900104534

1000 unrolls and 10 iterations

Result (median cycles for code): 11.1598

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
30026111663835111110026549160021117439161791768254511324356200051001020000550297353213250969491086831113761114831071853107783300103034100202000020020400001113084511110021109256391010000101000001020027282742237661001842143563529914258152802616403921622111408224039092000010010111642111551111682111569111582
3002411136383511111002324615101111733916189159925439612362520030100102000054840885320339110094910841511152611155310706331079673001016261002020000200204000011150546011100211092920510100001010000010200282630402235510016001383532529145341428332536404121622111168024319092000010010111708111744111566111461111418
3002411150583511000002527116002111506917179225925458362771120094100102000054775935317580010254910816511156411159510732431078923001016221002020000200204000011126244511100211092767710100001010000110200262631412152410017011230832924130431525312606402821622110931024466902000010010111410111506111475111665111522
300241116298371100000243021500211157491717918032545550235092005910010200005476618532329711181491084151115481115711072303107833300101724100202000020020400001114364941110021109246971010000101000001020026270025773100152212598323241187515002606403521622111232222793092000010010111727111554111414111453111422
300241115708371010000242811510111141601818917502545642263202000810010200005503199532371111043491086831119901117011073323107841300103832100202000020020400001113304641110021109247441010000101000001020027273241244021001600148723351212577142702416404421622111183224718992000010010111260111373111607111566111290
3002411173583610000002431716101111803000917813447054244422012910010200005454833531813611028491084841112871115851072243107664300104328100202000020020400001113594401110021109257181010000101000001020026272342242841001601124303441613139152802516404521622111416224540992000010010111783111591111469111405111403
300241116128361110000235671501211131890189168025447102464320004100102000054256085312161093349108623111404111562106988310748330010444110020200002002040000111493436111002110922935101000010100000102002626039237201001602124733265514918150342626403621622111374123419992000010010111367111448111409111721111423
3002411159883511000002347115012111565901791673254500925287200281001020000548413453276511977491085741118141118291072233107809300102819100202000020020400001115024791110021109246011010000101000001020026283302661410017001366535493126391429322516404121622111269125117902000010010111704111623111502111675111659
3002411160983510100002201515002111732817169148125443432381920010100102000054787365318558010844910868711149111159110719031079253001013171002020000200204000011162138411100211092605610100001010000110200262723402517210016001384033785132941629332616403821621111404225436992000010010111386111360111277111200111522
300241113148351110000236981600211157791717917872545309250952001610010200005455112530150411032491086251117441117641075083107804300101118100202000020020400001115514561110021109267861010000101000001020026262341249691001502161723406315533150312526404021622111135323712002000010010111266111229111534111545111551