Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STADDLH

Test 1: uops

Code:

  staddlh w0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.005

Integer unit issues: 1.005

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
73005330162461401161001005105755326041102076830061008200010002000110441693113492967432530326543143000100020002000400032838260911710011000100002000022100510000320053220016373120592844540434502277736544429194349323641004167661491516355200010003272832648331553270232677
73004331982481401120001008105906331391102098230041007200010002000110681705510493003232512327523123000100020002000400033082255111710011000100002000022100610001620083220016432119502807140077452278241304433174542325711007167151464516022200010003261332770325233279732692
7300433100248160010010100610612232744110206313006100820001000200011223169449492956532603332253133000100020002000400033114257611710011000100002000021100310000520045220016111122493829236775422283040324440204247323201007163221552714902200010003314532599326503268732624
73004326722441200170001004106145327300102085930061004200010002000110061699716492956632595325263103000100020002000400032672258011710011000100002000022100510000320037220016285120331844240506502286939774434214446323291004175641537315915200010003261333055330963284432594
73004327652451300110001032106059325551102071430061006200010002000109751688484929760325373262614363000100020002000400032914255511710011000100002000022100310000720033220015612112684819640698432277540634437114645324731007174441561016542200010003261532660328723264532828
73004328152461600170001002106108330311002059530051006200010002000110311688312492968832959326633133000100020002000400033357253811710011000100002000022100310000920056220015736119833844640548432267437504434174746324191006165631454616228200010003265033104327133260432567
730043266924580011000101400612932646110206563006100420001000200010997169689492960132933327053133000100020002000400032730259211710011000100002000020100610000220032220015711122542842640274422287639594439124646325251006160101435014985200010003264232623326393263632664
7300432784246160016000100810590732896010210063009100620001000200011086168899492966932617331033133000100020002000400032843258711710011000100002000002100610000620035220016026112102813337887442264439344441134542323871007174171454914859200010003315433159327523306232613
7300432709245130020000100510606832549110212033005100620001000200011112169518492964632892327263133000100020002000400032775260211710011000100002000022100510000720023020016419121542824540667482269037134435154440323931004173351541215465200010003276432706328063269233165
7300432659246140018000105510607632741110206013005100620001000200011189169509493013232622331443133000100020002000400032691256411710011000100002000022100210001520053220015996117992843838475512305837224439134644325261004163871460915072200010003277233095328293305132771

Test 2: throughput

Code:

  staddlh w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0069

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 23 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402067006952500105116353750016070054331101158854254035820374200002010020000204483233218951258496698970033700696450736503640100202002000030200400007006963112020110099100100001010010000010020031011253198711007510019913638312006212013792001310116116985620522101020000201007007070070700707007070070
4020470069525000054243301014070054261101658093254038620296200002010020000204699433212971222496698970033700696450736502740100202002000030200400007006964112020110099100100001010010000110020026011044198711005210000113134262004511813991001310116116985620407101020000201007007070070700707007070070
4020470069524000052191321010070049320171158126254029720297200002010020000204490233217101107496698970033700696450736502740100202002000030200400007006963112020110099100100001010010000010020035094521987110067100073137283320051119136130001310116116985620132101020000201007007070070700727007070065
402047006952500005098128101307005419114105818925403232039720000201002000020452003321897198496698970033700696448836502740100202002000030200400007006963112020110099100100001010010000010020046011058198711006610013004403220093125247130001310116116985620450101020000201007007070070700707007070070
40204700695250000520356310160700540118175785225403702036420000201002000020450523321215114049669897003370069645073650274010020200200003020040000700696311202011009910010000101001000001002002601304819874100661000131426322008112814112900131011611698562028001020000201007005170070700707007070070
402047006952400005113905610110700542011013580472540222203502000020100200002044992332204501124966989700337006964507365027401002020020000302004000070050631120201100991001000010100100000100200320128441987110066100021131323020070124236130001310116116985620249101020000201007007070070700707007470070
402047006952500005092428107287010677121175843225404022033220000201002000020446293321771018549669897003370069645073650274010020200200003020040000700696311202011009910010000101001000001002001905649198711004710010215728302007202023055001310116116985620175101020000201007007070070700707007070070
40204700695250000540804610140700354311317583102540514203652000020100200002045114332093811054966995700337006964507365008401002020020000302004000070069631120201100991001000010100100000100200290148461987110059100020143204320088135135128001310116116985620232101020000201007007070070700707007070070
40204700695250000511024510160700542611613571162540500205032000020100200002044771332138315749669897003370069645073650274010020200200003020040000700696321202011009910010000101001000001002004001295319871100581001220502227200950212307300131011611698372043301020000201007007070070700707007070070
402047006952510105412231109287005481131358220254036620557200002010020000204479833213321115496397070033700696450736502740100202002000030200400007006963112020110099100100001010010000010020026011544198711006210001013124232005622413193001310116116985620499101020000201007007070070700707007070070

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0069

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002670074525000010050962291180700540420195795225402632027820000200102000020440573321437074496698907003370070645273650474001000200202000030020400007007463112002110910100001001010000010200301176621987110083100022231242020047121327751001270216226985620377101020000200107012470070700707007970065
4002470069525100000052293261907005427211115758525406042027620000200102000020444933320996113949669890700337007164531365047400100020020200003002040000700676311200211091010000100101000001020028213335198711005310003123118312005211923893100127021632698542022301020000200107006870068700707007070070
40024700675241100000505133811007005424110558208254018220355200002001020000204484533211631694966984070033700676452736504940010022002020000300204000070069631120021109101000010010100000102003211314619871100561000202341032200751242381131001270215226985620327131020000200107006870070700687007070068
400247006752510111005040232214070052193812579892540470204012000020010200002044755332149911224966970070033700696452936504940010002002020000300204000070069691120021109101000010010100000102003527949198711003510002023283620045119236921001270216226985620241131020000200107007070068700687007070070
400247006952411100005073420012070054202101157319254044820256200002015820000204426133199321176496698707002970069645291765068400100020020200003002040000700696311200211091010000100101000001020021197451987110048100021124122420052120329741001270216226985120538131020000200107007070070700707006870068
400247006952511200005050431113070052263101058020254022220316200002001020000204429733215880154496698707003370069645293650494001000200202000030020400007006763112002110910100001001010000010200191744319871100511006232332034200421302442251001270216226985620272101020000200107007070070700657006870051
400247006952511100005066230180700542525125878525401072040120000200102000020446533321654199496698907003470069645273650494001000200202000030020400007006963112002110910100001001010000010200213925119870100571000212362034200451203481301001270216226985420290101020000200107006870070700687007070068
4002470067525110000050413201907003525112115835925402612045020000200102000020437573321366121049669890700337007064527365049400100020020200003002040000700646311200211091010000100101000001020022292481986610055100022233184120046120232741001270216226985620186101020000200107007070070700687005170068
40024700695251100000505833112044700543631011579392540156207512000020010200002044284332180011164966989070033700696452936504940010002002020000300204000070069631120021109101000010010100000102003421174219870100661000203303038200711212451651001270216226991820289131020000200107007070068700707006870051
4002470067524111010050552311120700352229115771425402162027620000200102000020452873321801194496698907003370071645273650494001020200202000030020400007006963112002110910100001001010000110200512134501987010062100022130242920049130334751001270216226985420291101020000200107006870070700707007070070

Test 3: throughput

Code:

  staddlh w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 22.0133

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3020522016716480010026799110022008830020224625509262676723454101002000084739851052587214974921706622018622012621537862163163022413031404102002000020200400002201357961110201100991001000010010000010020000300266971000101712637410172551170011172210243224222194221292779210602000010100220120220112220134220162220134
30204220117164900000271861001220194199202105255063827236224921010020000854388710525461147149217043220133220177215455621633230100213320621020020000202004000022011993411102011009910010000100100000100200003119272051000101683537526170320173101117221041522522219401125281660602000010100220128220130220132220156220138
3020422012716490000327177010122011748920243825503132771423312101002000085203271052454914994921703422018322014321539362164083010017631689102002000020200400002200958681110201100991001000010010000010020000024269771000183090837086171331142901117161005301600219389179272749602000010100220107220123220112220151220131
302042201531649100002731011012201040010201928255073827348230931010020000841854210600475152349217027220169220133215369621633630100182117881020020000202004000022011684011102011009910010000100100000100200002915266161000111732937091175281182701117221029022522219429133274956602000010100220142220144220172220146220118
3020422009316490000027032100122011401010202379255068327287235021010020000823575010524860051649217060220157220192215466621632230100259819801020020000202004000022013791211102011009910010000100100000100200003018266621000101646136457164470223001117221026902522219473102282236602000010100220132220134220110220110220134
30204220127164900000277030001220106289202227255079327377228611010020000836749410525283051949217039220145220183215431621630030100183426101020020000202004000022009184911102011009910010000100100000100200003021275381000101678637315173440142901117229778225222194981732717710602000010100220140220138220128220149220124
302042201051649000002678700012201103107202515255041426859228491010020000837403310525789053449217063220095220161215420621632030100201419381024820000202004000022013984611102011009910010000100100001100200002902710610001016750371261722901827011172210412225222194252322724210602000010100220161220146220122220126220162
30204220135164900004270581001220112100201922255053427735228381010020000862747710526280049749217059220203220143215426621632430100188219041020020000202004000022013389611102011009910010000100100000100200003115271441000001704536900170731163001117221050122522219416233279380002000010100220146220120220146220126220120
3020422012116490000027184000122013226020214825506972720223284101002000083980961052420714914921706722012522016521543262163493010021441830102002000020200400002201519221110201100991001000010010000010020000271726477100001170333721817570102901117221031122522219376135277130602000010100220134220106220132220124220130
3020422010916490110027670100122009016620254625507372784823163101002000083972161052447215564921704922011722015921544562163303010020592197102002000020200400002201119001110201100991001000010010000010020000023265831000101599436423172700163001117221024902422219491224277776602000010100220133220112220128220112220162

1000 unrolls and 10 iterations

Result (median cycles for code): 22.0253

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
300252201691649000000003173011022202390002010042552877327662007010010200001064507810524808114064921718202202842202742153513216529300101491471007420000200204000022025673111100211094551510100001010000010200282733031569100154202157041490214591000000064015241141622219330032835992000010010220227220260220230220289220300
30024220242165011001000315991400122023100020086325529853289720054100582000010690362105244821135449217224022025322027721533332164633001013413010020200002002040000220312711111002110946780101000010100000102002727220315241001713202156341582216121628342600006401535121622219448232816992000010010220223220247220270220235220322
300242202531650111010003154714001220240817172012422552910328142006710010200001062229710526283113444921717002202672202662154473216512300101211351002020000200204000022023773311100211095205610100001010000010200282831031578100151131216154159321538162702610006401535321622219380132803092000010010220251220285220285220281222898
3002422026016501110000031413150012202398018201326255279332758200771001020000106105301052634011421492171910220268220312215428321652930010137125100202000020020400002203036571110021109490041010000101000001020000027313152310001130021445414122144312931000006401522221522219400132759062000010010220252220210220259220228220288
30024220289164900000000315280001220198210102009042552889328092006010010200001066251110523666113064921717702202372202612154403216461300101511471002020000200204000022023472611100211095785110100001010000010200000312731517100000021553414842152710310000064015325216222195015327850102000010010220254220214220322220286220268
300242202471649000000003160410012202663012201330255283932854200581001020000106607411052478611351492171650220313220237215391321651730010139145100202000020020400002202417241110021109462861010000101000011020000031283141410001002138041419213791000000064015290216222194421328006102000010010220266220258220282220254220262
300242202351650000000003138701002202220131320155825527573278720073100102000010625423105251471143449217173022025122022921539832164893001012412710020200002002040000220249696111002110948838101000010100000102000003127314851000100214224140321561129300000064015264216222193841327211002000010010220286220278220224220324220288
300242202591650000000003148000012202763121420141425528193276020046100102000010625944105239921137549217155022027322026321537732165363001012411010020200002002040000220259717111002110947738101000010100000102008400283148410001002152541584215520283100000640152382162221937623275010102000010010220278220254220252220274220294
300242202531650000010003143810012202582012201350255284932787200951001020000106263201052626711336492172350220230220239215426321646130010112110100202000020020400002202516601110021109551681010000101000001020000031293160210001002159241610215570312700000640153392162221933313280210102000010010220268220228220258220224220264
30024220241165000000000315011101220238211112015762552792327652008310010200001060295810528543114324921719702202472202732154323216495300101551151002020000200204000022027372411100211094523710100001010000010200000272931428100010021479414742149003000000064015209215222193610327416102000010010220282220214220258220228220266