Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STEORB

Test 1: uops

Code:

  steorb w0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.005

Integer unit issues: 1.005

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
73005330582491121110100100510598732733012107730031006200010002000110591699484929595327333282231030000010002000200040003307325591171001100010000200330310071001004200521332215736116991848140447452308340394437154540323891003165051482115282200010003264132680329363299532915
73004327552491161017000100311602432906112075430051006200010002000110741695364929709326603289931030000010002000200040003291226001171001100010000200243310031001004200441332016176118121846236991135227744084444284242323771006164981461516351200010003288132643324303273232699
730043261424711401150001008105841327630020880300610062000100020001108016848749295863293232688310300000100020002000400032746258411710011000100002004233100810010242009212320164651204518231389911382285439404440164051323621003175491450115333200010003261232890327073285232635
7300432964247114111910010071158043306711208263006100320001000200010937168989492969432728328283103000001000200020004000325522563117100110001000120032031004100102620055133201601711618081693792639225383942444183637323741006172661498516187200010003290032689326803299632780
73004328962471191017100100210595232619102075330041006200010002000110501695874929855326563290431030000010002000200040003288325821171001100010000200440310051001016200441332216034119231831639619392306140924440163942322691006167071481015736200010003273432692327993299532672
730043282224611401181001007105900327111020601300510052000100020001101916900104929820325033250431030000010002000200040003295826001171001100010001200243310071001016200461332116367120361836239207382306738594435103945325071006161211491815513200010003280932985331283284432869
7300432814247112102010010081058293267711209083007100720001000200011064169957492963232928328893103000111000200020004000328082555117100110001000020023331004100100620035133201569511836183023906636231293992443184344323791005165421524915934200010003274832739327713286532669
730043262024611411161001003105830326011020506300610062000100020001096516942749298803261132745310300000100020002000400032830259011710011000100002002423100410011252002412322160101214108237389112382298439694445184039323691006165321437415497200010003259332587329243304932823
730043277224411511130001007105860326540120669300710042000100020001111216980104929757330653261331030000010002000200040003264125681171001100010000200320310051001017200451332216296118460825739493362317238894436174043323651009170381457115943200010003309832809327403279132749
73004326152461160122010100711616933032102086430061003200010002000110471683784929778326273294631030000010002000200040003256425661171001100010000200353310041001027200551332316657117371811740969342297039624444144042324791004162371415615794200010003258432872329483255633284

Test 2: throughput

Code:

  steorb w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0075

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402063007522520003442233100416300601812139129322540390203992000020100200009126551413103027049269953005230069248191725033401002020020000302004000030075641120201100991001000010100100001100200260107620100771000351343464200751342802531311316233007220327101020000201003007630076300763007630070
4020430069225001033781251001103005431420201291825404262043020000201002000091274114127720266492699530075300752482232503240100202002000030200400003007564112020110099100100001010010000010020033010876010076100062139065200602292511431311316233006620316101020000201003007630076300533005330070
40204300752250000340442210010030054175251512945254040520434200002010020000912550141264902564926989300753007524819325033401002020020000302004000030069641120201100991001000010100100000100200340149460100611000421302830200641423581951311216233055320337101020000201003007630076300703007030076
4020430075225000035011181002320300372071415129042540415203842000020100200009122451413005024649269953007530075248223250274010020200200003020040000300756411202011009910010000101001000001002002012244870100321000821351239200444142211281311217333004920280131020000201003007630070300763007030076
40204300752250000340022700018030060179182212932254044520448200002010020000912379141287002674926989300753007524813325010401002020020000302004000030069641120201100991001000010100100000100200190101390100581000911193030200753483902351311316233007220315131020000201003007630076300533007630070
4020430069225200033864191001903006031431361289725404122042420000201002000091279114129290235492698930072300752481232503340100202002000030200400003007564112020110099100100001010010000010020019097420100431000221444046200702393431071311317323007220358101020000201003007030076300533007630053
40204300692250100339842010011283005416102337129102540481204452000020100200009126091412909024549269893007230075248193250274010020200200003020040000300526411202011009910010000101001000001002003612269740100731000721251838200682221803671311316323006620331131020000201003007030076300703007030076
40204300522260000342942010015163006083191212908254046120460200002010020000912659141266502584926989300663006924813325033401002020020000302004000030075641120201100991001000010100100000100200380129440100671000121492248200623303622021311316323018620322131020000201003007030076300703005330070
40204300692250000341862410561603006017422181290725404622045620000201002000091250514127440233492697230069300692482232503340100202002000030200400003005264112020110099100100001010010000010020018014052010080100152137631200810233351231311216333007220284101020000201003007030076300533005330070
402043006922500003418416100251630061167171812914254042720416200002010020000912191141300602514926989300693007524820325027401002020020000302004000030069641120201100991001000010100100000100200290122530100601000431363646200833533431301311217323007220372101020000201003007630070300763007030076

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0071

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400273007122611100003434928100014030050191012171291225402982029620000200102000091139314130360217492699530065300652483303250494001002002020000300204000030065611120021109010100001001001000001020032016884010066100043135344520058118322960012701161130072203239620000200103006630066300763006630076
4002430075225001000033962280000420300568611111292925403152030220000200102000091176814121390238492699130071300582483703250514001002002020000300204000030075611120021109010100001001001000001020025089340100691001201251229200540273501810012701151130072203289620000200103006630076300663006630066
4002430052225010000033852381000170300501812181012969254036420330200002001020000912134141253612644926995300723006524843032504640010020020200003002040000300716511200211090101000010010010000010200341315069010076100110319187320062334103710014112701161130072203056620000200103007630076300663006630066
4002430075226000000033993261000110300601811162612903254037120282200002001020000911805141295802204926985300723006924840032504540010020020200003002040000300756111200211090101000010010010000110200380173630100491000411443045200610291401203012701161130066203276620000200103007630066300763007630066
40024300752250001000338012010001028302462510141512880254032820353200002001020000911827141281902194926991300683007124835032505140010020020200003002040000300656111200211090101000010010010000010200270120610100631000411283037200521302311650012701151130062203276620000200103007630076300763007630076
4002430075225000000033983201000152030056811131112894254030020364200002001020000912072141270402374926991300713007124835032505140010020020200003002040000300756111200211090101000010010010000010200580250630100631000141423043200770352401770012702161130068203099920000200103007230072300723007230072
4002430071225100000034041219100012283006030916141292625403152032620000200102000091212214124680247492699530065300752481903250554001002002020000300204000030065611120021109010100001001001000001020019084390100531000301191229200461273321130012701161130072203536620000200103007630076300763007630076
400243005222501100003408420100060301794412211212956254033520336200002001020000912487141276002714926992300713007124839032505140010020020200003002040000300656111200211090101000010010010000110200450149460100451000420251235200481301211430012701161130072203196620000200103007630076300763006630076
400243007522500000003394431100080300603010141212911254034920327200002001020000911894141300502634926985300623007524842032505540010020020200003002040000300716411200211090101000010010010000110200471514461010062100170229105320060120114812013212701161130062203486020000200103007630066300763020630076
4002430052226000100033832191000150300370727271295825403632031420000200102013891181214126000258492699530075300652483403250454001002002020000300204000030071651120021109010100001001001000001020041131263701006710010125640482005432082312114112701161130072203379620000200103006630076300663006630076

Test 3: throughput

Code:

  steorb w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.2348

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3020511165583400000202271000104074310118012325395981937120677101002000044792234834721112649100520103615101747985256101459301007456121020320005202064000910290348311102011009910010000100100001100216723018187181091907753830242843101600111108315227711997972020829002000010100104308104069104304103850105465
302041027147880011019379010010457331308333325412312128920448101002000044052694860749111849100823104230104240100083798747301009761137102032000520206400091018248361110201100991001000010010000010020000302620679100010938729969981412027011171615116111034974190000102000010100102471103238103220103089101689
3020410092274800000193710101984313100780012541075187112066710100200004433088488978111384999644101176100549988516997453010092375910203200052020640009104344484111020110099100100001001000001002000002321612100010125852965992551220011171816116111019213520219962000010100103458103640102289102281103385
3020410395978500000180820010104099089837872541673209322035210100200004555064486829411374998631992449750392440691160301001990139610203200052020640009996494281110201100991001000010010000010020000001848310001010740294098218119270111716181161110425113198780102000010100104744103712102780102331102586
3020410129975700000184481001103422001285238254317319776205201010020000463224949615690691491000311039231047781009078986363010014631229102032000520206400091028124381110201100991001000010010000010020000302219728100000102602989610301020021117161111611999126191981062000010100104767101181102827103260103151
30204106199797000002035610001021441012835952540075196652058910100200003919177479534117624999609104649105984994986967213010010081144102032000520206400091034385041110201100991001000010010000010020000312518948100010102933044411211127270111716011611980451218803610200001010010033799834991029969299208
3020410132075700000187880000104887301282292254028819071209281010020000455656649670370106499441892755929519087468839630100207115141020320005202064000910057744211102011009910010000100100001100200003120205091000101240332198952901827011171622116111030596209111062000010100103568103432102650103139102397
3020410297576900000205311001102775212085141254469129812203041010020000473599750769531161491005051023021022739808869604030100142112841020320005202064000910021743911102011009910010000100100000100200003125207141000001032731458954402300111716211161110340829211741062000010100105789105377105392106618103508
30204107267798000001908410111045472908509125436612197120094101002000048081944998847133649100121103923103866999786101690301008817741020320100202064000910268349211102011009910010000100100000100200002726199731000001146729734101140230011171620116111017396196021062000010100101868103302103163101472102176
30204994967330001018479100110060120128539925407942055520375101002000043624484822970117649949119941710072197132898877301007157041020320005203024000910332050711102011009910010000100100001100200240272082910001010138289311006811931011171613116111001762720609106200001010010332310290710068597442100048

1000 unrolls and 10 iterations

Result (median cycles for code): 11.1693

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
30026111656837100100263931100111642301491866254522824415200741001020000546301653308841115449108355111766111950107456310796830010142010020200002002040000111656530111002110926677101000010100001102000000270218131000100143263209012993128000006402321622111764225084662000010010111785111885111892111664111829
300241115098350000002379910001115883015913592543007234152000910010200005461946531704711000491085721117991116611075883108035300103632100202000020020400001117685251110021109264361010000101000001020000000024067100010014974352471409112431000064029215221113821247821062000010010111790111757111837111685111585
3002411138183700000024189000111177441413918152545941250692011110010200005461645532265501078491085751117091116011072063107793300101442100202000020020400001114964571110021109256431010000101000011020027282442023314100150314240345111386415270250006402621622111445325065992000010010111632111580111669111568111424
300241115568361110002344315002111455901891302254227123744200971001020000550430353283890982491084191115601115041071033107934300102340100202000020020400001114174361110021109257351010000101000011020027263340026108100180016466349231341916280250006404521622111298023520902000010010111632111503111615111573111749
30024111704837110000236831410211150791716915692542964230762000210010200005491036532078419244910845411167911168110724731079273001028211002020000200204000011187346111100211092716510100001010000010200262522420248091001500146543645715337142825261006404421622111144224290092000010010111840111812111696111598111807
30024111895836111000251241600111150090091766254488324440200061001020000550066153273421109949108663111780111796107389310783530010231510020200002002040000111674384111002110928146101000010100000102002727000228881001602114543224213948152625262006403121532111282223943902000010010111397111455111421111535111689
30024111232835111000232281500111130091717918832545662241762000310010200005496305531357201055491085421115531115091071783107915300103623100202000020020400001115264771110021109262311010000101000001020025260400229461001501127203265414176142824261006404121622111439225377992000010010111620111687111488111401111613
30024111666836100100239811400011150620139169825450832536620046100102009554936395332628010034910877811188811152710744431079043001030331002020000200204000011206439011100211092515610100001010000010200000270021934100001011956323701387812431000064025216221106883249051062000010010111521111572111370111694111265
3002411168283800000024187100011166331513918752543066237102002710010200005491009532291909604910876911167811165010724031076703001028231002020000200204000011158435311100211092553510100001010000010200000310023982100010012452353721474402331000064021216221113420269021002000010010111762111800111879111515111727
3002411175483800000023962000111179440149154825459562802520004100102000055427175350751011824910868411175711264810739131080213001021171002020000202344063811200644161100211092731310100001010000010200000027024861100001015021356761624812427000064024215221113802269731062000010010111794111779111734111984111306