Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (pre-index, Q)

Test 1: uops

Code:

  str q0, [x6, #0x10]!

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | aa | ab | ac | af | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100510408000330116121610258112252000100010001000100050778458241101510401040824389820001000200010401040111001100010001016019243100003622010161973116111037100001000100010411041104110411041
1004104080002161010121025011225200010001000100010005077845824110151040104082438982000100020001040104011100110001000102401519100002430010161173116111037100001000100010411041104110411041
1004104071102161013010258032252000100010001000100050778458241101510401040824389820001000200010401040111001100010001024019124100402822310161573116111037100001000100010411092104110411041
1004104070063181012010258032252000100010001000100050778458241101510401040824389820001000200010401040111001100010001016019328100302418010241573116111037100001000100010411041104110411041
1004104080005241018010258011252000100010001000100050778458241101510401040824389820001000200010401040111001100010001000031001000000010003173116111037100001000100010411041104110411041
1004104080002161011010258211252000100010001000100050778458241101510401040824389820001000200010401040111001100010001028023312100303224010162373116111037100001000100010411041104110411041
1004104080004161012010251615225200010001000100010005077845824110151040104082438982000100020001040104011100110001000101601519100002516010161973116111037100001000100010411041104110411041
100410408000324101201025810225200010001000100010005077845824110151040104082438982000100020001040104011100110001000102402724610030160010241973116111037100001000100010411041104110411041
100410408000218101201025145412520001000100010001000507784582411015104010408243898200010002000104010401110011000100010160232910000160010161973116111037100001000100010411041104110411041
10041040700621610121210250111252000100010001000100050778458241101510401040824389820001000200010401040111001100010001016027128100002430010161573116111037100001000100010411041104110411041

Test 2: Latency 3->3

Code:

  str q0, [x6, #0x10]!

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 44 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102141004075550104046723211171222096410025227402022035825201001010010000101001000052212546882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100125175117551394015221108315080247550465512475337647007101171110037100002882110000101001004110041100411004110041
1020410040757701033272226111464181972100252281022518029252010010100100001010010000522007468824100171004010040867438747201002001000020020000100401004011102011009910010010000100001001249843180114340146511024151102494504584124773227467007101171110037100002885110000101001004110041100411004110041
102041004075600103746523041146420096010025226042361923625201001010010000101001000052216546882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100124984617981430014771109915170248250465912487388587607101171110037100002441110000101001004110041100411004110041
102041004075660102907122971165615095610025223301791984325201001010010000101001000052214146882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100125244318571405014461106614926249550458812468378907007101171110037100001802110000101001004110041100411004110041
102041004075600103988623031166419176410025225301881954225201001010010000101001000052214946882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100125004617711394014371107515416248250460212468388357007101171110037100002954110000101001004110041100411004110041
1020410040766601081267229711656220980100252267017618730252010010100100001010010000522093468824100171004010040867438747201002001000020020000100401004011102011009910010010000100001001248146181014260149911073149112247850468512477328007007101171110037100002072110000101001004110041100411004110041
102041004075666103207823051194419178010025226802121953625201001010010000101001000052210946882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100124914817181427014321106415150249150453912479306707007101171110037100003394110000101001004110041100411004110041
102041004075700102997622901198423071210025228202011894525201001010010000101001000052210146882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100125195118201421015311108215227249150452712472369587707101171110037100003835110000101001004110041100411004110041
102041004075707103476723191198425098010025222902091602825201001010010000101001000052211746882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100125044619031424014501104215240249550458612474377357007101171110037100001872110000101001004110041100411004110041
102041004075777105036522811196827194810025226701851843925201001010010000101001000052209346882410017100401004086743874720100200100002002000010040100401110201100991001001000010000100125014617911465014881110215137248750452412468318057007101171110037100001635110000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 44 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10034100407500000101049622821166410102410025221402262524025200101001010000100101000052108946882410022100401004086963877020010201000020200001004010040111002110910101000010000101246501593154601579109401508124735045041252639793006403164310037100004917010000100101004110041100411004110041
1002410040750000010149522340112482075210025221502282333425200101001010000100101000052108146882410022100401004086963877020010201000020200001004010040111002110910101000010000101247301606154601551109601506024815045911249834789006404164410037100002693010000100101004110041100411004110041
1002410040751000010224922268116646072410025222202322923025200101001010000100101000052112146882410022100401004086963877020010201000020200001004010040111002110910101000010000101250101683155701559109391545024895046661250346661006404164410037100002434010000100101004110041100411004110041
1002410040750000010035692252114721094810025222302452432925200101001010000100101000052106546882410022100401004086963877020010201000020200001004010040111002110910101000010000101247441625153701586109541524024935046161252946755016403163310037100003686010000100101004110041100411004110041
10024100407500000101797722531144020118810025222602012762825200101001010000100101000052113746882410022100401004086963877020010201000020200001004010040111002110910101000010000101248241578156001574109331501124815045571250434871006404164410037100002542010000100101004110041100411004110041
1002410040751100010215552235117046072410025222102642204225200101001010000100101000052104946882410022100401004086963877020010201000020200001004010040111002110910101000010000101247301546154101580109341519024975045921251929755006403163310037100004041010000100101004110041100411004110041
1002410040761000010341792257116640072410025221602092454125200101001010000100101000052112146882410022100401004086963877020010201000020200001004010040111002110910101000010000101250101763156731585109341533024775046651250939662016725254410115100002943010000100101004110091101921009310091
1002410040750000010563752253117521070410077212502493033346201161003810026101561006952108147101810022101481011386963883520153201000020201601004010191111002110910101000010000101250301587155501579109011471024735046381250240691046404164410037100003725410000100101004110041100411004110041
10024100407500000107045122551162460724100252220021724244252001010010100001001010069519533468824100221004010040869638770200102010160202032010040100401110021109101010000100001012503816081525015771096015280250150458012507377140464041633100371000033111010000100101004110041100411004110041
100241004081120001026662224811704110143210125222202553021311262011810066100761016110208517537473156101411027910343875015877020443201008020204801014110192411002110910101000010000101247971676153901542108991511024535067661250023673006877574410115100272555010000100101019110041100411004110041

Test 3: throughput

Count: 8

Code:

  str q0, [x6, #0x10]!
  str q0, [x7, #0x10]!
  str q0, [x8, #0x10]!
  str q0, [x9, #0x10]!
  str q0, [x10, #0x10]!
  str q0, [x11, #0x10]!
  str q0, [x12, #0x10]!
  str q0, [x13, #0x10]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5021

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8021440182301404102363422461194414264401542219634773542516010280102800008010080000400535184307212401540401484020030084330115160100200800002001600004016440104118020110099100100800008000010082492262147243862478800501526024922544624825015530921400511011611402398000280000801004015540167401754019140143
80204401283015009948662264117041052040147233358988357251601028010280000801008000040053118446601240098040113401893004133019016010020080000200160000401684018811802011009910010080000800001008250634205324131224758010615340248425446388250811423541400511011611401738000280000801004014440200402084020740211
802044015430260099273022461195292644014822125747543625160102801028000080100800004005351843936124013204014040180300293301611601002008000020016000040128401781180201100991001008000080000100824842614072422102471800451557024681164458582490302592000511011611401298000280000801004020640188401174018040193
80204401953014401017063223211968626440132219850772252251601028010280000801008000040053518442480240110040140401453006533013716010020080000200160000401654014511802011009910010080000800001008247914171824226246480049154602484254467082505182151030511011611401858000280000801004016040311402264021840197
802044014830133310143402239119447264401092191446676702516010280102800008010080000400535184424802401570401504015530088330064160100200800002001600004013740182118020110099100100800008000010082467141859244615246580043152002452254456182489342228000511011611401558000280000801004020140122401074013540188
802044014930040010083462278117121152040144223772779657251601028010280000801008000040053518454000240142040174401563008233013016010020080000200160000401404013311802011009910010080000800001008249214183124367247980049148802492254467382488271384000511011611401818000280000801004016840124401994017840165
8020440170300404100687022641172010520401322251630695472516010280102800008010080000400535184295202401000401534018430070330128160100200800002001600004014140172118020110099100100800008000010082458201219242622461800401495324761074462582510291673000511011611401558000280000801004018840127401504014540163
80204401233005069981372250117041252040163225167362851251601028010280000801008000040053518452800240097040111401373004233012816010020080000200160000401514015311802011009910010080000800001008248414218224323249180027150262492254464182555652325030511011611401528000280000801004021440155401614017040129
802044013130133098824122571169615520401082230759626652516010280102800008010080000400535184780002400880401164018230050330100160100200800002001600004014740173118020110099100100800008000010082492222147240911249280032150582500254464582488352643000511011611401248000280000801004015040165401474024740150
802044015430140410287472232119761526440173219171577769251601028010280000801008000040053518450880240146040128401683005633014716010020080000200160272401354012211802011009910010080000800001008247630266224254244680049155452476254459682489292649000511011611401978000280000801004021540164402134016340180

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5015

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 61 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800344010230000000010191412299101488214840123225737448153251600128001280000800108000040008718424720240098401134011830068330068160010208000020160000401024012911800221091010800008000010825050112724623248480015152802502974459182516411701205020081261702524400928000280000800104011740129400994017640094
800244009630000000010152242299101296425240110231134729918251600128001280000800108000040008718424240240102401084011630068330092160010208000020160000401264014411800211091010800008000010824940129424803247980036155302505796458782530309340050200110264901527403948000280000800104011040149401464014640108
800244012330000000010272732305101600426040134236042749024251600128001280000800108000040008718424960240123401984009730065330136160010208000020160000401014010011800211091010800008000010824860992246412249780048151702494976449282526371080005020080101702515400738000280000800104012340120400854014140124
800244009930100000010065342346101480311240071230742354378251600128001280000800108000040008718434560240099401294010830051330134160010208000020160000400904006911800211091010800008000010824870110124680249980020152702478884462382519271668005020070221702626401028000280000800104011740104401144011240110
80024401243000000001028751232610151262324008023106084673425160012800128000080010800004000871841608024004140101401093001433020016001020800002016000040094400901180021109101080000800001082478019012495122492800381519025079864439825303613370050200100251711728400968022680000800104012540103401304011640173
800244015330000000010233422348101488316840098229649532027251600128001280000800108000040008718444400240088400974011130037330294160010208000020160000401524010011800211091010800008000010824640130824727250180041153402494756466482521201055005020070161701527401118000280000800104014540098401354009140091
800244011130100000010122402302101384332440081227352036431251600128001280000800108000040008718433360240133401084012230062330150160010208000020160000401274010511800211091010800008000010824780141724712250680038149202477984467582521451659005020090274901729401528000280000800104010840109401154018040089
8002440102300000010999048230810146451884007122784553663251600128001280000800108000040008718442480240073401164013930036330067160010208000020160000401094011311800211091010800008000010825000110924654247680032149102507846457682511201056005020060231702827401318000280000800104009640098401064010640096
80024400963010000001024551233810152052804012122985615114251600128001280000800108000040008718418000240103400944008530022330084160010208000020160000401044010211800211091010800008000010824940151024867250080028147602507792456982500171331005020060231702725400948000280000800104013140143401064009940106
8002440105300000000104402923001014964212400822293706372302516001280012800008001080000400087184444002400874009840131300663300911600102080000201600004012040105118002110910108000080000108248902122246382476800181490024869864517825052216390050200100251702717401568000280000800104012040146401394014140090