Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (pre-index, 32-bit)

Test 1: uops

Code:

  str w0, [x6, #8]!

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1005104070005000111210251710325200010001000100010005077845824104010408243898200010002000104012411100110001000102005521410031216010160317311611103710001000100010411041104110411041
10041040700042210160102512062252000100010001000100050778458241040104082438982000100020001040124111001100010001020047418100102812010180717311611103710001000100010411041104110411041
10041040810063000001025032325200010001000100010005077845824104010408243898200010002000104012411100110001000101804001810030200010180397311611103710001000100010411041104110411041
1004104080004141080102513963252000100010001000100050778458241040104082438982000100020001040124111001100010001018055414100001612010000637311611103710001000100010411041104110411041
1004104080005181050102512202252000100010001000100050778458241040104082438982000100020001040124111001100010001015063514100002214010080957311611103710001000100010411041104110411041
1004104080104000001025000325200010001000100010005077845824104010408243898200010002000104012411100110001000101203901210030146010190397311611103710001000100010411041104110411041
1004104070004000712102513243252000100010001000100050778458241040104082438982000100020001040124111001100010001020079220100402412010150637311611103710001000100010411041104110411041
100410408000518100121025173522520001000100010001000507784582410911040824389820001000200010401241110011000100010210635610080240310200557311611103710001000100010411041104110411041
10041040800041610012102513003252000100010001000100050778458241040104082438982000100020001040124111001100010001000055001000000010000717311611103710001000100010411041104110411041
100410408000522107010251522325200010001000100010005077845824104010408243898200010002000104012411100110001000101804652010000106010200557311611103710001000100010411092104110411041

Test 2: Latency 2->2

Code:

  str w0, [x6, #8]!

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10209100407500022059082117366801161002581757986225201001010010000101061000052207146882404969601004010040868168742201062001000820020016100401221110201100991001000010010000100109340142440106371023327708885610321095718124570111718016001003710000010000101001004110041100411004110041
1020410040751002223898221752780132100257746688492520100101001000010106100005220794688240496960100401004086816874220106200100082002001610040122111020110099100100001001000010010906712973760677102433060930348821094418122970111718016001003710000210000101001004110041100411004110041
1020410040751002337988241744720120100258149478512520100101001000010100100005220294688240496960100401004086743874720100200100002002000010040122111020110099100100001001000010010928813443730666102612990912408621092219123271000710117111003710000110000101001004110041100411004110041
1020410040751112256838281752760120100257938883512520100101001000010100100005220854688240496960100401004086743874720100200100002002000010040122111020110099100100001001000010010887813753930695102642712895368951095221105971000710117111003710000010000101001004110041100411004110041
1020410040751112229808241712800164100257879076522520100101001000010100100005220714688240496960100401004086743874720100200100002002000010040122111020110099100100001001000010010916713223960698102572780893349221091219114370000710117111003710000410000101001004110041100411004110041
10204100407510023348981817526101681002581186122542520100101001000010100100005220574688240496960100401004086743874720100200100002002000010040122111020110099100100001001000010010887712644100681102472880888387841089618129770000710117111003710000710000101001004110041100411004110041
10204100407510124638582417048301521002580896123542520100101001000010100100005221274688240496960100401004086743874720100200100002002000010040122111020110099100100001001000010010891713083970675102513130905429581091525109670000710117111003710000210000101001004110041100411004110041
1020410040751002304898241776700116100257939483622520100101001000010100100005220514688240496960100401004086743874720100200100002002000010040122111020110099100100001001000010010927811854000671102523200917348641093820118271000710117111003710000410000101001004110041100411004110041
10204100407510122629379217368201121002580294955325201001010010000101001000052206946882404969601004010040867438747201002001000020020000100401221110201100991001000010010000100109381613163730685102712830920429741090820129771000710117111003710000110000101001004110041100411004110041
1020410040751002280908321760680108100257865694492520100101001000010100100005221314688240497113100401009086743874720100200100002002000010040122111020110099100100001001000010010913814393782668102512940885349061093323120971000710117111003710000410000101001004110041100411004110041

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0040

retire uop (01) | cycle (02) | 03 | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002910040752169778281768771120100258091051046025200101001010000100101000052110546882414969601004010040869638770200102010000202000010040124111002110910100001010000101090001306358722102302639183283910904111202640216331003710000010000100101004110041100411004110041
10024100407521516983217528211481002579210488572520010100101000010010100005210894688240496960100401004086963877020010201000020200001004012411100211091010000101000010109220130238370210236291922368921094161226640316331003710000010000100101004110041100411004110041
1002410040752184767701768805961002579410582512520010100101000010010100005210654688241496960100401004086963877020010201000020200001004012411100211091010000101000010109180134236770410222275918428041099781067640316231003710000010000100101004110041100411004110041
1002410040752019808131792742164100258041271185325200101001010000100101000052108146882414969601004010040869638770200102010000202000010040124111002110910100001010000101089001179384695102342749064082010912141176640316331003710000310000100101004110041100411004110041
100241004075221488804180864196100258231071224725200101001010000100101000052108146882414969601004010040869638770200102010000202000010040124111002110910100001010000101090001340406708102372779103281710941151156640316221003710000010000100101004110041100411004110041
100241004075216082800178463612010025760111975425200101001010000100101000052112946882414969601004010040869638770200102010000202000010040124111002110910100001010000101090401221360686102462709023282810920131082640216221003710000010000100101004110041100411004110041
100241004075201683814180865412010025804931095025200101001010000100101000052113746882404969601004010040869638770200102010000202000010040124111002110910100001010000101094601321379695102432829084882810947111165640316331003710000110000100101004110041100411004110041
10024100407524128982618247661161002580393665125200101001010000100101000052109746882404969601004010040869638770200102010000202000010040124111002110910100001010000101089801169414688102322649164083910929101177640216321003710000310000100101004110041100411004110041
1002410040752040768201776953116100258231021275025200101001010000100101000052109746882404969601004010040869638770200102010000202000010040124111002110910100001010000101088601200387685102272459189278710916121061640216331003710000010000100101004110041100411004110041
10024100407522027780618247811601002580596845125200101001010000100101000052112946882404969601004010040869638770200102010000202000010040124111002110910100001010000101089801296400684102182709383671610935161149640216331003710000310000100101004110041100411004110041

Test 3: throughput

Count: 8

Code:

  str w0, [x6, #8]!
  str w0, [x7, #8]!
  str w0, [x8, #8]!
  str w0, [x9, #8]!
  str w0, [x10, #8]!
  str w0, [x11, #8]!
  str w0, [x12, #8]!
  str w0, [x13, #8]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5053

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80209404773030000001893500833172897964034877819852020972516399080461800008010080000403744186020805014937334404004052630334330326160100200800002001600004051475118020110099100800001008000010080912042885131886803012720926441191812272403987511031732404218083080000801004034640392403874049940404
802044037430300000019773688091672110120404357971802175570251606288311080000801008000040275718593200794937352404794047030353330336160100200800002001600004046175118020110099100800001008000010080901040915149889802242780929741249811342284087511021733404398043880000801004040440451404594040340424
80204404673020000002172378802169689100404707901803192910525160661825388001980212800004015401859776039049373684039440377304053304211601002008000020016000040353821180201100991008000010080000100809020451050910881802602510904781132811232504406511021633403438216980000801004045140364404014038940411
80204404153030000002070345793168811110840388764182920088025161639854218000680100800004120141856944038649374834045740384304413303681601002008000020016000040476751180201100991008000010080000100808380455252210862802432610912341201811352454287511031633404058027680000801004040340427404734042240359
802044046230200002021363628371728108100404077971969191011725163774828908001780100800004024371857520017814937325404494039330377330356160100200800002001600004035375118020110099100800001008000010080916041135079871802463080887441061811462474312511031632404448380680000801004040540347404014043540434
80204403643020000002010336822172095924045477017891797128251614058052480000801008000040258018587680544937428403784039130309330317160100200800002001600004054875118020110099100800001008000010080932043005484880802462900918401135810742684259511031633404278097580000801004036240480404054038640428
802044043530400000021183888191672110112404477991814196586251627458065980000801008000040254218575920233749373234040540420302673303531601002008000020016000040430751180201100991008000010080000100809100390451111888802782850876341234810902374124511031733403818045480000801004040740383404064042840389
8020440480303000000205531380216969211240443788211319627425164102829348000080100800004045901853896040249373274045340397302953303321601002008000020016000040366751180201100991008000010080000100809000447957212847802633060906461127811342374531511031733404598014780000801004047740459404124034740381
8020440401303000000185437981514161161364033777518261844892516047583093800008010080000402235185850402494937289403714043130273330427160100200800002001600004038982118020110099100800001008000010080860045035367911802422780877501190811572344010511031633404298028080000801004035540422403654043540386
802044043130200000019893488051760911404043779019861930912516042483293800008010080000401726185759212574937339403744039530369330372160100200800002001600004052882118020110099100800001008000010080913044145205884802462600885441155811182484379511031633405308049580000801004051340440403884037240425

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5098

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800294063230622200018729267711760115140408097761606186613625160384804958013280010800004106811875880012654937732408014076830638330773160010208000020160000408507511800211091080000108000010809222736224393811804782514886461666813595143865260050200004170464080480501080000800104081840801407964078040753
8002440751306333000189085479516561241404078774215971721141251602208306580000800108000040183418754961118124937730407544087330681330760160010208000020160000408197511800211091080000108000010809052836444631829805282320840441767814365613969270050200006160444081483404080000800104082740735408304077540825
800244091431820000018698887371712110120407847822089176316525160630804338000080010800004015961871344012064937648407964081030670330802160010208000020160000408948211800211091080000108000010809002639654788857805692860848321740813095463622240050203007160444085480382080000800104080440772407804080840810
8002440786306330000204389077017281281084087275917661791190251604578071280014800108000040227818757360111974937727408424071130827330823160010208000020160000408178211800211091080000108000010809052039074636881804982670873341568814325503705270050220008170444083980455080000800104081540765408674082040815
800244075630533000019958878221656100128407787791718182513125160644826358000080010800004010901877920013564937675408454083730826330783160010208000020160000407897511800211091080000108000010809432536455035832805162753881301655813705294021353050200004160444079980428080000800104079940821408784080040789
80024407753063003001911900779166413510840781785186018471492516051680353800008001080000402039187693601294049377484080640715307453308171600102080000201600004073075118002110910800001080000108092828365649610867805392590857281698813555023240122050200004160694085980589080000800104085940907407664081440835
800244074730520220017738278151720114116407688091544172317125160633859878006080010800004020221877992012924937680412674085430764330658160010208000020160000407397511800211091080000108000010808892843544738876805512980875401504813974613852270050200004160384080680605080000800104071840846407014077140828
8002440836306200000197482276816961261324088676618921855138251648568067980213800108000040150218918160143449377104076840732307413307011600102080000201600004080775118002110910800001080000108091021426541511865805572580888401595813265543969242050203004160864078585875080000800104079840738407674072240788
800244073030620000018997377721680128136407657841706168619225162395804248000080010800004021981878208012504937759407694092830797330749160010208000020160000407368211800211091080000108000010808902739254819814805362580815441612813405334058270050200007170434076483069080000800104078440873407884079940757
8002440824306202000164185578817041071044074674816521765156251646648071880011800108000040170318760000134349377284075140748307743308171600102080000201600004078582118002110910800001080000108092543438943012865805212810901421674813425354235393050560007160434084780434080000800104083140827412954080040811