Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASPL (64-bit)

Test 1: uops

Code:

  caspl x0, x1, x2, x3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk instruction (07)l2 tlb miss instruction (0a)0e0f1e22243a3f464951schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f63696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
76011331662502524001005105874327871026598300630063006159295149297593266533251729300620044008200470143283712952171001100010000300642100420040530109991412016235119172846740131670223573772446019575732614166301483015355300030023296233012329363293733237
76006334002482526111069105794332710126868300630063003159984149297283292632787628300320044008200470353320213102171001100010000300600100620040430129991302015954120311826038381165226953851445922636332550169911479816437300030023276433190331053304932954
760063281424526230010061060063307010267763006300330061597141492976332864329147273006200440042004700732795128521710011000100003006421006200201030109991412015801113271839439091367223243918445014646132614163631474815267300030023283132992332923293532953
76006332432472826001005006067328211126779300630063006159747149300213270532920728300620044004200270143279912952171001100010000300600100520040430119991412016192118881844540081159225873977446215645432665167721500615366300030023290733272328413295632822
76006329132451625001017106019327841126839300630033006159676149298573271732832728300320044004200470143303312922171001100010000300600100420040530109991510016242116422837839601659223663890446117616832593164981479715724300030023277133001331323293132883
7600632977246282900100610596632990102680430063006300615970014929815326113290972830062004400820047014327491319217100110001000030060210062004083006999131201613812097282983846963226783962445315686332573172071470715179300030023284932800329833285833221
76006330392472527011005105981327001026821300630033006159725149299343267633133728300320044008200470073274512952171001100010000300642100520040630069991312016281118201841940651160225003924445511636632685173901521315388300030023300632797331363291532837
76006329672452226001005105891329131026711300330063006159663149300643290932974728300320044008200270143287313082171001100010001300602100520040430099991312016084120792822138691952223434039446317707132646166731530815331300030023289732873328553294232908
76006329062493331001005105997327761126985300630033006159665149302373274632840628300620024008200270143282713262171001100010000300342100520040430099991312016005117892837438971158224383991445225586232501173171523314937300030023293433208330093282533167
76006329962462426001007005784328050126879300330063006159737149298653282132882727300620044008200270143304512952171001100010000300642100320041430109991302115761117361812639551360222953868445412666632469166681488715840300030023293532778332633314533245

Test 2: throughput

Code:

  caspl x0, x1, x2, x3, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.1004

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
702341814621356000000021840023751001016931218103223262991608832540100101003000010100300005055086153720491778240181031180990169975317081840100302004000030200700001812845011202011009999329100100001010010000010032475416223712224344954204171497124867921592842609999820103490876403010131011711180630010000663000040100180945181034181246181208181030
702041811111356000000022171023511001280731218106222782661620622540100101003000010100300005055086223420491778730181052181007169844317091840100302004000030200700001809505011202011009999508100100001010010000010032483435218712377253964203261540024947961628270288999820106830746202000131011711180363010000663000040100181014181280181032180917181307
702041809581356000200020489023361001248141761809282348281116138725401001010030000101003000050550863512904917789601810451809671699713170831401003020040000302007000018126250112020110099820091001000010100100000100324974922229123209562482031815201249810021476844234999820123620876407000131011711180207010000663000040100181093180933180997180958180959
7020418089913580000000199520232710011525124181015229017716113225401001010030000101003000050554862156504917791101812711809121701703170987401003020040000302007000018099150112020110099920311001000010100100000100324934312007123047234832031715394248610264013166958999820128870986403000131011711179965010000663000040100181063181296181276181036181172
702041810451355000000021400023161001296818018095022942691611882540100101003000010100300005055086196000491779430180908180993169837317083840100302004000030200715401809075011202011009999304100100001010010000010032479475213112420422270203351535024981006172514366299982012512010164011000131011711180538010000663000040100180942180972180888180953180964
702041810731355000000049019022851009525220180955229731291613002540100101003000010100300005055086133500491779020181050181097169844317094440100302004000030200700001812165011202011009999050100100001010010000010032482464215412327223876204031509424528484262344063999820110852766400000131011711180330010000663000040100180937180979181313181511181145
7020418130613590000000534290234310012407192181112225726101612592540100101003000010100300005055086349460491778610181074180895169903317091840100302004000030200700001809685011202011009998639100100001010010000010032475433212412362722626204001532124829961502443093999820117000856403000131011711180260010000663000040100181057180972181300181301180874
70204181002135600000002138502310100144882641809362276268160713254010010100300001010030000505518616901049178014018091918106816978331708804010030200400003020070000181034501120201100999853510010000101001000001003248340321741238332276220360151512478100815962430389998201150801226403001131011711180272010000663000040100181163180999180947181263181091
7020418103013560000000210520233010016485348181023229528816107925401001010030000101003000050550863129104917785901810251809841698812017077940100302004000030200700001809595011202011009990283100100001010010000010032493417215712385722628204091512124848843791570139999820124390726409000131011711180720010000663000040100181056180948180974181255181030
702041809941355000000020744023021001288679218136022772561611882540100101003000010100300005055086154860491779690181296181053170183317086440100302004000030200700001810155011202011009999109100100001010010000010032481414218112375723705203741508124827781739765256999820124280866433000131011711180341010000663000040100181045181045181057181050181057

1000 unrolls and 10 iterations

Result (median cycles for code): 18.1096

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f202224293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f60696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)cdcfl1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
70054181285135810010000205539229410148873321814612275178160608254001010010300001001030000501008574547014917823201814451810501700403170948400103002040000300207000018131651112002110994390101000010010100000103251614460218012379323737204231583024948823906666871999820140439936214110012700117121802300100009903000040010180948181088181242180911181044
70024181275135810100000417329233910144010172181236227319101584354140010100103000010010300005010086369460149178476018100618136717010331713904001030020400003002070000181039501120021109994051010000100101000001032510144312178123376224432038915342198615375458429998201579910566214150012930217221807250100009903000040010181071181051181053181040181059
70024181096135711010000362319230310128031041813292245057158847254001010010300001001030000500998639470014917803001809971813681705663171019400103002040000300207000018167052112002110999379101000010010100000103251713458225112473222571204321555025048044183542895999820106109886214320012700217231802470100009903000040010181073181366181330181015181478
700241810961358100000002742792345101176734018105322560413161079254001010010300001001030000501018654029004917785901812401813331700873171013400103002040000301907000018107950112002110999198101000010010100000103250315484217012333322514204321523125048121729061342999820104469656214101012700217221801750100009903000040010181284180965181113181001180962
70024181293135611010000579619233210896630818092622771651607692540010100103000010010300005010086183280049178214018164118108117008331713894001030020400003002070000181343521120021109994101010000100101000001032505144402179123607223272038515331249699241386448799998211109610766214300012700317221804081100009903000040010181068181052181687181031181295
700241809701355100000002142910229610124034201810952264088159837254001010010300001001030000500998618434004917803701812891809881701263171489400103002040000300207000018031851112002110999291101000010010100000103248815431215812432322272203671526024888025541845923999820677059956213020012700117221803070100009903000040010181396180911181455181301180943
7002418135313561000000020411923351013282100181244230108111612902540010100103000010010300005010186449610049178333018098618154916994631710384001030020400003002070000180894501120021109917041010000100101000011032470144582186123917239052038815411075043527656309998202626291066214200012700317841802830100009903000040010181460181431181251181101181095
70024181031135911000000220979228610128082161814002292467159943254001010010300001001030000501008634478004917847801810641816421700673171227400103002040000300207000018089951112002110990236101000010010100000103249415438212712231746232203901531250107581498743155999820367799836213220012700217221802650100009903000040010181666180990180920181030181831
70024181075135911000000205899230910145632361809652292065161461254001010010300001001030153501018621391014917794301810971811021701573171329400103002040000300207000018142052112002110993611101000010010100000103251216440217212389523325204311504124949864308642652999820133009896213030012700217221803430100009903000040010181384181118181000181020181732
70024181061135611110000203569233910125662841809882284297158546254001010010300001001030000500998640246014917801701813891816011701493171201400103002040000300207000018135351112002110988870101000010010100000103249615435218112522028059204511520247309843123044370999820112989676214330012700317221803810100009903000040010181059181023181048181623181019

Test 3: throughput

Code:

  caspl x0, x1, x2, x3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0745

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)0e0f18191e1f22243a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
6021118009013540001000460070100180076401114939932301991003009810030093500861585011491770140180766180104169273111707043021700202664016020266702801807671162110201100991001000010010000010030098035211252584535620062103533465431999913531901201117292922111811179531501303000030102180758180092180772180101180758
6020618075813490001100453372100180751488150058333019910030099100300935008623456114917702401807581801041692771117004030214002028440168202667023118007011621102011009910010000100100000100300980412112563145354200620035346655299999135328212011172929215118111801810010103000030102180101180768180092180738180095
602061801041354000110045326200018008501081493763330198100300991003011450086098281149177670018009118076416994112170701302170020284401602028470231180771116211020110099100100001001000001003009803501254484535920062003534165430999913531221201117292913311811179530300103000030102180771180105180758180105180771
60206180770134900000004534901001807430101015006332301991003009810030093500859175011491776700180104180758169938111707063019300202664013220266702941807571072110201100991001000010010000010030088035012561545333200620035395655159999135328202011172929175118111795443010103000030102180759180101180746180097180772
6020618076613490000010453322000180735410014938933301981003009810030117500861048411491776870180100180767169936111707033021700202804016020280702801807661072110201100991001000010010000010030089041211257564535120062003532765393999913531821001117292908711811179531401003000030102180092180772180092180767180105
60206180104135300000004532500001800894710149405323019910030099100301175008621965114917702401807671800911692821117001530217002028440168202847030118010411621102011009910010000100100000100300980352112503445335200620035338654149999135320212011172829177118111801734013133000030102180770180092180771180755180768
602061807571349000100045335200018075247715007232301991003009110030117500862246911491770170180767180104169276111700373021700202664013220266702311800911162110201100991001000010010000010030098000125452453552005800353216540199991353072120111728290971181118020300003000030102180105180767180101180746180101
602061801001354000100045322010018008500015005932301981003009910030114500859055011491776570180093180771169916121707063019300202664013220280702801807581162110201100991001000010010000010030098041211254174535920062003533665417999913531721201117292914911811180191400103000030102180105180767180101180746180092
602061801041354000010045340210018075407715007333301981003009910030114500859073411491770110180767180104169275121700193019300202844016020266702941800911072110201100991001000010010000010030098002112527445366200620035329654319999135322012011172929150118111795320013133000030102180770180092180771180084180758
60206180770134900011004534621001800890101014939632301911003009910030116500862365411491770230180758180104169273111700393021700203744016020284702941807701072110201100991001000010010000010030089041271254194537220062003533165400999913531421001117292910411811179530000133000030102180759180071180746180101180750

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0743

retire uop (01)cycle (02)03l2 tlb miss data (0b)0e0f1e1f22233f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)cfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
60031180076135400045361210180747387149397333010510300991030105508591340014917701601807661800961692518170687301211620090401402018270539180763112211002110910100001010000010300942716125622453742005803536065443999913534321265229162219221794862663000030012180097180763180104180774180097
600261800991354000453490101807483881500583330105103009510301055086232731149177011018076318009916925081700203010341020090401402009070245180091112211002110910100001010000110300932716125336453642006203534265422999913534621265229228219221801713663000030012180097180763180076180743180092
60026180096135300045357210180076080150058333010710300951030105508595229114917768501800961807621699138170708301152620090401402018670245180107112211002110910100001010000010300932715125433453652006203535565422999913533421265229115219221801773663000030012180092180763180076180745180100
600261800961353000453452001807473701500603330108103010010301055086232731149177009018076318008916925181700413011575200904014020090702451800961122110021109101000010100000103009300125344453662005913536065423999913533921265229232219221795123603000030012180764180097180743180076180766
60026180763134901045338210180081278149399333010110300951030111508625766114917768301800751807411698988170707301153520090401242017470245180096107211002110910100001010000110300942716124892454262006203534265429999913534621065229199219221795131963000030012180090180743180097180764180071
60026180093135400045344210180747377150065333010510300951030105508625637114917727601807631800911692508170041301153620354401402009070245180086112211002110910100001010000010300943316125140453632006203535365421999913532121265229217219221802382663000030012180097180763180097180738180097
60026180091135400045353200180060078149388333010510300911030105508591333114917767701800961807531699148170703301150820090401402008270245180096112211002110910100001010000010300942716124701453632005813535265516999913532901265229183219221801793663000030012180076180742180097180764180076
6002618008913540014542721018005528014938433301081030091103010550862324711491776770180096180763169915817070630115592008240148200947024518076311221100211091010000101000001030094016125161453622006213534965464999913533321265229138219221795130963000030012180764180097180742180071180764
600261807621349000453532101807210881500733330105103009510300935085913301149177682018009618076216991481707063011555200904014020182702171807631122110021109101000010100000103009300125548453662006213536365424999913540521265229173219221795080663000030012180758180097180743180076180763
6002618076313490004534921018074830715006733301011030095103011150862225011491769950180742180075169231817004130115592009040300200907024518074111221100211091010000101000011030094016125641453682006203535065435999913540501265229161219221801540003000030012180092180738180076180763180090