Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASALB

Test 1: uops

Code:

  casalb w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 4.001

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 63 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
74009329332462001700001011010610932806101206193006300930092300731492997832970327398283009100330031002601232742263821710011000100013003064101020060730179991701200163081224648249395275022402403044337445032447162341451815018300010013269532742327743272933410
7400532920249180160000101101060833272311220551300930093009231255149297893294333099829300910033009100360183275126122171001100010000300606410102006093016999170120991522910918380333680946236213851444220465033514185661668117259300010013444234420342043425134431
74005338782761601301001013010530033315102213433009300930062300251493149634108346678253003100230061003601234413275321710011000100013006403100820020930119991601200155871115238074372784924529399144448404633242188071641317679300010013443734321354303450034209
740053443726920019011010080015392339621012200630033009300623132414931016336153413972530061002300610016012340242724217100110001000130060031008200408301399914012001509010610679423566656238393609444217464333056189161647217568300010013417634155341523415934030
740053404225520017101010090015321340011022198830093006300623139214931118347493505372630121005301510046012346112769917100110001000130060431005200407301299914012032951537710775480663629649238923569444210424933104190761673717755300010013399634067340873405034140
74005341032551301600001010001535033969101219843006300630062302421493104233563341577263006100230061002601233986276021710011000100003006043100920020730129991501200152561072238071363175123863360644435454633164188031641417681300010013407334092341013408034059
740053420425516015000010110005323339681012196030063006300623015014930955336513410072530061002300910026012340302727217100110001000030060431007200407301299916000001502310629579553555548238543664443916475133131190481655217553300010013399133999340953409534084
74005341472551408001010050005417338441012212030063006300923040214931001335773410372630061002300610026012340642724217100110001000030060431008200608301399916002001497210724379343596946237723592444416414933108190551667717672300010013416234118341153409334027
740053407925516017000010090015354339671002206330063006300623013014931011336833411882530061001300610026012340562714217100110001000030060401007200608301099914212001508910771279783587646238243643444310445033100188171640217571300010013406734060340973410234097
7400534019255150170100102800053313385710022063300630063006230285149310053358434138725300610013006100260063406027552171001100010000300600410082004083014999160100015031106064799736191051237963602443513435333098190681653817717300010013397034028341023407934116

Test 2: throughput

Code:

  casalb w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0073

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 1e | 1f | 20 | 22 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50208700745251110505503311207005838107933401001010030000101003000050513250641149669937006170073360055401002020030000202006000070073358112020110099100100001010010000010030020052351005620000326028293004799982135048720013101171270038100001010830000201007007570074700747007470074
5020470073525011050562361107005845979334010010100300001010030000505132506140496699370061700733600554010020200300002020060000700733581120201100991001000010100100000100300390122581004820008131028313005999982128240880013101171170038100001010830000201007007570074700757007470074
50204700735250100506923810070058259783340100101003000010100300005051325057904966993700617007336005540100202003000020200600007007335811202011009910010000101001000001003002809065100612000424004303007099982132034780013101171170038100001010830000201007007470074700747007470074
5020470073524010050492271147005825776334010010100300001010030000505132506821496699370061700733600554010020200300002020060000700743581120201100991001000010100100000100300190745210055200002270363130102999821232631083013101171170038100001010830000201007007470074700747007470074
502047007352501005060236120700581896733401001010030000101003000050513250654049670047006570076360055401002020030000202006000070073358112020110099100100001010010000010030020052441005720000125042393006499982129237660513101171170038100001010830000201007007470074700747007470074
5020470073525010050520141107005819855334010010100300001010030000505132506820496699370061700733600554010020200300002020060000700733581120201100991001000010100100000100300300524210052200022260202930053999821292331080013101171170039100001010830000201007007470074700747007470075
50204700735250100505722910527005830981233401001010030000101003000050513250796149669937006170073360055401002020030000202006000070073358112020110099100100001010010000010030044074521005820002237038303008999982127026640013101171170038100001010830000201007007470074700747007470074
50204700735250100506004110070058251071033401001010030000101003000050513250802049669937006170073360055401002020030000202006000070073358112020110099100100001010010000010030039064491005220004022032333006299982136229660013101171170038100001010830000201007007470074700747007470074
502047007352501005066447120700585078733401001010030000101003000050513250695149669937006270073360055401002020030000202006000070073358112020110099100100001010010000010030033081471005820002141014283005599982125029820013101171170038100001010830000201007007470074700747007470074
502047007352501005073220110700583675933401001010030000101003000050513250694049669937006170073360055401002020030000202006000070073358112020110099100100001010010000010030041084631006920001458036383007899982127033940013101171170039100001010830000201007007470058700747007470074

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0064

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 23 | 24 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500307007552511011150381524000107005924156334001010010300001001030000500632505260049669940700627007536005640010200203000020020600007007435911200211091010000100101000001030058156447100622000902640393006899982124630603001270002171170031100001010030000200107006670066700667006770066
5002470065525100011506072910010700502518930400101001030000100103000050063250481154967194070057700653600474001020020300002002060000700653501120021109101000010010100000103003244533100512000803720353005999982128830523101270501171170030100001010030000200107006670066700667006670066
50024700655251100115059161910020700592311333340010100103000010010300005006325067505496698507005370065360047400102002030000200206000070065350112002110910100001001010000010300321610229100652000623324303005899982122426443101270001171270030100001010030000200107006670066700667006670068
500247006554310001150294301002070050182910304001010010300001001030000500632505271549669940700627007536005640010200203000020020600007007435911200211091010000100101000001030048186250100542001523118383005799982126437523201270501171170030100001010030000200107006670066700667006670066
50024700655251100115046132710010700591414533400101001030000100103000050063251004154966994070063700743600564001020020300002002060000700743591120021109101000010010100000103004719744410059200150353649300529998212713305016201270001171170039100001313130000200107007570076700757007570075
5002470074525220011505052110020700502319930400101001030000100103000050063250415054966985070053700653600474001020020300002002060000700663501120021109101000010010100000103002455146100452000613728313005199982123433603001270511171170030100001010030000200107006670066700667006670067
500247006552511011050645412001070050222121430400101001030000100103000050063250473004966994070062700743600564001020020300002002060000700743591120021109101000010010100000103004319566210066200212506383004399982119432503101270501171170030100001010030000200107006670066700667006770066
50024700655241101105047661100116700502419103040010100103000010010300005006325049705496698507005370065360047400102002030048200206000070065350112002110910100001001010000010300423745010057200062402434300549998212415266817001270011171170039100001313130000200107007570075700757007570075
5002470074524220010506614221001070059230723340010100103000010010300005006325052310496698107005170061360043400102002030000200206000070061346112002110910100001001010000010300394563410062200081322432300589998202310316230012700011712700271000066030000200107006270062700627006270062
500247006152410010150561441010236700472921411314001010010300001001030000500632505720049669810700537006136004340010200203000020051600007006134611200211091010000100101000001030030440451004520010228284030045999820258386630012700011711700261000066030000200107006270062700627006270062

Test 3: throughput

Code:

  casalb w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 21.1399

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40208211412157401002403210021140806603430142100300421003004250028768271149206996021142321007613201372301420010214300421021460084211421149521102011009910010000100100000100300420172403220028014016440569999114013001117277416017002098840023000010101211400210079211411210079211413
4020521142115740000240141002113934000343014210030042100300425002876827114920833202100652114121320136330142101021430042102146008421008915182110201100991001000010010000010030042280240322002801401844056999911401202111727741801700211225101003000010101210066211423210077211423210090
4020521007815830000240261002100744600343014210030042100300425002876829114920833202111922100891320004030142001022830042102146008421139915082110201100991001000010010000010030042280240322002801401744054999911401910111728741601700211226101053000010101210079211400210079211409210797
402052114081575001524012100210074066034301421003004210030042500287667601492083320210078211399132013763014210102143004210214600842114111484211020110099100100001001000001003004228024026200280193014410799991140831011172774140170020988201023000010101211411210090211400210079211411
4020521007615840000240261002100744500343014210030042100300425002876832014920833402100782113981320138530142101021430042102146008421008915032110201100991001000010010000010030042281724026200280140224405499991140160211172774030170020988401023000010101210066211424210090211409210090
4020521007815830000240140002100614660343014210030042100300425002877017014920699702114102100781320004030142101021430042102146008421008915032110202100991001000010010000010030042281824027200280140244405499991140170211172774130170021122610003000010101210079211411210090211423210090
402052100781583100024029000210077466043301421003004210030042500285884001492070090211408210089132013743014210102143004210214600842114101508211020110099100100001001000001003004201724026200280140174406099991140220211172774150170020989610003000010101211400210066211409210090211413
4020521139715740000240150002100634660343014210030042100300425002858501014920833202100762114211320004230142101021430042102146008421006515072110201100991001000010010000010030042017240212002801401444060999911401902111727741001700211216111023000010101210090211422210090211424210090
402052100781584100024028010211407006034301421003004210030042500285884001492083430210076211410132013763014210102143004210214600842114121497211020110099100100001001000001003004228024026200280140214405299991140181211172774160170020989501003000010101211423210077211411210077211410
40205211410157400002403200021138406604330142100300421003004250028766370149207010021142321007613200029301421010214300421021460084210089150521102011009910010000100100001100300422817240262002801402544061999911401802111727741401700209883101023000010101210066211411210090211424210066

1000 unrolls and 10 iterations

Result (median cycles for code): 21.0102

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002821008715831110000240441210221007268834300491030039103003950285895301492083360210098211432102000663004921100333008410033600782114201517211002110910100001010000110300571928024043200390014050440699999114036121031651074142182221119501353000010011211420210099211433210099211433
40025211421157411000002404712102211415186343004910300391030039502859026014920700702114192100871020006630049111003330039100336007821009815272110021109101000010100000103005819028240452003972140544406499991140221212326510742821822209875131343000010011210099211433210088211420210099
4002521009815831110100240861310121140108834300491030039103003950285897801492070070211424210087102000663004932100333003910033600782100821516211002110910100001010000110300571828262404820038750140384406099991140251212316510742521822211189131343000010011211433210088211420210083211433
400252114301574101000024037131012100833873430049103003910300815028769951149208352021009821143210201389300492210033300391003360078211421150621100211091010000101000011030060212828240452003900140474407099991140191202326510742421822211271131303000010011210099211422210088211421210099
400252101021583111110024048120022100720893430049103003910300395028589510149207007021142121008710200066300492210033300391003360078211420151721100211091010000101000001030058192828240492003801140484406999991140271202306510743821822211207131353000010011210099211431210099211433210099
40025210087158310100002403112100210072688343004910300391030039502876934014920835202100982114211020003930049221003330039100336007821009815272110021109101000010100001103005919280240792003800140544406599991140261212326510743221822211207131363000010011211431210099211433210099211431
40025211431157311000002405012101211415088343004910300391030039502876943014920835002100982114301020138730049221003330039100336007821142115062110021109101000010100000103005920282724046200380114054440639999114023121233651074292182220986401353000010011210088211406210099211433210088
400252100871584100000024028131022100833983430049103003910300395028770090149208334021009821143110201389300492210033300391003360078211431151721100211091010000101000011030057192826240392003801140584406399991140371212306510741921822211207131343000010011211422211431211406211433211431
40025211421158410100002403512001211405499343004910300391030039502876945014920834102114212114321020140030049221003330039100336007821143015113110021109101000010100000103005819282824049200381114048440579999114030131231651074242182220987513063000010011211432211420211433211421211431
400252114301583111000024038131002100832873430049103003910300395028588010149207018021009821008710200055300492210047303751003360078210087151721100211091010000101000001030058192827240342003801140564406399991140391212306510743621822211194131353000010011210075210099210088210099210088