Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASAH

Test 1: uops

Code:

  casah w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 4.001

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f22243a3f464951schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f63696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
74009342292561127112411000100920053623397301220423006300630062301981493092933761341397253009100130091002601234037272821710011000100013010503100820040214301199916210311494010722280823623875238703588444216576333219189721641717391300010013423434193342313413634106
740053414825500211126010001006210532433891002203030093006300922995114931012337613419472530031002300610026006340312742217100110001000030114431012200603113009999152003214896104773799435781368237693596444515606433046191221651817799300010013405234047340953408434050
7400534158257012811240000010452005361340470022123300930033006229600149311013376534063725300610023006100260183412327552171001100010000300944010122006018301099915212301521310616479883546864239173716443820616233028191741617817607300010013406334073340953408034079
74005341362560025111710000100920052763395000220043006300630062299551493106833746342027253009100230091002601234037276221710011000100003011400100720061283010999152123014852105992800435691166239173587444418666133012192941664017870300010013408634129340513417134176
74005341932550022112010000100820053093399500220233009300630062294751493095633835341518253006100230061002601234025274821710011000100003011444100820060283010999162023014863106103797035401064238983600444315676933069190861641517661300010013402034147341863411634095
74005342022560026112211100101120053533394600221073006300930062297381493109133855340197263006100330061002601233993270521710011000100003014403101220060110300999915202321503010601279723536962238673550445512665633076191151653117446300010013413934076340083404734223
7400534182256012601211000010092005352339460122163300330063003230042149310633380634107725300310023006100360123403827282171001100010000301154410112006640113012999152023215092107303796035061167239023763444318605932999188511633517505300010013414934119340313410934213
740053414225601231124100001011210541233904002216430093006300622938214931120337413422382630061002300610026012339782741217100110001000030085031012201030143015999172003015528107522791836011159237423584444720656533047190501634117574300010013405434096341123404934102
7400534186255012511301000010092105310339381021955300630063006230680149310503387534171725300610023009100260123411527512171001100010000301144310092006661143010999152023014955108773798335641068239023615444614646133070189031642417449300010013418834134341743409034142
74005341522570130112600100101020052793397400219893006300930032290111493093433854340946253006100230031001601234065272421710011000100003011544100820082216273010999172003115052111093799635581269239353644444019736933053190131634517569300010013416534061340733405934064

Test 2: throughput

Code:

  casah w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0109

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f181e1f2022243a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
50208301962260000000340352610403010531111418314010010100300001010030000505132527690049270203010330113320090401002020030000202006000030107194112020110099100100001010010000010030040152427210085200033226018543005799982134242114005131011741300951000096030000201003009730094301703011730093
5020430126234000000034055191061230119291028232840100101003000010100300005051325279200492701630118300963200944010020200300002020060000301081851120201100991001000010100100000100300210230871006220006244101449300759998206439582000131011711301021000060030000201003011830140302273012530117
50204301222250000000339402810203008424101216284010010100300001010030000505132528280049270123013830105320110401002020030000202006000030091185112020110099100100001010010000010030019016778100672001422360247530077999822691349861400131011711301291000099130000201003009930113301413011230090
5020430103226000000033942221044300961991721284010010100300001010030000505152531010049270293010230112320092401002020030000202006000030104185112020110099100100001010010000010030031017710710086200104310307330070999822549571021400131011711301251000099030000201003010330109301953012430124
502043011322600000003406020104030074179192928401001010030000101003000050513253324014927037301113010532009540100202003000020200600003011719411202011009910010000101001000001003004615133841009720009223503069300789998216136196000131011711301071000066030000201003009030109302123010930095
5020430583225000000033914311030300771810161528401001010130000101003000050513252860004927025301403011632008740100202003000020200600003012319411202011009910010000101001000001003004516198751007420004182401456300999998214224292001131011711300981000066030000201003010930111301653012130097
502043009922500000003405625104030109161124223140100101013000010100300005051325314500492701230089300923200734010020200300002020060000300981851120201100991001000010100100000100300270195911006920021131412267430095999822501246961450131011711301021000099030000201003010830121301913010630127
50204306022260000000341882610203007418102114284010110101300001010030000505132534410049270493012230112320101401002020030000202006000030114185112020110099100100001010010000010030031021559100792001525400107130066999822461036691420131011711300951000009030000201003012830110301523012230099
5020430096225000000033936311038300903592017284010010100300001010030000505132531310049270233009630089320080401002020030000202006000030101185112020110099100100001010010000010030018014469100932001126310286330063999822431241811300131011711301131000099030000201003010630103302093015230109
5020430102226000000033972211030300811810201328401001010030000101003000050515252774004927020301033010632008240100202003000020200600003011918511202011009910010000101001000011003002502137610070200072530040703007999982240153110000131011711301101000066030000201003009330117302073013630117

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0133

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e18191e1f2022243a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f6067696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9e9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
5002830169226000100343628351010300962821651284001010010300001001030000500632534450104927064301553014932010040010200203000020020600003012618911200211091010000100101000001030066012811010084200173440346630093999824441070126000012701171130122100001311030000200103013830146301293062930272
5002430133226000000342426290011630123394194228400101001030000100103000050063253208010492703830118301233201154001020020300002002060000301311761120021109101000010010100000103004001301161008420013138034913007499982340107712801001270216113013010000010030000200103013130134301493013430136
500243012322600000034201730101030083505204028400101001030000100103000050063253961000492710330154301683201444001020020300002002060000301731891120021109101000010010100000103006102149610074200152350306130090999823451068156000012701172130142100001010030000200103014630122301313016530129
50024301492260000003414144210103012743419462840010100103000010010300005006325338301049270763013130113320119400102002030000200206000030133189112002110910100001001010000010300380182831011420013337036653009399982345878126000012701171130119100001010330000200103019330149301603014530173
500243015422600000034271737101030098294213128400101001030000100103000050063253181010492703330123301163201054001020020300002002060000301691891120021109101000010010100000103004401427010089200212590426730107999823406115148010012701171130133100001010330000200103015630147301593018730115
500243014122510000034381624101430103173162829400101001030000100103000050063253477010492705930139301303201314001020020300002002060000301411891120021109101000010010100000103005681501321006020024440034783006899982354864144000012701171130110100001010330000200103012730154301513049930131
50024301232260000003403103210003010630418292840010100103000010010300005006325348401049270183010930101320090400102002030000200206000030138176112002110910100001001010000010300480154103100802001933803670300679998223784694000012701161130152100001010330000200103011130129301273011330128
50024301132260000003437163010143011834518472840010100103000010010300005006325295801049270403014030141320108400102002030000200206000030149190112002110910100001001010000010300490988210108200252470406230117999824431080168010012701161130142100001010030000200103022930175301703016230134
500243013922600000034102332101430132404204028400101001030000100103000050063253320010492704630098301093200904001020020300002002060000301261891120021109101000010010100000103003001301151009020011035018653009599982440108313401001270116113011910000100330000200103014830154301453017230127
5002430103225000000340724331010300932531131294001010010300001001030000500632535540104927025301053012332010840010200203000020020600003013518911200211091010000100101000001030051014813610071200172570408030102999824611080176010012701161130123100001013030000200103013530124301653014630174

Test 3: throughput

Code:

  casah w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 19.1397

retire uop (01)cycle (02)03mmu table walk data (08)090e0f191e1f2223243a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4020819141014240001024357010001900664083430142100300421003004250026169161149188342019006519141414181361301420010214300421021460084191399136121102011009910010000100100000100300422802436620028014354443959999114344010011172777430170018991300131053000010101190082191416190066191414190082
40205190081143400000243610100019006645634301421003004210030042500261691211491883170190065191397131813743014200102143004210214600841914151361211020110099100100001001000001003004228182436620028014360443949999114350012011172777380170018991300131353000010101191741190084191423190066191416
4020519139914231100024361010001900503063430142100300841003008450025986991149186985019139819006514180045301841010214300421021460084190081137121102011009910010000100100000100300422818243622002801435744385999911435701201117277747017001912450013053000010101191414191018190082191398190066
4020519006514340000024361000001914000063430142100300421003004250025988681149186985019141519008119180028301420010214300421021460084190081135821102011009910010000100100000100300422818243632002801434844392999911435101201117277739017001899130001353000010101191412190079191413190079191414
402051914151424000002436700000191487266343014210030042100300425002616812014918699401914061900771318045730142111027230084102146008419007413652110201100991001000010010000110030042271424364200290143494438899991143461120111727774201700191236009003000010101191400190078191409190078191412
4020519140814240000024347100001900622663430142100300421003004250025989031149188328019006519141113181344301421010214300421021460084190078137121102011009910010000100100000100300392802441620026414347443919999114360012011173377512272219121600131003000010101191412190082191412190066191416
402051914131424000002435900001191384465343013910030039100300395002616643014918833501900651913991318003630139101021330039102136007819008113692110201100991001000010010000010030039280243642002601435844389999911434101201117337749227221911980013053000010101191430190067191411190066191414
4020519141414240000024359000011900664553430139100300391003003950025989330149188335019008119141613181368301391010213300391021360078190110136121102011009910010000100100000100300392817243622002601434744386999911434900201117337737227221898680013053000010101191418191415191413191398191414
40205191413143400000243620001119006645534301391003003910030039500261698901491883340191422191409131800333013910102133003910213600781900811361211020110099100100001001000001003003928172435420026114349443899999114351012011173377482272219120000131053000010101190082190082190066190082190082
4020519006514230000024372000205119139845634301391003003910030039500259866501491869850190081190065131800333013910102133003910213600781900651361211020110099100100001001000011003003928024355200260143554439099991143510020111733775501700191238109613000010101190078190078190066190066190066

1000 unrolls and 10 iterations

Result (median cycles for code): 19.0077

retire uop (01)cycle (02)030e0f1e1f22233a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)79map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4002819007414340024355000219006820734300491030084103003950261678311491870070190077190065101800333004911003330039100336007819139913672110021109101000010100000103003900243662002700143564439099991143471120065177422182218987916603000010011190075191407190075191409190066
4002519006514340124360010019140066634300491030039103003950259883501491869850191407190077101800423004911003330039100336007819006513662110021109101000010100000103003900243552002600143534439599991143461120065177342182218987006603000010011190078191412190066191410190081
400251900771433002436001001901232663430049103003910300395025988351149188319019006519099810180045300491100333003910033601681914071357211002110910100001010000010300392814243552002600143594438299991143480020165177452182218988200603000010011190066191410190078191412190078
40025190065143400243590000190050260343004910300391030039502598664014918698501913971900771018003330049110048300391003360078191411134821100211091010000101000001030039015243552005400143454438199991143480020065177462182218988209603000010011190075191398190078191409190066
400251900741434002435410001901332603430094103003910300395026166530149188329019007719140910181365300491100333003910033600781913971348211002110910100001010000010300392813243622002700143474438299991143420020065177482182219121200003000010011190066191410190078191412190078
40025190065143300243470101191391207343004910300391030039502616936014918832801900771913991018137930049110033300391003360078191410135721100211091010000101000001030039270243642002610143594438899991143490120065177342182218988206013000010011190078191398190078191400190078
40025190065143300243470000190638066343004910300391030039502625390014918895701900771914113918183430049010103300391003360078190077135621100211091010000101000011030039280245342022302160974439199991143480120065177472182219121400613000010011190066191412190078191399190066
400251900771433002435600001914412003430049103003910300395026166520149188331019006519141110181377300491100333003910033600781914091357211002110910100001010000010300392813243542002600143544439099991143420120065177342182218988200903000010011190066191410190077191400190066
400251900771433002436000001914522663430049103003910300395026167730149188331019006519140810181379300491100333003910033600781914111357211002110910100001010000010300392713243572002600143504439099991143431120065177512182218987006613000010011190066191410190066191410190075
40025190074143300243600000191392280343004910300391030084502598792014918699401914061900771018004230049110033300391003360078190077135621100211091010000101000001030039270243652002600143524438999991143481020065177402182219121306603000010011191400190078191409190075190078