Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASL (64-bit)

Test 1: uops

Code:

  casl x0, x1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 4.001

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)09l2 tlb miss instruction (0a)0e1e22243a3f464951schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f606163696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)l1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
740073293124637261100200602832760002072030033006300622912700149298053287432857829300310023006100260123284026722171001100010000300602100220040030069991021598011158082094048371226283742444616515332538167591450415058300010013280332786331273290033182
740053271724515050100000548333059002064330063006300622827500149296983255333246725300610023006100260123290326022171001100010000300602100220040130069991121607312180084574044254228743974443816555532491162201430114704300010013272933249328613275533146
740053277324514020100200557432780002061730063006300022858000149297973259932577326300610023006100260123280026832171001100010001300342100220040430039991021628011794082703935250225913917443917584932421165381469814797300010013276032996331623263033036
740053273324514160100010621932777002065330063006300622894800149302103266432727728300610023006100260123275826642171001100010000300640100220040130069991021649011948083983743352227473883444020556132403176341499115470300010013271532736332663322932785
7400532723245230401003006001331530021018300630063006229111300149296993265732938828300610013006100260123280726692171001100010000300640100220040130109991121640312189084894094251226293979443921535032383167671439515306300010013279932763326243292532789
740043271424404040100000598732820002094530063006300622829500149297523261532778729300610023006100160123314526822171001100010000300642100220040730069991001545311756084264021353225803936444317565932489169731458715504300010013314232722327673296432689
740053292724907060100200583532658012078030063006300622849600149297403270632927328300910023006100260123285226572171001100010001300642100220060130069991001636011389181184090353225233991443512585632416168431524416426300010013300532892332613278832646
740053301124905090100000593732723002111330063003300322909200149298763289933038726300610013009100260123289026692171001100010001300040100220040130009991101619711825084164028252226984011444021575732397177581559915290300010013327533242327263272232913
740053272024603040100710584532579002086130063006300622919500149296113281132741829300910023006100260123286026912171001100010000300600100220041030099991001564711692184864028360225364003444614625432677169201458914888300010013278632868329343280132925
740053264524904050100110598832856002062930063003300622897000149298883257432769729300610013006100260123315626622171001100010000300602100220040130039991121617711916084554054053224843885444113526632499170461486415225300010013285432938332543283033379

Test 2: throughput

Code:

  casl x0, x1, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0065

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)0e0f18191e1f2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
50214700655250010007228080417681111207005078822139150468522540100101003000010101300005055333152691496698670625700665967476004340101202033000620203600187006680112020110099100100001010010000010030921121276213575108492027230214879801052311299998251810404661071111316016006991810000101030000201007006770067700677006770068
502047006652411000074430801170410710070050808181291534700225401001010030000101013000050553331487904966985700377006559673660042401012020530009202036001270065801120201100991001000010100100000100309370211761138481080720252272392134979311019998251740342640021111316016006991810000101030000201007006670066700677006670066
502047006552500000073780813172098104700507661213914247667254010010100300001010130000505523314520049669857003770065606826600424010120203300062020360012700678011202011009910010000101001000001003099902257261358610876202602723893321063310749998251770367640021111317016106991710000101030000201007006670066700667006670066
50204700655250000007315078917201081247005079516149150470042540100101003000010101300005055233152450496698570037700655967366004240100202053000620203600127006581112020110099100100001010010000010030898020976113391108402025629538831001068310229998251660410640050001310117116990910000101030000201007006670066700667006670066
5020470065525010000734208181704106148700508181814014847811254010010100300001010030000505503314656149669857003770065596603621084010020200300002020060000700658011202011009910010000101001000001003091202117881360110876202522953897381217310909998251670384640050001310217116990910000101030000201007006670066700667006670066
50204700655250000007112079816801141527005076119140150469692540100101003000010100300005055033157020496698570037700655966136004740100202003000020200600007006580112020110099100100001010010000010030894021678913714108022026329139593610793108399982517703916400180001310117116990910000101030000201007006670066700667006670066
50204700655250010007507081417121061487005078414134140464552540100101003000010100300005055133143021496698670037700655966036004740727202003000020200600007006581112020110099100100001010010000010030898021977613502108612025429538793410433108899982518103876401860001310117116990910000101030000201007006670066700667006670066
5020470065524000000716608181688113108700508001615814347814254010010100300001010030000505523418188049669857003770065596613600474010020200300002020060000700657711202011009910010000101001000001003087402178181373710829202293003921341117311209998251800406640050001310117116990910000101030000201007006670066700667006670069
50204700665250000007429079616961231527005078417147141483242540100101003000010100300005055333148760496698570037700655966136004740100202003000020200600007006578112020110099100100001010010000010030916021882713203108282026329738811181106311149998251750387640500001310117117218110000101030000201007006670066700667006670066
50204700655250000007344082017441171527005077917148143479142540100101003000010100300005055033145030496698570037700655966023600474010020200300002020060000700658011202011009910010000101001000001003088801997271360510813202572833907341040311069998251830398640070001310117116990910000101030000201007006670066700667006770066

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0070

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0f1e1f202224293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
50034700706201001072321083420560526470056806813412147021254001010010300001001030000501023315528149669907003870070596653600524001020020300002002060000700707911200211091010000100101000001030867142018321350710886202522968800941106310999998232091037742152110012702173369914100009930000200107007170071700717007170071
5002470071525201217212117962033652807005677881251184703225400101001030000100103000050426331470114966990700387007059666360052400102002030000200206000070070821120021109101000010010100000103087814187733132591089820224272874098112631089999822209114213814040012703172269914100009930000200107007170071700717007170071
50024700705251000071581192820504519270055812312411847088254001010010300001001030000501013315350049669907003870070596663600524001020020300002002060000700708011200211091010000100101000001030902141987791341010963202382698780701086310639998222101238859141120012702171269914100009930000200107007170071700717007170071
500247007052410000708012798304964220700558021112110847099254001010010300001001030000501033315799049669907003870070596663600404001020020300002002060000700708111200211091010000100101000001030915142037781381910873202522638750110116131056999823208114155613390012703171369914100009930000200107007170071700717007170071
50024700705241000072361280020544320070055791812012846809254001010010300001001030000501023315043049669907003870070596673600524001020020300002002060000700708111200211091010000100101000001030875141747411355110844202492868600120101131053999823181123925714360012702172369914100009930000200107007370071700717007170072
500247007154320010708197470251225248700607732613612747129254001010010300001001030000501033315280149669957004870074596723600564001020020300002002060000700748211200211091010000100101000001030900202007991356610871202302961626928410553106199982419893916213011001270217226991910000131330000200107007570075700757007670075
500247007552522000707597840249615272700597611412812746843254001010010300001001030000501023315741149669947004670074596693600564001020020300002002060000700748311200211091010000100101000001030858141887821328010850202352691646731449933106299982419510428621503001270117336991810000131330000200107007570076700757007570075
5002470074524200107083978120568181847005978719123135472322540010100103000010010300005010033162141496699470046700745966936005640010200203000020020600007007482112002110910100001001010000010308591920381113624108562024725916568812010413105399982420593904614513001270217226991810000131330000200107007570075700767007770075
5002470075525200007226982120504812167005978727138124474382540010100103000010010300005010333156551496699470042700745966936005640010200203000020020600007007479112002110910100001001010000010308871519077913963108382023226886001161090310839998252109441621434001270217136991810000131330000200107007670075700767007570076
500247007552520000698698150250413240700597682613113547036254001010010300001001030000501033314631049669947004270075596703600564001020020300002002060000700748211200211091010000100101000001030826171968141375310895202152208720661060310629998241999432461539001270317226991910000131330000200107007570076700757007570076

Test 3: throughput

Code:

  casl x0, x1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0108

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f181e1f2223243a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606367696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2st memory order violation nonspec (c4)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd0d1d5map dispatch bubble (d6)ddfetch restart (de)e0e2e7eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
418327011652500000016279210027010241712732410764442854014350943511328120271702836111496705307007770100599112139229911088948808126014193912116141961244908701177011631102011009910010000100100000100423481644331086229249216430210808501169999181311412303042114212871036331726327003433991130030000116247012170132701177011770117
417937011952610000015121131002701105151371081052843747370427513921126071874168872301049670430700937011666491200423792111215912813167403681203814214124951670094700942110201100991001000010010000010041701036201113328551207800010132494959999184882120028681162178805233270292670013331021010030000116357010870108700957009670108
417697010852500000015242200016700990906988109844251739643922397113945152417066891104967028070092701146365123012257111589578421250443090126270424852521747010870114211020110099100100001001000001004299420011166270602152200957349063999918229210003088016217331102630972831700073407100030000116967009570114701137011370109
417437017452500000015079365100227086751611808110164433483994291036011129520401748201110496703907009270105580012664238381132048627118554143412168641503244452701107010821102011009910010000100100000100422260360111872848520792009784501599999181392120030171152093602813151017003234870130030000116967011770117701217011970121
4178170116525111000150191310017700931067501103484337036143027336108999172816971431104967027070090701085688122152469410612491291244242984123546418612477527010870108211020110099100100001001000001004236503601099628757214780098754931699991731320200312510621420170351525247001334590010030000116307010870109701097011370095
41742701075250000001537020002370094076729711184441383994261242011631416551684810010496703007008170095546312087225421116608512119214206112224140318244944701027011621102011009910010000100100001100417692042371110128548214520210031499439999180361312303197114212031603385007003537450100030000116897011170108701177011770108
417677011352500000014698200023700920906526105734310539842968395107706194017357650104967028070088700945886123062252911459377681199241399123513419492498767009470108211020110099100100001001000011004230503619112942865421362009238506049999182992120030470152140114263269272670003336901010030000116467009570114701137011570095
417537010852500000014637140001970094068814110438428563884383343010909719481666367010496703007008370108526312290228721131888885122774235412292241287246126700957010821102011009910010000100100000100414600001141528047214940095014982999991904621000305311421129011036301070029355401010030000115987010970111701127009570109
41728700945250000001603501000700922608066105914375840143482308112802205217274141104967037070089701156315120802277911324691371272641727124728414192379247011070110211020110099100100001001000001004215004201130526423214760010201491829999177340020030200132228908033982829700243511000030000116357009570095700957009570108
4172370094525000110145012100257009706776809302423764034275041411281616101758121110496701407008870094656412212233551145478397120294153812031242288243636700947011021102011009910010000100100001100413670001077626202211480091794910799991870901000303700321339031037331070032328121313030000116377011770118701167011870118

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0116

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2st memory order violation nonspec (c4)cfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e2e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
41690701245252000010002200820011701013802366125925156640515404712764821315184391149670310700917010795313450157711272584656424482913417244822268350701147011421100211090101000010100000105075703621896641422233060017154640339999115725131230249822113947460455547011341535001030000116737011770111701157011970126
416867011452520000000021663210670092388242912519517304151333501272102311511824114967027070081701089071343516034127136525530447711340494477226802670108701082110021109010100001010000010505450360915940820231760017242640369999115828212302513222169514565596470004401500101030000116777009570096701097010970109
41690701085251000000002242313108700923882352125805157540514784012749420015153281149670280701127013694013363156661276614586514487913438844885268764701087010821100211090101000010100000105088203621917641018232900017643642749999115949212002523220309604408535570017403150101030000116757010970095701097010970115
4168970094525200000000222422108700794082433125995114541511064112715819515080451149670480705737090593013481155681268213965654464813440044937269082700947010821100211090101000010100000105099203621892040619232620023565644999999115575012002521217929604292515970018400750101030000116767125070857708617164470858
41683701085253000000002242613101070093488246012579515854251429441277112111523330114967038070104701129641340715597127751455678448211345174492726896870117701172110021109010100001010000110507201942219125406462328800168186392199991155652123025202175215564558596570036412850101030000116677011770113701177011770117
41681701165252000000002189601087010338825531260551377495130449127381245151384311496704707006870118960133391568212766542965044895134421449152688967011670094211002110901010000101000001050722036218935411892330600176806430599991159392120025102191315624466636370023403910101030000116677010970109700957009570109
416817010852520000000022117200970093477243512626515044451455451275682351516848114967020070086701169511341415584127705471643448751343614491126895070118701182110021109010100001010000010506920021917140833232240017246640289999115795202002538220720534508484970031405640101030000116737011970117701197011770115
41688701145252000000002184220067009948823531260851519415147346127647218152011911496703707009070114965133551561712792747472444909134478449402691007011870116211002110901010000101000001050894036218951409172327800171316433799991159582123025542208815524488426270024414540101030000116667011870117700957011670117
4168070094525300000000221740007701024702411126765145141515015012722722715216681149670210700887011493813395156691278664457294501013475445014269592701187011821100211090101000010100000105097103621895340897232780016826641049999115890212002569219381164436061657002441355001030000116757011970118701157011770096
4169070094525200000000221732008701014882361125835147443514844712773721415142731149670370700857011497613343155721277084566854478113410644782268200701187009421100211098101000010100000105086003620890040752232660017301642919999115797202002531220880494606606670028404750101030000116737011970117701127009570111