Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

AXFLAG

Test 1: uops

Code:

  axflag

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | l1d tlb miss (a1) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | f5 | f6 | f7 | f8 | fd
1004103581261917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410358061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
1004103580156917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410358061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410357061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
1004103570156917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410357061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410358061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410358061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036
100410358061917251000100010006225011035103580538821000100010001035104111001007322722990100010361036103610361036

Test 2: Latency 1->1

Code:

  axflag

(non-fused SUB/CBNZ loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710227119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200648410149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036
10204100357500061992025102001020010200647652149695510035100358656387321020010200102001003511011102011009910000710127119990101001001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241003575366199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032733999310010101003610036100361003610036
10024100357506199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032733999310010101003610036100361003610036
10024100357506199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032722999310010101003610036100361003610036
10024100357606199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032733999310010101003610036100361003610036
10024100357506199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064022722999310010101003610036100361003610036
10024100357838499182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064022733999310010101003610036100361003610036
100241003575126199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032733999310010101003610036100361003610036
10024100357506199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032733999310010101003610036100361003610036
10024100357506199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032722999310010101003610036100361003610036
10024100357506199182510020100201002064729614969551003510035867838754100201002010020100351041110021109100064032733999310010101003610036100361003610036

Test 3: throughput

Count: 8

Code:

  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag
  ands xzr, xzr, xzr
  axflag

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6675

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020453442400451419251601001601001601001063588149503240534045340433339203335916010016020080200534046611160201100991008010010001011231923534001600001005340553405534055340553405
1602045340440026713925160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011221932534001600001005340553405534055340553405
160204534043999013925160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011231932534001600001005340553405534055340553405
160204534044003313925160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011231932534001600001005340553405534055340553405
1602045340440048122925160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011231933534001600001005340553405534055340553405
160204534043995413925160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011231933534001600001005340553405534055340553405
1602045340440066170425160100160100160100106358804950324053404534043333933335916010016020080200534046611160201100991008010010001011221933534001600001005340553405534055340553405
1602045340440045013925160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011231933534001600001005340553405534055340553405
1602045340440087170425160100160100160100106358814950324053404534043333933335916010016020080200534046611160201100991008010010001011221932534001600001005340553405534055340553405
16020453404399301148225160100160100160100106358814950324053404534043333933335916010016031180200534046611160201100991008010010001011221932534001600001005340553405534055340553405

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6672

retire uop (01) | cycle (02) | 03 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | a9 | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002453391400240249251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010000100223112419211201953370160000157105337553375533755337553375
160024533744000043251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010040100226111319212131853370160000157105337553375533755337553375
16002453374400018432516001016001016001010293881149502945337453374333317333351160010160020800205337466111600211091080010100001002231117194121916533701600003017105337553375533755337553375
1600245337440000492516001016001016001010293881149502945337453374333310333351160010160020800205337466111600211091080010100001002231217194121718533701600001517105337553375533755337553375
1600245337440000492516001016006916001010293880149502945337453374333310333351160010160020800205337466111600211091080010100001002462217194221717533701600001517105337553411533755337553375
160024533744000043251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010000100223111919422142053370160000307105337553375533755337553375
1600245337439900292251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010000100226211519221171853370160000307105337553375533755337553375
160024533743990049251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010000100246221619221181653370160000307105337553375533755337553375
160024533743990049251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010000100226211619221161653370160000307105337553375533755337553375
16002453374400023443251600101600101600101029388114950294533745337433331033335116001016002080020533746611160021109108001010000100226211519211171553370160000307105337553375533755337553375

Test 4: throughput

Count: 4

Code:

  fcmp s0, s0
  axflag
  axflag
  axflag
  axflag

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3353

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50204134651090000150022727835078540194101244081010200586083811541135761365513710641520562171635048840684101234059520196134731370821502011009910010010000100000012358000032105812213701403011001371513479136521371213703
502041376810601550440031462216250781406251019440899101685857048156011371113816138256011262432735850974406821020140965203881388013822615020110099100100100001007030004100200335721012113718400001001341513415134151341513415
50204134141040063861528097115815069540618101704070510124578356809870137201387413824625025674073885109640987101974098220346138871388491502011009910010010000100000000360520033242192213411400001001341513415134151341513415
502041341410400001501450255010040100100004010010000572812801991134821353313597586820501271905036740499100504040420100135341354131502011009910010010000100003002101520032482362213556401611001341513415134151341513478
5020413474105111214488045612255010040100100004019810049590156802051134411347713536612824561171825036640296100494039420048134871347821502011009910010010000100020002151000032104192213411400001001341513415134151341513415
5020413414104000012264094025501004010010000401001002557475780000113385134141341461282456371195010040200100004020020000134141341411502011009910010010000100000010300032102192213459400941001341513415134151341513415
5020413414104000030066025501004010010000401001000057475780000113385134141341460722456371195010040200100004020020000134141341421502011009910010010000100000020300032672563413553403781001341513415134151341513415
5020413414103000000045025501004010010000401001000057475780000013385134141341461302456371195010040200100004020020000134141341411502011009910010010000100003006104500032483363213508401591001353713539135931353013539
50204135351010022315176029306350315401801007440317100255649538038911338513471134775887304877198503584040610050402972009213595134732150201100991001001000010020001050520032462353213411400001001341513415134151341513415
502041341410400001200143025501004010010000401001000057475780000113385134141341461302467371195010040200100004020020000134141341411502011009910010010000100000000000032102192213411400001001341513415134151341513415

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3346

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002413408100100066255001040010100004001010000573456800001133531338213382557537953710950010400201000040020200001338213382115002110910101000010202015053140619451337940000101338313496133831338313383
5002413382100000045255001040010100004001010025572032801910133531343813440557537843710950010400201000040020200001338213382115002110910101000010000003140419541337940000101346213495135011346313383
5002413382100000045255001040010100004001010000573456800000133531344013382557737843710950010400201000040020200001338213382115002110910101000010000003140619441337940000101338313383133831338313383
5002413382100000045255020440010100004001010000573456800000133531338213382557737843710950010400201000040020200001338213382115002110910101000010000003140719451337940000101338313383133831338313383
5002413382100000045255001040010100004001010000573456800000133531338213382557537953710950010400201000040020200001338213382115002110910101000010000003140519671337940000101338313383133831338313383
50024133821040000129255001040010100004001010013573456800000133531338213382557737953710950010400201000040020200001338213382115002110910101000010000003140419441337940000101338313383133831338313383
5002413382100000045255001040010100004001010000573456800000133531338213382557537953710950010400201000040020200001338213382115002110910101000010000003140419561337940000101338313383133831338313383
5002413382100000045255001040010100004001010000573456800001133531338213382557737843710950010400201000040020200001338213382115002110910101000010000003140419541337940000101338313383133831338313383
5002413382100000045255001040010100004001010000573456800000133531338213382557537953710950010400201000040020200001338213382115002110910101000010000003140419541337940000101338313383133831344113383
5002413382100000045255001040010100004001010000573456800000133531338213382557537843710950010400201000040020200001338213382115002110910101000010400003140619541337940000101338313383134581338313383