Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ADCS (64-bit)

Test 1: uops

Code:

  adcs x0, x0, x1
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1004103580000000619172510001000100062250110351035805388210001000300010354011100110000000000073127119931000100010841036103610361036
1004103580000000619172510001000100062250110351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
1004103570000000619172510001000100062250110351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
1004103580000000619172510001000100062250010351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
1004103580000000659172510001000100062250110351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
1004103580000000619172510001000100062250010351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
1004103580000000619172510001000100062250010351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
1004103580000000619172510001000100062250010351035805388210001000300010354011100110000000100073127119931000100010361036103610361036
1004103580000000619172510001000100062250110351035805388210001000300010354011100110000000000073127119931000100010361036103610361036
10041035800000001059172510001000100062250110351035805388210001000300010354011100110000000006073127119931000100010361036103610361036

Test 2: Latency 1->2

Code:

  adcs x0, x0, x1
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204100357506199202510100101001010064715214969551003510035865638772101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
10204100357596199202510100101261010064715204969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
1020410035751561992025101001010010100647152149695510035100358656128732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
10204100357506199182510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
1020410035751926199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
1020410035751836199202510100101001010064715204969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036
1020410035751536199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071012711999510000101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024100357500061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246496955100351003586783875410010101163002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022723999710000100101003610036100361003610036
100241008375012161991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036101311003610036
10024100357501061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
100241003575000103991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036

Test 3: Latency 1->3

Code:

  adcs x0, x1, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041003575186199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357566199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357508499202510100101001010064715214969551003510035865638732101001020031055100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357506199202510100101001010064775514969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
102041003575456199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036
10204100357508499202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100071022722999510000101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064042744999710000100101003610036100361003610036
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064042743999710000100101003610036100361003610036
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064042734999710000100101003610036100361003610036
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064032734999710000100101003610036100361003610036
1002410035758461991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064042734999710000100101003610036100361003610036
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064032732999710000100101003610036100361003610036
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064042734999710000100101003610036100361003610036
100241003575061991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064042743999710000100101003610036100361003610036
1002410035750619918251001010010100106472464969551003510035867810875410010100203002010035401110021109101001010064042734999710000100101003610036100361003610036
1002410035753961991825100101001010010647246496955100351003586783875410010100203002010035401110021109101001010064022744999710000100101003610036100361003610036

Test 4: Latency 1->4

Chain cycles: 1

Code:

  adcs x0, x1, x2
  tst x0, 1
  mov x0, 1
  mov x1, 2
  mov x2, 3

(non-fused SUB/CBNZ loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204200351500611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100201310128111999220100101002003620036200362003620036
20204200351500611992625202002020020200129765014916955200352003517406317481202002020040200200356411202011009920100331310128111999220100101002003620036200362003620175
20204200351500611992625202002020020200129765014916955200352003517406317481202002020040200200356411202011009920100001310128111999220100101002003620036200362003620036
2020420035150010319926252020020200202001297650149169552003520035174062117481202002020040200200356411202011009920100031310128111999220100101002003620036200362003620036
20204200351500611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100001310128111999220100101002003620036200362003620036
20204200351500611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100001310128111999220100101002003620036200362003620036
20204200351500611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100101310128111999220100101002003620036200812024320036
20204200351500611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100001310128111999220100101002003620036200362003620036
202042003515005361992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100001310128111999220100101002003620036200362003620036
202042003515006119926252020020200202001297650049169552003520035174063174812020020200402002003564112020110099201002201310128111999220100101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200242003515000061199182520020200202002012972970049169552003520035174283175042002020020402522003564112002110920010001270127111999520010100102003620036200362003620036
200242003515000061199182520020200202002012972970149169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
200242003515000061199182520020200202002012972970149169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620066
200242003515000061199182520020200202002012972970149169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
200242003515000661199182520020200202002012972970049169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
200242003515000961199182520020200202002012972970049169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
200242003515000061199182520020200202002012972970049169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
2002420035150001561199182520020200202002012972970049169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
200242003515000061199182520020200202002012972970049169552003520035174283175042002020020400202003564112002110920010001270127111999520010100102003620036200362003620036
2002420035150000285199182520020200202002012972970149169552003520035174283175042002020020400202003564112002110920010001270127211999520010100102003620036200362003620036

Test 5: Latency 4->2

Chain cycles: 1

Code:

  adcs x0, x1, x2
  cset x1, cc
  mov x0, 1
  mov x1, 2
  mov x2, 3
  mov x3, 4
  mov x4, 5

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003515002106119930252010020100201121297233149169552003520035174257174862011220224402482003564112020110099100201001010000000011113200162001220000201002003620036200362003620036
20204200351500006119930252010020100201121297233049169552003520035174258174862011220224408242003564112020110099100201001010000000011113190162001220000201002003620036200362003620036
20204200351500006119930252010020100201121297233149169552003520035174258174852011220224402482003564112020110099100201001010000000011113190162001220000201002003620036200362003620036
20204200351500006119930252010020100201121297233149169552003520035174258174852011220224402482003564112020110099100201001010000000011113200162001220000201002003620036200362003620036
20204200351500006119930252010020100201121297233049169552003520035174258174862011220224402482003564112020110099100201001010000000011113200162001220000201002003620036200362003620036
20204200351500006119930252010020100201121297233149169552003520035174258174852011220224402482003564112020110099100201001010000000011113190162001220000201002003620036200362003620036
202042003515004206119930252010020100201121297233049169552003520220174257175122011220224402482003564112020110099100201001010000000011113190162001220000201002003620081200362003620036
20204200351500006119930252010020100201121297233149169552003520035174257174862011220224402482003564112020110099100201001010000000011113200162001220000201002003620036200362003620036
20204200351500006119930252010020100201121297233049169552003520035174257174852011220224402482003564112020110099100201001010000000011113190162001220000201002003620036200362003620036
202042003515000053619930252010020100201121297233149169552003520035174257174852011220224402482003564112020110099100201001010000000011113200162001220000201002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420035150726119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127211999520000200102003620036200362003620036
200242003515006119918252001020010200101297247491695520035200351743931750420010200204002020035641120021109102001010010001270127111999520000200102003620036200822003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
2002420035150015619918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200812003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515006119918252001020010200101297247491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036

Test 6: Latency 4->3

Chain cycles: 1

Code:

  adcs x0, x1, x2
  cset x2, cc
  mov x0, 1
  mov x1, 2
  mov x2, 3
  mov x3, 4
  mov x4, 5

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | map dispatch bubble (d6) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020420035150082199302520100201002011212972331491695520035200351742571748520112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491695520035200351742571748520112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491695520035200351742571748620112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491695520035200351742581748620112202244024820035641120201100991002010010100001111319162001220000201002003620082201272003620036
2020420035150061199302520100201002011212972331491695520035200351742571748620112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491695520035200351742571748520112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035149061199302520100201002011212972331491695520035200351742571748620112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491695520035200351742581748620112202244024820035641120201100991002010010100001111320162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491713720035200351742581748520112202244024820035641120201100991002010010100001111319162001220000201002003620036200362003620036
2020420035150061199302520100201002011212972331491695520035200351742581748620112202244024820035641120201100991002010010100001111320162001220000201002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 19 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420035150001261199182520010200102001012972471491695520035200351742831750420010200204002020035641120021109102001010010001270227111999520000200102003620036200362003620036
200242006615000061199182520010200102001012972471491695520035200351742831750420010200204002020035641120022109102001010010001270127112005020000200102003620036200362003620036
200242003515000061199182520010200102001012972470491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515000061199182520010200102001012972470491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515000061199182520010200102001012972470491695520035200351742831750420092200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242007915000061199182520010200102001012972471491695520035200351742831750420010200204002020035641120021109102001010010001270127121999520000200102003620036200362003620036
200242003515000061199182520010200102001012972471491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515000061199182520010200102001012972470491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515000061199182520010200102001012972471491695520035200351742831750420010200204002020035641120021109102001010010001270127111999520000200102003620036200362003620036
200242003515000061199182520010200102001012972471491695520035200351742831750420010200204002020035641120021109102001010010001270127121999520000200102003620036200362003620036

Test 7: Latency 4->4

Code:

  adcs x0, x1, x2
  mov x0, 1
  mov x1, 2
  mov x2, 3
  mov x3, 4
  mov x4, 5

(non-fused SUB/CBNZ loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041003575000000103992725102001020010210647712049695610035100358673887351021010224302721003540111020110099101000000011172001601001310100101001003610036100361003610036
10204100357500000061992725102001020010210647712049695610035100358673887351021010224302721003540111020110099101000006011172001601001310100101001003610036100361003610036
10204100357500000061992725102001020010210647712049695610035100358673887351021010224302721003540111020110099101000000011176113401008210168101001008310134101311013010130
1020410176760333962641726992625102001020010210647712049695610035100358673887351021010224302721003540111020110099101000001623011172001601001310100101001003610036100851012910131
1020410083750200002609927701027210247104636483340496956101741012986961587881038110417305651012940311020110099101000000011172001611001310100101001003610036100361008610036
10204101307502113500619926701027210224103806477120497095100831003586952188131021010423311211003540311020110099101000003011172001601001310100101001003610036100361008310036
1020410035750000001249927681024810224104626477120496956101781008486932188131029810512311511008340311020110099101002021628211172001601001310100101001003610036100361003610036
10204100357500000061992725102001020010210647712049695610035100358673787351021010224302721003540111020110099101000000011172001601001310100101001003610036100361003610036
102041003575000088061992725102001020010210647712049695610035100358673887351021010224302721003540111020110099101000003011172001601001310100101001003610036100361003610036
10204100358100000061992725102001020010210647712049695610035100358673887351021010224302721003540111020110099101000000011172001601001210100101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002410035750000061991825100201002010020647296049695501003510035867838754100201002030020100354011100211091001000010064032733999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296049695501003510035867838754100201002030020100354011100211091001000000064032733999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296049695601003510035867838754100201002030020100354011100211091001000000064032733999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296149695501003510035867838754100201002030020100354011100211091001000020064032734999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296149695501003510035868938754100201002030020100354011100211091001000003064032734999710032100101003610036100361003610036
1002410035750000061991825100201002010020647296149695501003510035867838754100201002030020100354011100211091001000010064032733999710010100101003610036100361003610036
10024100357501000619918251002010020100206472961496956010035100358678108754100201002030020100354011100211091001000000064032733999710010100101003610036100361003610036
10024100357500000619918251002010020100206472960496955010035100358678387541002010020300201003540111002110910010000543064032733999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296049695501008310035867838754100201002030020100354011100211091001000000064032733999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296149695501003510035867838754100201002030020100354011100211091001000003064032733999710010100101003610036100361003610036

Test 8: throughput

Count: 8

Code:

  ands xzr, xzr, xzr
  adcs x0, x8, x9
  ands xzr, xzr, xzr
  adcs x1, x8, x9
  ands xzr, xzr, xzr
  adcs x2, x8, x9
  ands xzr, xzr, xzr
  adcs x3, x8, x9
  ands xzr, xzr, xzr
  adcs x4, x8, x9
  ands xzr, xzr, xzr
  adcs x5, x8, x9
  ands xzr, xzr, xzr
  adcs x6, x8, x9
  ands xzr, xzr, xzr
  adcs x7, x8, x9
  mov x8, 9
  mov x9, 10
  mov x10, 11

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6675

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602045341640000000003725160100160100160100106358804950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
160204534044000000000637825160100160100160100106358804950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
1602045340440000000003725160100160100160100106358804950324534045340433339333359160172160200240200534045211160201100991001601001000000101101191253401160000801005340553405534055340553405
1602045340440000000003725160100160100160100106358804950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
1602045340440000000003725160100160100160100106358804950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
160204534044000000000372516010016010016010010635880495032453404534043333911333591601001602002402005340452111602011009910016010010000018101101191153401160000801005340553405534055340553405
1602045340440000000003725160100160100160100106358814950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
1602045340440000000003725160100160100160100106358814950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
1602045340440000000003725160100160100160100106358804950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405
16020453404400000000070225160100160100160100106358804950324534045340433339333359160100160200240200534045211160201100991001601001000000101101191153401160000801005340553405534055340553405

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6672

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 18 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600245339140000004162516001016001016001010293881049502945342153374333313333511600101600202400205337452211600211091016001010000001002231151921161053370160000157800105337553375533755337553375
16002453374400000043251600101600101600821029388114950294533745337433331333351160010160020240020533745211160021109101600101000030100223117192118653370160000157800105337553375533755337553375
16002453374399000043251600101600101600101029388114950294533745337433331333351160010160020240020533745211160021109101600101001000100223114192117753370160000157800105337553375533755337553375
1600245337439900004325160010160010160010102938811495029453374533743333133335116001016002024002053374521116002110910160010100101501002462231942234533701600663015800105337553375533755337553375
160024533744000000432516001016001016001010293880149502945337453374333313333511600101600202400205337452111600211091016001010000001002462241942244533701600003015800105342453375533755337553375
160024533743990000492516001016001016001010293880149502945337453374333313333511600101600202400205337452111600211091016001010000001004062261942266533701600003015800105337553375533755337553375
1600245337440000008422516001016007616001010293880149502945337453374333313333511600101600202401285337452111600211091016001010000001002462241942276533701600003015800105337553375533755337553375
1600245337440000004325160010160010160010102938801495029453374533743333133335116001016002024002053374521116002110910160010100001201002462231942243533701600003015800105337553375533755337553375
160024533744000001243251600101600101600101029388114950294533745337433331333351160010160020240020533745211160021109101600101000000100223117192117453370160000157800105337553375533755337553375
160024533744000000175251600101600101600101029388114950294533745337433331333351160010160020240020533745211160021109101600101000000100223117192117653370160000157800105337553375533755337553375

Test 9: throughput

Count: 4

Code:

  fcmp s0, s0
  adcs x0, x4, x5
  adcs x1, x4, x5
  adcs x2, x4, x5
  adcs x3, x4, x5
  mov x4, 5
  mov x5, 6
  mov x6, 7

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3353

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50204134181060000150450255010040100100004010010000587442800000013383134121341260263342371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940215401001341313413134131341313413
502041341210000001080450255010040100100004010010000574746800000013383134121341260263353371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413
502041341210000001530450255010040100100004010010000574746800000013383134121341255533342371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413
50204134121000000004470255010040100100004010010000574746800000013383134121341255533342371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413
5020413412101000000450255010040100100004010010000574746800000013383134121341255533081371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413
5020413412100000000450255010040100100004010010000574746800000013383134121341260263353371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413
5020413412100000000450255010040100100004010010000587442800000013383134121341255383081371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340840000401001341313413134131341313413
5020413412100000060450255010040100100004010010000574746800000013383134121341260263081371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340840000401001341313413134131341313413
5020413412100000000450255010040100100004010010000574746800000013383134121341255533342371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413
5020413412101000000450255010040100100004010010000587442800000013383134121341260263081371175010040200100001202002000013412134121150201100991004010010000100000000003210119111340940000401001341313413134131341313413

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3346

retire uop (01) | cycle (02) | 03 | 0f | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5002413407100204525500104001010000400101000057345680000013353133821338255803913371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100004525500104001010000400101000057345680000113353133821338255803913371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100004525500104001010000400101000057345680000113353133821338255803913371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382104004525500104001010000400101000057345680000113353133821338255803913371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100004525500104001010000400101000057345680000113353133821338255803254371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100004525500104001010000400101000057345680000113353133821338255803254371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100004525500104001010000400101000057345680000113353133821338255803913371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
50024133821010013125500104001010000400101000057345680000113353133821338255803254371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100006825500104001010000400101000057345680000113353133821338259513913371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383
5002413382100004525500104001010000400101000057345680000113353133821338255803254371095001040020100001200202000013382133821150021109104001010000103140219221337940000400101338313383133831338313383