Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ADCS (32-bit)

Test 1: uops

Code:

  adcs w0, w0, w1
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)031e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst int alu (97)l1d cache writeback (a8)acc2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
1004103570619172510001000100062250103510358053882100010003000103540111001100000073227119931000100010361036103610361036
1004103580619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
1004103573619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
1004103580619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
1004103570619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
1004103580619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
1004103570619172510001000100062250103510358053882100010003000103540111001100000273127119931000100010361036103610361036
1004103570619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
10041035801569172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036
1004103570619172510001000100062250103510358053882100010003000103540111001100000073127119931000100010361036103610361036

Test 2: Latency 1->2

Code:

  adcs w0, w0, w1
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01)cycle (02)031e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d cache writeback (a8)acc2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
10204100357508499202510100101001010064715204969551003510035865638732101001020030200100354011102011009910010100100000071012711999510000101001003610036100361003610036
102041003575061992025101001010010100647152049695510035100358656387321010010200302001003540111020110099100101001000780071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715204969551003510035865638732101001020030200100354011102011009910010100100000071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100000071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100030071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715204969551003510035865638732101001020030200100354011102011009910010100100130071012711999510000101001003610036100361003610036
10204100357806199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100000071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715204969551003510035865638732101001020030200100354011102011009910010100100600071012711999510000101001003610036100361003610036
10204100357596199202510100101001010064715204969551003510035865638732101001020030200100354011102011009910010100100030071012711999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100000071012711999510000101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01)cycle (02)031e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)accfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
10024100357506199182510010100101001064724614969551003510035867838754100101002030020100354011100211091010010100009664022722999710000100101003610036100361003610036
1002410035750619918251001010010100106472461496955100351003586783875410010100203002010035401110021109101001010000067822722999710000100101003610036100361003610036
1002410035751261991825100101001010010647246149695510035100358678387541001010020300201003540111002110910100101000059564022722999710000100101003610036100361003610036
1002410035750619918251001010010100106472461496955100351003586783875410010100203002010085401110021109101001010000065722732999710000100101003610036100361003610036
100241003575010409918251001010010100106472461496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
1002410035750829918481001010010100106472461496955100351008186783875410093102133002010035401110021109101001010020064022722999710000100101003610036100361003610036
1002410035750619918251001010010100106472460496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
10024100357506199182510010100101001064724614969551003510035867838754100101002030020100354011100211091010010100002164022722999710000100101003610036100361003610036
1002410035750619918251001010010100106472461496955100351003586783875410010100203002010035401110021109101001010000064022722999710000100101003610036100361003610036
100241003575126199182510010100101001064724614969551003510035867838754100101002030020100354011100211091010010100036064022722999710000100101003610036100361003610036

Test 3: Latency 1->3

Code:

  adcs w0, w1, w0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01)cycle (02)031e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d cache writeback (a8)accfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
102041003575061992025101001010010100647152149695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152049695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036
10204100357506199202510100101001010064715214969551003510035865638732101001020030200100354011102011009910010100100012071022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152049695510035100358656387321010010200302001003540111020110099100101001000371022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152149695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152149695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152149695510035100358656387321010010200302001003540111020110099100101001002371022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152049695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036
102041003575061992025101001010010100647152049695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036
1020410035760251992025101001010010100647152049695510035100358656387321010010200302001003540111020110099100101001000071022722999510000101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01)cycle (02)0309181e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d cache writeback (a8)accfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
1002410035752006199182510010100101001064724614969551003510035867838754100101002030020100354011100211091010010100064032722999710000100101003610036100361003610036
1002410035750036199182510010100101001064724604969551003510035867838754100101002030020100354011100211091010010101364022722999710000100101003610036100361003610036
1002410035750006199182510010100101001064724614969551003510035867838754100101002030020100354011100211091010010100364022722999710000100101003610036100361003610036
10024100357500061991825100101001010010647246149695510035100358678387541001010020300201003540111002110910100101038064022722999710000100101003610036100361003610036
1002410035750006199182510010100101001064724604969551003510035867838754100101002030020100354011100211091010010100064022722999710000100101003610036100361003610036
10024100357600061991825100101001010010647246049695510035100358678387541001010020300201003540111002110910100101002464022722999710000100101003610036100361003610036
1002410035750096199182510010100101001064724604969551003510035867838754100101002030020100354011100211091010010100364022722999710000100101003610036100361003610036
1002410035750006199182510010100101001064724614969551003510035867838754100101002030020100354011100211091010010100064022722999710000100101003610036100361003610036
1002410035760006199182510010100101001064724604969551003510035867838754100101002030020100354011100211091010010101064022722999710000100101003610036100361003610036
1002410035750106199182510010100101001064724604969551003510035867838754100101002030020100354011100211091010010100064022722999710000100101003610036100361003610036

Test 4: Latency 1->4

Chain cycles: 1

Code:

  adcs w0, w1, w2
  tst x0, 1
  mov x0, 1
  mov x1, 2
  mov x2, 3

(non-fused SUB/CBNZ loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)18191e1f3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)l1d tlb access (a0)ld unit uop (a6)l1d cache writeback (a8)acc2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
202042003515000000480611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765014916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000090611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002006720036200362003620036
202042003515000000300611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765004916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036
20204200351500000000611992625202002020020200129765014916955200352003517406317481202002020040200200356411202011009920100000000001310128111999220100101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01)cycle (02)03181e1f3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)ld unit uop (a6)l1d cache writeback (a8)a9accfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
20024200351500144061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001000001270527121999520010100102003620036200362003620036
200242003515000061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001000001270127111999520010100102003620036200362003620036
200242003515000061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001020001270127111999520010100102003620036200362003620036
2002420035150132488165199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001001031270127211999520010100102003620036200362003620036
200242003515000061199182520020200202002012972974916955200352003517428317504200202002040020200806441200211092001001001270127111999520010100102003620036200362003620036
2002420035150024061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001001001270127111999520010100102003620036200362003620036
200242003515000061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001000001270127111999520010100102003620036200362003620036
2002420035150000611991810920020200642002012972974916955200352003517428317504200202002040020200356411200211092001001061270127211999520010100102003620036200362003620036
2002420035150015061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001000001270127111999520010100102003620036200362003620036
2002420035150012061199182520020200202002012972974916955200352003517428317504200202002040020200356411200211092001000001270127111999520010100102003620036200362003620036

Test 5: Latency 4->2

Chain cycles: 1

Code:

  adcs w0, w1, w2
  cset x1, cc
  mov x0, 1
  mov x1, 2
  mov x2, 3
  mov x3, 4
  mov x4, 5

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01)cycle (02)03mmu table walk instruction (07)l2 tlb miss instruction (0a)l2 tlb miss data (0b)1e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbranch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
2020420035150110061199302520100201002011212972331491695520035200351742581748520112202244024820035641120201100991002010010100000031111319116112001420000201002003620036200362003620036
2020420035150110061199302520100201002011212972331491695520035200351742581748520112202244024820035641120201100991002010010100000001111320116112001420000201002003620036200362003620036
2020420035150111061199302520100201002011212972331491695520035200351742571748520112202244024820035641120201100991002010010100000001111320116112001420000201002003620036200362003620036
2020420067150110061199302520100201002011212972331491695520035200351742581748620112202244024820035641120201100991002010010100000001111320116112001420000201002003620036200362003620036
2020420035150110061199302520100201002011212972331491695520035200351742571748620112202244024820035641120201100991002010010100000131111320116112001420000201002003620036200362003620036
2020420035150110061199302520100201002011212972331491695520035200351742581748520112202244024820035641120201100991002010010100000001111319116112001420000201002003620036200362003620036
2020420035150110061199302520100201002011212972331491695520035200351742571748520112202244024820035641120201100991002010010100000131631111320116112001420000201002003620036200362003620036
2020420035150110061199302520100201002011212972331491695520035200351742581748620112202244024820035641120201100991002010010100000001111320116112001420000201002003620036200362003620036
2020420035150110061199302520100201002011212972331491695520035200351742571748620112202244024820035641120201100991002010010100000001111320116112001420000201002003620036200362003620036
2020420035150110066199302520100201002011212972331491695520035200351742581748620112202244024820035641120201100991002010010100000001111320116112001420000201002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01)cycle (02)031e1f3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d cache writeback (a8)a9acbranch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000001270427341999520000200102003620036200362003620036
200242003515000821991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000001270327431999520000200102003620036200362003620036
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000301270427441999520000200102003620036200362003620036
20024200351500038491991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000001270327441999520000200102003620036200362003620036
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000011270427431999520000200102003620036200362003620036
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000001270427441999520000200102003620036200362003620036
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000001270327421999520000200102003620036200362003620036
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001022001270327331999520000200102003620036200362003620036
200242003515000611991825200102001020010129724749169552003520035174283175042001020020400202003564112002110910200101001000001270427331999520000200102003620036200362003620036
2002420035150006119918252001020032200101297247491695520035200351742831750420010200204002020035641120021109102001010010009901270427431999520000200102003620036200362003620036

Test 6: Latency 4->3

Chain cycles: 1

Code:

  adcs w0, w1, w2
  cset x2, cc
  mov x0, 1
  mov x1, 2
  mov x2, 3
  mov x3, 4
  mov x4, 5

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01)cycle (02)03mmu table walk instruction (07)l2 tlb miss instruction (0a)1e1f3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d tlb miss (a1)l1d cache writeback (a8)acbranch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
2020420035150110061199302520100201002011212972334916955200352003517425817486201122022440248200356411202011009910020100101000001111320116112001520000201002003620036200362003620036
202042003515011006119930252010020100201121297233491695520035200351742571748620112202244024820035641120201100991002010010100001171111319116112001520000201002003620036200362003620036
2020420035150110061199302520100201002011212972334916955200352003517425817486201122022440248200356411202011009910020100101000001111319116112001520000201002003620036200362003620036
202042003515011006119930252010020100201121297233491695520035200351742581748520112202244024820035641120201100991002010010100001021111319116112001520000201002003620036200362003620036
2020420035150110061199302520100201002011212972334916955200352003517425717486201122022440248200356411202011009910020100101000001111319116112001520000201002003620036200362003620036
2020420035150110061199302520100201002011212972334916955200352003517425817485201122022440248200356411202011009910020100101000001111320116112001520000201002003620036200362003620036
20204200351491100611993025201002010020112129723349169552003520035174258174852011220224402482003564112020110099100201001010000871111319116112001520000201002003620036200362003620036
2020420035150110061199302520100201002011212972334916955200352003517425717485201122022440248200356411202011009910020100101000001111319116112001520000201002003620036200362003620036
20204200351501100536199302520100201002011212972334916955200352003517425717485201122022440248200356411202011009910020100101000061111320116112001520000201002003620036200362003620036
2020420035150110061199302520100201002011212972334916955200352003517425717486201122022440248200356411202011009910020100101000001111320116112001520000201002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss data (0b)18191e1f3a3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acc2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
20024200351500000000001891991825200102001020010129724704916955020035200351742831750420010200204002020035641120021109102001010010000000001270227221999520000200102003620036200362003620036
2002420035150000000000611991825200102001020010129724704916955020035200351742831750420010200204002020035641120021109102001010010000000001270227221999520000200102003620036200362003620036
20024200351500000000006119918252001020010200101297247149169550200352003517428317504200102002040020200356411200211091020010100100000420301270227221999520000200102003620036200362003620036
200242003515000000000061199182520010200102001012972470491695502003520035174283175042001020020400202003564112002110910200101001000004406301270227321999520000200102003620036200362003620036
2002420035150000000000611991825200102001020010129724704916955020035200351742831750420010200204002020035641120021109102001010010000010001270227221999520000200102003620036200362003620036
2002420035150000000000611991825200102001020010129724714916955020035200351742831750420010200204002020035641120021109102001010010000041018001270227221999520000200102003620036200362003620036
2002420035150000000000611991825200102001020010129724714916955020035200351742831750420010200204002020035641120021109102001010010000035011101270227221999520000200102003620036200362003620036
2002420035150000000000611991825200102001020010129724714916955020035200351742831750420010200204002020035641120021109102001010010000000001270227221999520000200102003620036200362003620036
2002420035150000000000611991825200102001020010129724714916955020035200351742831750420010200204002020035641120021109102001010010000000001270227221999520000200102003620036200362003620036
20024200351490000000001871991825200102001020010129724714916955020035200351742831750420010200204002020035641120021109102001010010000000001270227221999520000200102003620036200362003620036

Test 7: Latency 4->4

Code:

  adcs w0, w1, w2
  mov x0, 1
  mov x1, 2
  mov x2, 3
  mov x3, 4
  mov x4, 5

(non-fused SUB/CBNZ loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01)cycle (02)031e1f3a3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)st unit uop (a7)l1d cache writeback (a8)a9acc2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfmap dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
10204100357500061992725102001020010210647712049695510035100678673887351021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
10204100357500061992725102001020010210647712149695510035100358673887361021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
10204100357500061992725102001020010210647712149695510035100358673887351021010224302721003540111020110099101000009011171916001001310100101001003610036100361003610036
10204100357500061992725102001020010210647712149695510035100358673887351021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
10204100857500061992725102001020010210647712049695510035100358673887361021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
10204100357500061992725102001020010210647712049695510035100358673887351021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
10204100357530061992725102001020010210647712049695510035100358673887351021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
102041003577000619927251020010200102106477121496955100351003586738873610210103203027210035401110201100991010000018011172016001001310100101001003610036100361003610036
10204100357500061992725102001020010210647712149695510035100358673887351021010224302721003540111020110099101000000011172016001001310100101001003610036100361003610036
102041003575000619927251020010200102106477120496955100351003586738873510210102243027210035401110201100991010000021011172016001001310100101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01)cycle (02)03mmu table walk data (08)09181e1f3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)l1d tlb access (a0)a9accfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
10024100357500017106199182510020100201002064729614969551003510035867838754100201002030020100354011100211091001000064022722999710010100101003610036100361003610036
100241003575000006199182510020100201002064729604969561003510035867838754100201002030020100354011100211091001000064022722999710010100101003610036100361003610036
100241003575000006199182510020100201002064729604969551003510035867838754100201002030020100354011100211091001000064032722999710010100101008410036100361003610036
100241003575000006199182510020100201002064729604969551003510035867838754100201002030020100354011100211091001000064022722999710010100101003610036100361003610036
1002410035750000061991825100201002010020647296149695510035100358678387541002010020300201003540111002110910010220640327221003110032100101003610036100361003610036
100241003575001006199182510020100201002064729614970031008210035867838754100201002030020100354011100211091001000064022732999710010100101003610036100361003610036
1002410035750001206199182510020100201002064729604969551003510035867838754100201002030020100354011100211091001000064032732999710010100101003610036100361003610036
100241003575000151766199182510020100201002064729604969561003510035867838754100201002030020100354011100211091001000064022722999710010100101003610036100361003610036
100241003575000006199182510020100201002064729604969551003510035867838754100201002030020100354011100211091001000064022722999710010100101003610036100361003610036
100241003575000006199182510020100201002064729604969551003510035867838754100201002030020100354011100211091001000064022722999710010100101003610036100361003610036

Test 8: throughput

Count: 8

Code:

  ands xzr, xzr, xzr
  adcs w0, w8, w9
  ands xzr, xzr, xzr
  adcs w1, w8, w9
  ands xzr, xzr, xzr
  adcs w2, w8, w9
  ands xzr, xzr, xzr
  adcs w3, w8, w9
  ands xzr, xzr, xzr
  adcs w4, w8, w9
  ands xzr, xzr, xzr
  adcs w5, w8, w9
  ands xzr, xzr, xzr
  adcs w6, w8, w9
  ands xzr, xzr, xzr
  adcs w7, w8, w9
  mov x8, 9
  mov x9, 10
  mov x10, 11

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6675

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)181e1f3f51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)a9acc2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
16020453409400000005282816012016012016012810637384950329534095340833347733357160128160240240260534095211160201100991001601001000000000111101201160053406160020801005345753410534105341053409
16020453409399000001812816012016012016012810637384950328534095340933347733357160128160240240260534095211160201100991001601001000000000111101200160053406160020801005341053409534095341053410
16020453408400000301602716012016012016012810637384950324534045340433339333359160100160200240200534045211160201100991001601001000000000000101101191153401160000801005340553405534055340553405
16020453404400000001902516010016010016010010635884950324534045340433339333359160100160200240368534045211160201100991001601001000000000000101101191153437160000801005340553405534055340553405
16020453404400000001462516010016010016010010635884950324534045340433339333359160100160200240200534045211160201100991001601001000033000000101101191153401160000801005340553405534055340553405
1602045340440010000372516010016010016010010635884950324534045340433339333359160100160200240200534045211160201100991001601001000000000000101101191153401160000801005340553405534055340553405
16020453404400000001462516010016010016010010635884950324534045340433339833359160100160200240200534045211160201100991001601001002000000000101101191153401160000801005340553405534055340553405
16020453404400000001692516010016010016010010635884950324534045340433339333390160100160200240200534045211160201100991001601001000200000000101101191153401160000801005340553405534055340553405
16020453404399000002952516010016010016010010635884950324534045340433339333359160100160200240200534045211160201100991001601001000000000000101101191153401160000801005340553405534055340553405
16020453404399000001462516010016010016010010635884950324534045340433339333359160100160200240200534045211160201100991001601001000000000000101101191153401160000801005340553405534055340553405

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6672

retire uop (01)cycle (02)03181e3f51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)5f60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)9fl1d cache writeback (a8)a9acbranch cond mispred nonspec (c5)cfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaeb? int retires (ef)f5f6f7f8fd
1600245339040000432516001016001016001010293881149502945337453374333313333511600101600202400205337452111600211091016001010000010022612121962153533701600004018800105337553375533755337553375
16002453374399004925160010160010160010102938810495029453374533743333133335116001016002024002053374521116002110910160010100000100246226194123953370160000409800105337553375533755337553375
16002453374399004925160010160010160010102938801495029453374533743333133335116001016002024002053374521116002110910160010100000100223229194113553370160000209800105337553375533755337553375
16002453374400004325160010160010160010102938811495029453374533743333133335116001016002024002053374521116002110910160010100000100223113194113953370160000209800105337553375533755337553375
16002453374399004325160010160010160010102938811495029453374533743333133335116001016002024002053374521116002110910160010100000100223113194125353370160000209800105337553375533755337553375
160024533744000043251600101600101600101029388114950294533745337433331333351160010160020240020533745211160021109101600101000001002231131921141153370160000209800105337553375533755337553375
16002453374400004325160010160010160010102938811495029453374533743333133335116001016002024002053374521116002110910160010100000100223118194113553370160000209800105337553375533755337553375
160024533744000043251600101600101600101029388114950294533745337433331333351160010160020240020533745211160021109101600101000001002231131921151053370160000209800105337553375533755337553375
160024533744000943251600101600101600101029388114950294533745337433331333351160010160020240020533745211160021109101600101000001002231151921151153370160000209800105337553375533755337553375
160024533744000184325160010160010160010102938811495029453374533743333133335116001016002024002053374521116002110910160010100000100223115192119553370160000209800105337553375533755337553375

Test 9: throughput

Count: 4

Code:

  fcmp s0, s0
  adcs w0, w4, w5
  adcs w1, w4, w5
  adcs w2, w4, w5
  adcs w3, w4, w5
  mov x4, 5
  mov x5, 6
  mov x6, 7

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3353

retire uop (01)cycle (02)031e3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)flags prf full (73)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9faccdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
5020413455104045025501004010010000401001000058744280000113383134121341256433342371175010040200100001202002000013412134121150201100991004010010000100003210119221340840000401001341313413134131341313413
5020413412100045025501004010010000401001000057474680000013383134121341255533081371175010040200100001202002000013412134121150201100991004010010000100003210119111340940000401001341313413134131341313413
5020413412100045025501004010010000401001000058744280000113383134121341255383353371175010040200100001202002000013412134121150201100991004010010000100003210219211340940000401001341313413134131341313413
50204134121010280025501004010010000401001000058744280000013383134121341255383353371175010040200100001202002000013412134121150201100991004010010000100003210119111340940000401001341313413134131341313413
5020413412100045025501004010010000401001000058744280000013383134121341260263081371175010040200100001202002000013412134121150201100991004010010000100003210119111340940000401001341313413134131341313413
5020413412101045025501004010010000401001000058744280000013383134121341260263353371175010040200100001202002000013412134121150201100991004010010000100003210119211340840000401001341313413134131341313413
5020413412100066025501004010010000401001000057474680000113383134121341255383081371175010040200100001202002000013412134121150201100991004010010000100003210219111340840000401001341313413134131341313413
50204134121000213025501004010010000401001000058744280000013383134121341260263081371175010040200100001202002000013412134121150201100991004010010000100003210119121340940000401001341313413134131341313413
5020413412100045025501004010010000401001000057474680000113383134121341260263081371175010040200100001202002000013412134122150201100991004010010000100003210119221340940000401001341313413134131341313413
5020413412100045025501004010010000401001000058744280000013383134121341260263081371175010040200100001202002000013412134121150201100991004010010000100003210119111340840000401001341313413134131341313413

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3346

retire uop (01)cycle (02)031e3f51schedule uop (52)schedule int uop (53)schedule simd uop (54)dispatch int uop (56)dispatch simd uop (57)int uops in schedulers (59)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)flags prf full (73)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map simd uop (7e)map int uop inputs (7f)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd alu (9a)9fl1d cache writeback (a8)cfl1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
5002413404100071025500104001010000400101000057345680000013353133821338255803254371095001040020100001200202000013382133821150021109104001010000100314041719841337940000400101338313383133831338313383
50024133821000452550010400101000040010100005734568000001335313382133825580325437109500104002010000120020200001338213382115002110910400101000010031404619641337940000400101338313383133831338313383
500241338210104592550010400101000040010100005734568000001335313382133825580325437109500104002010000120020200001338213382115002110910400101000010031404319371337940000400101338313383133831355813383
50024133821000454450107400101000040010100005734568000001335313382133825580325437109500104002010000120020200001338213429115002110910400101000010031404519461337940000400101338313383133831338313383
50024133821000452550010400101000040010100005734568000011335313382133825580391337109500104002010000120020200001338213382115002110910400101000010031404319651337940000400101338313383133831338313383
50024133821000452550010400101000040010100005734568000001335313382133825951325437109500104002010000120020200001338213382115002110910400101000010031403419531337940000400101338313383133831338313383
50024133821000452550010400101000040010100005734568000011335313382133825951325437109500104002010000120020200001338213382115002110910400101000010031404519551337940000400101338313383133831338313383
50024133821000452550010400101000040010100005734568000001335313382133825580325437109500104002010000120020200001338213382115002110910400101000010131404419811337940000400101338313383133831338313383
500241338210001082550010400101000040010100005734568000001335313382133825580391337109500104002010000120020200001338213382115002110910400101000010031403319471337940000400101338313383133831338313383
50024133821000452550010400101000040010100005734568000001335313382133825951391337109500104002010000120020200001338213382215002110910400101000010031403419461337940000400101338313383133831338313383