Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

EXTR (register, 64-bit)

Test 1: uops

Code:

  extr x0, x0, x1, 13
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 2.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200410368004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410368004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410368004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410368004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410368004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410367004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410367004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
20041036712644625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410367004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
2004103680154625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037

Test 2: Latency 1->2 (result in operand 1 feeds operand 2, the first source register)

Code:

  extr x0, x0, x1, 13
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041004575000472520100201002010011050014969561003610083746437494201002020030200100368411102011009910010100100000000710116111003320054201001003710037100371003710037
2020410036750570472520100201002010011050014969561003610036746437494201002020030200100368411102011009910010100100000000710116111003320000201001003710037100371003710037
2020410036750540472520100201002010011050014969561003610036746437494201002026630200100368411102011009910010100100000000710116211003320000201001003710037100371003710037
2020410036750600472520100201002010011050014969561003610036746437494201002020030200100368411102011009910010100100020000710116111003320000201001003710037100371003710037
2020410036750540472520100201002010011050004969561003610036746437494201002020030200100368411102011009910010100100000000710116111003320000201001003710037100371003710037
2020410036750600472520100201002010011050004969561003610036746437494201002020030200100368411102011009910010100100000020710116111003320054201001008510037100371008410037
202041003675060472520100201542010011050014969561003610036746437494201002020030200100368421102011009910010100100000000710116111003320000201001003710037100371003710037
202041003675090472520100201002010011050014969561003610036746437494201002020030200100368411102011009910010100100000000710116111003320000201001003710037100371003710037
202041003675039019192520100201002010011050014969561003610036746437494201002020030200100368411102011009910010100100000000710116111003320000201001003710037100371003710037
202041003675000472520100201002010011050014969561003610036746437494201002020030200100368411102011009910010100100000100710116111003320000201001003710037100371003710037

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241003675000255563252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640517671003320000200101003710037100371003710037
20024100367500029446252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640617651003320000200101003710037100371003710037
20024100367500030646252001020010200101100504969561003610036750137515200102002030020100368411100211091010010100000640717671003320000200101003710037100371003710037
20024100367501030646252001020010200101100504970521003610036748637515200102002030020100368411100211091010010100000640617661003320000200101003710037100371003710037
2002410036750004588252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640617661003320000200101003710037100371003710037
20024100367500013846252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640717771003320000200101003710037100371003710037
20024100367500043246252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640617761003320000200101003710037100371003710037
20024100367500041746252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640717771003320000200101003710037100371003710037
2002410036750001246252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640717761003320000200101003710037100371003710037
2002410036750014546252001020010200101100504969561003610036748637515200102002030020100368411100211091010010100000640717761003320000200101003710037100371003710037

Test 3: Latency 1->3 (result in operand 1 feeds operand 3, the second source register)

Code:

  extr x0, x1, x0, 13
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003515024619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
202042003515001039951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036
20204200351500619951252010020100201001264318049169552003520035174063174932010020200302002003510411102011009910010100100000710116111999320000201002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0035

retire uop (01) | cycle (02) | 03 | 19 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420035150015619953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
2002420035150001039953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
2002420035150018619953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101001000640216221999120000200102003620036200832003620036
200242008015019619953252001020010200101268655491695520035200351742471751520010200203002020035104111002110910100101021000640216221999120000200102003620036200362003620036
200242003515002646199532520010200102001012686554916955200352003517424131751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
200242003515000619953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
2002420035150036619953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
2002420035150018619953462003220010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
200242003515000619953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036
2002420035149021619953252001020010200101268655491695520035200351742431751520010200203002020035104111002110910100101000000640216221999120000200102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  extr x0, x8, x9, 13
  extr x1, x8, x9, 13
  extr x2, x8, x9, 13
  extr x3, x8, x9, 13
  extr x4, x8, x9, 13
  extr x5, x8, x9, 13
  extr x6, x8, x9, 13
  extr x7, x8, x9, 13
  mov x8, 9
  mov x9, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204800406000712251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000100513621611800331600001601008003780037800378003780037
16020480036599047251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
16020480036600047251601001601001601008805000497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
16020480036599047251601001601001601009211171497695638003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
160204800366000712251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
160204800366000522251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
16020480036599047251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
16020480036599068251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
160204800365991247251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037
16020480036599047251601001601001601008805001497695608003680036599643599941601001602002402008003684118020110099100801001000000511011611800331600001601008003780037800378003780037

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002480038599046251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101000000502011711800331600001600108003780037800378003780037
16002480036599046251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101000010502011711800331600001600108003780037800378003780037
16002480073599046251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101000000502011711800331600001600108003780037800378003780037
16002480036600046251600101600101600108800501497695680036800366000803600151600101600202400208003684118002110910800101000000502011711800331600001600108003780037800378003780078
16002480036600046251600101600101600108800501497695680079800365998603600151600101600202400208007984118002110910800101000000502011712800331600001600108003780037800378003780037
16002480036599046251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101000000502011711800331600001600108003780037800378003780037
160024800365990829251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101003000502011711800331600001600108003780037800378003780037
16002480036599078925160010160010160010880050149769568003680036599860360015160010160020240020800368421800211091080010100524200502011711800331600001600108003780037800378003780037
16002480036599046251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101010000502015311800331602161600108003780037800378003780037
16002480036599046251600101600101600108800501497695680036800365998603600151600101600202400208003684118002110910800101000000502011711800331600001600108003780037800378003780037