Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

EXTR (register, 32-bit)

Test 1: uops

Code:

  extr w0, w0, w1, 13
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 2.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200410368004625200020002000110001103610367393768200020003000103684111001100003731171110332000200010371037103710371037
2004103680046252000200020001100011036103673937682000200030001036841110011000048731171110332000200010371037103710371037
2004103670154625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
2004103680046252000200020001100011036103673937682000200030001036841110011000027731171110332000200010371037103710371037
200410368004625200020002000110001103610367393768200020003000103684111001100000732171210332000200010371037103710371037
20041036801214125200020002000110000103610367393768200020003000103684111001100006731171110332000200010371037103710371037
2004103670046252000200020001100011036103673937682000200030001036841110011000018731171110332000200010371037103710371037
200410367004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410368004625200020002000110000103610367393768200020003000103684111001100000731171110332000200010371037103710371037
200410368004625200020002000110001103610367393768200020003000103684111001100000732172210332000200010371037103710371037

Test 2: Latency 1->2

Code:

  extr w0, w0, w1, 13
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105001496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000300710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001002000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500047252010020100201001105000496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037
20204100367500147252015420100201001105001496956010036100367464374942010020200302001003684111020110099100101001000000710116111003320000201001003710037100371003710037

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024100367609625200102001020010110050149695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
20024100367504625200102001020010110050149695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
20024100367504625200102001020010110050049695610036100367486375462001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
200241003675014125200102001020010110050149695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
20024100367504625200102001020010110050149695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
2002410036750109252001020010200101100500496956100361003674863751520010200203002010036841110021109101001010000063640217221003320000200101003710037100371003710037
20024100367504625200102001020010110050149695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
20024100367504625200102001020010110050049695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037
20024100367594625200102001020010110050049695610036100367486375152001020020300201003684111002110910100101000003640217221003320000200101003710037100371003710037
20024100367604625200102001020010110050049695610036100367486375152001020020300201003684111002110910100101000000640217221003320000200101003710037100371003710037

Test 3: Latency 1->3

Code:

  extr w0, w1, w0, 13
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0035

retire uop (01) | cycle (02) | 03 | 18 | 1e | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020420035150000103995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216321999320000201002003620036200362003620036
202042003515000061995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
202042003515000061995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071001216221999320000201002003620036200362003620036
202042003515000061995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
2020420035150000619951252010020100201001264318154916955200352003517406221746820100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
2020420035150000170995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
202042003515000061995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
20204200351500001040995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
202042003515000061995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036
202042003515000061995125201002010020100126431815491695520035200351740631749320100202003020020035104111020110099100101001000071051216221999320000201002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0035

retire uop (01) | cycle (02) | 03 | 09 | 19 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024200351500006199532520010200102001012686550491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
200242003515000016899532520010200102001012686550491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
20024200351501006199532520010200102001012686551491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
200242003515000061995325200102001020010126865504916955200352003517424317515200102002030020200351041110021109101001010410640216221999120000200102003620036200362003620036
20024200351500006199532520010200102001012686550491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
20024200351500006899532520010200102001012686551491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
20024200351500006199532520010200102001012686551491695520035200351742431751520010200203002020035104411002110910100101000640216221999120000200102003620036200362003620036
20024200351500008299532520010200102001012686550491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
20024200351500006199532520010200102001012686551491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036
20024200351500006199532520010200102001012686550491695520035200351742431751520010200203002020035104111002110910100101000640216221999120000200102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  extr w0, w8, w9, 13
  extr w1, w8, w9, 13
  extr w2, w8, w9, 13
  extr w3, w8, w9, 13
  extr w4, w8, w9, 13
  extr w5, w8, w9, 13
  extr w6, w8, w9, 13
  extr w7, w8, w9, 13
  mov x8, 9
  mov x9, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204800405990000000472516010016010016010088050000497695680036800365996435999416010016020024020080036841180201100991008010010030000511021611800331600001601008003780085800378003780037
16020480036599000015300682516010016010016010088050010497695680036800365996435999416010016028824020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800365991000000472516010016010016010088050002497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800365990000000472516010016010016010088050000497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800365990000000472516010016010016010088050010497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
16020480036599000000028602516010016010016010088050010497695680036800745996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800365990000000682516010016010016010088050000497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800366000000000472516010016010016010088050000497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800366000000000472516010016010016010088050010497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037
160204800365990000000472516010016010016010088050010497695680036800365996435999416010016020024020080036841180201100991008010010000000511011611800331600001601008003780037800378003780037

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024800385990000521251600101600101600108800501497695608003680036599863600151600101600202400208003684118002110910800101000030050200006176580033160000001600108003780037802578003780037
16002480036599000046251600101600101600108800501497695608003680036599863600151600101600202400208003684118002110910800101000000050200005175480033160000001600108003780037800378003780037
16002480036600000646251600721600101600108800501497695608003680036600623600151600101600202400208003684118002110910800101000000050200006175680033160000001600108008580037800378003780037
1600248003659900004262516001016001016001088005014976956080036800365998636001516001016002024002080036841180021109108001010000000502000051765800331600002201600108003780037800378003780037
16002480036599000612711251600101600101600108800501497695608003680036599863600151600101600202400208003684118002110910800101000000050200004176580033160000001600108003780037800378003780037
160024800365990000521421600101600101600108800501497695608003680036599863600151600101600202400208003684118002110910800101010000050200006175580033160000001600108003780037800378003780037
16002480036600000046251600101600101600108800501497695608003680036599863600151600101600202400208003684118002110910800101000000050200006175680033160000001600108003780037800378003780037
1600248003659900007112516001016001016001088005014976956080036800365998636001516001016002024002080036841180021109108001010000000502000061755800331600000131600108003780037800378003780037
160024800366000000521251600101600101600108800501497695608003680036599863600151600101600202400208003684118002110910800101000090050200006174680033160000001600108003780037800378003780037
160024800365990000711251600101600101600108800501497695608003680036599863600151600731600202400208003684118002110910800101000000050200005175580033160000001600108003780037800378003780037