Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

TST (register, 32-bit)

Test 1: uops

Code:

  tst w0, w1
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | f5 | f6 | f7 | f8 | fd
10043693036251000100010005000136936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073318333661000370370370370370
10043692036251000100010005000136936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000136936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073318333661000370370370370370
10043693036251000100010005000136936920632251000100020003696611100110000073318333661000370370370370370

Test 2: Latency 3->1

Chain cycles: 1

Code:

  tst w0, w1
  cset x0, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020420035150000030006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101228221999220000101002003620036200362003620036
20204200351500000006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101228231999220000101002003620036200362003620036
202042003515000000016619926252010020100201001297150491695520035200351740631748120100202903020020035104212020110099100201001010000000013101228231999220000101002003620036200362003620036
20204200351500000006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101328221999220000101002003620036200362003620036
20204200351500000282072619926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101328331999220000101002003620036200362003620036
2020420035150000038106119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101328221999220000101002003620036200362003620036
20204200351500000006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101328321999220000101002003620036200362003620036
20204200351500000006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101228221999220000101002003620036200362003620036
20204200351490000906119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101328321999220000101002003620036200362003620036
2020420035150000044106119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010000000013101228221999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 19 | 1e | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200242003515002040611991825200102001020010129724704916955020035200351742831750420010200203002020035104112002110910200101001000001270127211999520000100102003620036200362003620036
200242003515001860611991825200102001020010129724714916955020035200351742831750420010200203002020035104112002110910200101001001001270127111999520000100102003620036200362003620036
2002420035150000611991825200102001020010129724714916955020035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102003620036200362003620036
2002420035150000611991825200102001020010129724704916955320035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102003620036200362003620036
20024200351500300611991825200102001020010129724714916955020035200351742831750420010200203002020035104112002110910200101001000001270227111999520000100102003620036200362003620036
20024200351500210611991825200102001020010129724704916955020035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102003620036200362003620036
2002420035150000611991825200102001020010129724704916955020035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102003620036200362003620036
2002420035150000611991825200102001020010129724704916955020035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102006820036200362003620036
200242003515003607261991825200102001020010129724704916955020035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102003620036200362003620036
2002420035150000611991825200102001020010129724704916955020035200351742831750420010200203002020035104112002110910200101001000001270127111999520000100102003620036200362003620036

Test 3: Latency 3->2

Chain cycles: 1

Code:

  tst w0, w1
  cset x1, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | a9 | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003515001806119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500006119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500006119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101328221999220000101002003620036200362003620036
202042003514903306119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500006119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228321999220000101002003620036200362003620036
20204200351500006119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
202042003515001206119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
202042003515001806119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
2020420035150062706119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500006119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228231999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200242003515020461199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270227221999520000100102003620036200362003620036
200242003515025861199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270227221999520000100102003620036200362003620036
2002420035150061199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270327221999520000100102003620036200362003620036
2002420035150061199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270227221999520000100102003620036200362003620036
20024200351502161199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270327221999520000100102003620036200362003620036
2002420035150061199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270227221999520000100102003620036200362003620036
20024200351501261199183220010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270327221999520000100102003620036200362003620036
2002420035150061199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270227221999520000100102003620036200362003620036
2002420035150061199182520010200102001012972470049169550200352003517428317504200102002030020200351041120021109102001010010001270227221999520000100102003620036200362003620036
2002420035150061199182520010200102001012972470098169550200352003517428317504200102002030020200351041120021109102001010010001270227321999520000100102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  tst w0, w1
  tst w0, w1
  tst w0, w1
  tst w0, w1
  tst w0, w1
  tst w0, w1
  tst w0, w1
  tst w0, w1
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204267712004483525801008010080100400500149236552673526801166720316690801008020016020026735661180201100991008010010051102191126731800001002673626736267362673626736
8020426735200003525801008010080100400500149236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735200003525801008010080100400500149236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735200003525801008010080100400500149236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735201003525801008010080100400500049236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735200003525801008010080100400500049236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735200003525801008010080100400500149236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735200003525801008010080100400500149236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736
8020426735200003525801008010080100400500049236552673526735166720316711801008020016020026735661180201100991008010010051101191126732800001002673626736267362673626736
8020426735200003525801008010080100400500149236552673526735166720316690801008020016020026735661180201100991008010010051101191126731800001002673626736267362673626736

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800242671020063525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502000118112670180000102670626706267062670626706
8002426705200243525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502053118112670180000102670626706267062670626706
8002426705200243525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502050118112670180000102670626706267062670626706
8002426705200243525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502050118112670180000102670626706267062670626706
800242670520003525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502050118112670180000102670626706267062670626706
8002426705200213525800108001080010400395005492362526705267051666531668380010800201600202670566118002110910800101000502054118112670180000102670626706267062670626706
80024267052002493525800108001080010400050005492362526705267051666531668380010800201600202670566118002110910800101000502054118112670180000102670626706267062670626706
800242670520003525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502054118112670180000102670626706267062670626706
8002426705200243525800108001080010400050010492362526705267051666531668380010800201600202670566118002110910800101000502054118112670180000102670626706267062670626706
8002526705200213525800108001080010400050005492362526705267051666531668380010800201600202670566118002110910800101000502000118112670180000102670626706267062670626706