Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

TST (immediate, 32-bit)

Test 1: uops

Code:

  tst w0, #3
  mov x0, 1

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | f5 | f6 | f7 | f8 | fd
10043693036251000100010005000036936920632251000100010003696611100110000073218223661000370370370370370
10043693036251000100010005000036936920632251000100010003696611100110000073218223661000370370370370370
10043692036251000100010005000136936920632251000100010003696611100110000073218223661000370370370370370
10043693036251000100010005000036936920632251000100010003696611100110000073218223661000370370370370370
10043692036251000100010005000036936920632251000100010003696611100110000073218223661000370370370370370
10043692036251000100010005000036936920632251000100010003696611100110006073218223661000370370370370370
10043692036251000100010005000136936920632251000100010003696611100110000073218223661000370370370370370
10043692036251000100010005000036936920632251000100010003696611100110000073218223661000370370370370370
10043693036251000100010005000036936920632251000100010003696611100110000073218223661000370370370370370
10043693036251000100010005000136936920632251000100010003696611100110000073218223661000370370370370370

Test 2: Latency 2->1

Chain cycles: 1

Code:

  tst w0, #3
  cset x0, cc
  mov x0, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204200351500000006119930252010020100201121297233004916955200352003517425061748720112202002020020035104112020110099100201001010000000111131851116212001120000101002003620036200362003620036
20204200351500000006119930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131851116112001120000101002003620036200362003620036
20204200351500000006119930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131850116412001120000101002003620036200362003620036
202042003515010000023219930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131851116112001120000101002003620036200362003620036
20204200351500000006119930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111133750116112004020000101002003620036200362003620127
20204200351500000006119930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131850116112001120000101002003620036200362003620036
2020420035150000210010319930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131850116112001120000101002003620036200362003620036
20204200351500000006119930252010020100201121297233004916955200352003517425761748720112202242022420035104112020110099100201001010000000111131850148112001120000101002003620036200362003620036
20204200351490000006119930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131851116112001120000101002003620036200362003620036
202042003515000000045119930252010020100201121297233004916955200352003517425061748720112202242022420035104112020110099100201001010000000111131851116112001120000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420035150000061199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000001270127121999520000100102003620036200362003620082
20024200351500000124199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000001286127111999520022100102017220036200362003620082
2002420035150100061199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000031270127111999520000100102003620036202152003620036
2002420035150000061199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000001270127111999520000100102003620036200362003620036
2002420035150000061199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000001270127111999520000100102003620036200812012720036
200242003515000120168199184620010200102009212978740491695520035200351744471753020010200202011720035104112002110910200101001000001270127111999520000100102003620036200362003620036
20024200351500000103199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001001001270127111999520000100102003620036200362003620036
2002420035150000061199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000001270227111999520000100102003620036200362003620036
20024200351500000360199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001001031270127111999520000100102003620036200362003620036
2002420035150000061199182520010200102001012972470491695520035200351742831750420010200202002020035104112002110910200101001000001270127111999520000100102003620036200362003620036

Test 3: throughput

Count: 8

Code:

  tst w0, #3
  tst w0, #3
  tst w0, #3
  tst w0, #3
  tst w0, #3
  tst w0, #3
  tst w0, #3
  tst w0, #3
  mov x0, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020426761200100132266278011580115801214005900492365926739267391667916166898025880367802322673966118020110099100801001000000011151228168826736800151002674026740267402674026740
8020426739200100127623427801158011580121400590149236592673926739166796166898012180232802322673966118020110099100801001000000011151228168926736800151002674026740267402674026740
8020426739200100121323427801158011580121400590149236592673926739166796166898012180232802322673966118020110099100801001000000011151228163826736800151002674026740267402674026740
8020426739200100133623427801158011580121400590049236592673926739166796166898012180232802322673966118020110099100801001000000011151228168826736800151002674026740267402674026740
80204267392001001240234278011580115801214005901492365926739267391667961668980121802328023226739661180201100991008010010000000111512210163826736800151002674026740267402674026740
802042673920010010233827801158011580121400590149236592673926739166796166898012180232802322673966118020110099100801001000000011151228169926736800151002674026740267402674026740
80204267392001001216234278011580115801214005900492365926739267391667961668980121802328023226739661180201100991008010010004030111512281610826736800151002674026740267402674026740
80204267392001001228234278011580115801214005900492365926739267391667961668980121802328023226739132118020110099100801001000000011151228168826736800151002674026740267402674026740
8020426739200100121023427801158011580121400590049236592673926739166796166898012180232802322673966118020110099100801001000000011151228169926736800151002674026740267402674026740
802042673920010012423427801158011580121400590049236592673926739166796166898012180232802322673966118020110099100801001000000011151228168826736800151002674026740267402674026740

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ec | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800242672220031125800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010005020021811267018000000102670626706267062670626706
80024267052003525800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010005020011811267018000000102670626706267062670626706
8002426705200528258001080010800104000504923625267052670516665316683800108002080020267056611800211091080010100050200118112670180000016102670626706267062670626706
80024267052003525800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010005020011811267018000000102670626706267062670626706
800242670520014425800108001080010400050492362526705267101666531668380010800208002026705661180021109108001010005020011811267018000000102670626706267062670626706
80024267052003525800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010005020011811267018000000102670626706267062670626706
80024267051993525800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010005020011811267018000000102670626706267062670626706
80024267052003525800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010005020011811267018000000102670626706267062670626706
80024267052003525800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010105020011811267018006400102670626706267062670626706
80024267052003547800108001080010400050492362526705267051666531668380010800208002026705661180021109108001010105020011811267018000000102670626706267062670626706