Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

UMOV (D to X)

Test 1: uops

Code:

  umov x0, v0.d[1]
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10045384062252000100010001000800015195385383703396100010001000538538111001100000731160153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100003731161153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000731160153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000731160153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000731160153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000752160153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000731161153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000732160153510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000731160153510001000539539539539539
10045385043252000100010001000800005195385383703396100010001000538538111001100010751160153510001000539539539539539

Test 2: Latency 1->2 roundtrip

Code:

  umov x0, v0.d[1]
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0038

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204100038775003000001000238956125301001010010000100001001000010000500477848155803490100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000120013102161199639100001000010100100039100039100039100039100039
20204100116775000000001000238956125301001010010000100001001000010000527477857755803491100013100038100038969003974962010020010000100002001000010000100038100038212020110099100101001000010000100000103013101161199637100001000010100100039100039100039100039100039
202041000387760000012001000238956825301001010010000100001001000010000500477848155803490100013100038100038969003974972010020010000100002001000010000100038100038112020110099100101001000010000100000003013101161199637100001000010100100039100039100040100203100039
202041000387750000021001000298956125301001010010000100001001000010000500477862555803490100013100038100038969003974972010020010000100002001000010000100038100038112020110099100101001000010000100000006013101161199637100001000010100100039100039100039100039100039
2020410003877500000180010002389561253010010100100001000010010000100005004778481558040701000131000381001189690039749620224200100001000020010000100001000381000381120201100991001010010000100001000001000131011611100038100001000010100100039100039100473100040100039
20204100038775000020001000248956125301001010010000100001001000010000500477848155803490100013100038100041969003974972010020010000100002001000010000100038100038112020110099100101001000010000100000100013101161199637100001000010100100039100039100039100041100042
20204100038776000000001000238956125301001010010000100001001000010000500477848155803490100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000000013101162199637100001000010100100042100042100039100039100039
20204100038776000000001000238956125301001010010000100001001000010000500477848155803490100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000000013101251199637100001000010100100039100042100039100039100039
20204100117776000100001000238956125301001010010000100001001000010050500477848155803491100013100038100124969003974962010020010000100002001000010000100038100122112020110099100101001000010000100000000013101161199637100001000010100100039100039100039100039100039
20204100038776000100001000238956125301001010010000100001001005110050500477848155803490100013100038100038969003974982010020010000101232081030910247100354100038112020110099100101001000010000100020003013102163199708100001000010100100039100039100039100039100043

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0038

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3e | 3f | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024100038776000000010002308956125300101001010000100001010000100005047784815578954100013010003810003896922397518200102010000100002010000100001000381000381120021109101001010000100010000000000012701916102199637100001000010010100039100039100039100039100039
20024100038775000000010002308956125300101001010000100001010000100005047784815578954100013010003810003896922397518200102010000100002010000100001000381000381120021109101001010000100010000000000012701616181899637100001000010010100039100040100039100039100039
20024100038775000000010002308956125300101001010000100001010000100005047784815578954100016010003810003896922397518200102010000100002010000100001000381000381120021109101001010000100010000000000012701516191999637100001000010010100039100039100039100039100039
20024100038775000000010002308956325300101001010000100001010000100005047784815578954100013010003810003896922397518200102010000100002010000100001000381000381120021109101001010000100010000000000012701816191899637100001000010010100039100039100039100039100039
20024100038775000000010002308956160300101001010000100001010000100005047784815578954100013010003810003896922397518200102010000100002010000100001000381000381120021109101001010000100010000000000012701016101899637100001000010010100039100039100039100039100039
200241000387750123567032100023368964133300101001610000100001010000100005047784815579011100013010003810003896922397518200102010000100002010000100001000381000401120021109101001010000100010000003000012701916181999637100001000010010100039100039100040100039100039
200241000388030000000100107089561253001010010100001000010100001000050477848155789541000130100039100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000000147419161932101481100001000010010100044100041100040100039100041
200241000458030000240010002608956125300161001010000100001010000100005047784815578954100013010003810003896922397518200102010000100002010000100001000381000381120021109101001010000100010002000000012701916111999637100001000010010100040100039100039100039100041
200241000387750000120010010808956125300101001010000100001010000100005047786255578954100013010003810004296922397518200102010000100002010000100001000421000381120021109101001010000100010000200011012701916191899698100041000010010100039100039100039100039100039
20024100038775000000010017708956125300181001010000100001210000100005047784815578954100013010004010004196922397518200102010066100002010000100001000381000381120021109101001010000100010000003000012702016172099637100001000010010100039100039100039100039100039

Test 3: throughput

Count: 8

Code:

  umov x0, v8.d[1]
  umov x1, v8.d[1]
  umov x2, v8.d[1]
  umov x3, v8.d[1]
  umov x4, v8.d[1]
  umov x5, v8.d[1]
  umov x6, v8.d[1]
  umov x7, v8.d[1]
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5006

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020440053300000000032251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000000011151170116114003580000801004003940039400394003940039
80204400383000000001697251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000011151170116114003580000801004003940039400394003940039
8020440038300000000032251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000100011151190116114003580000801004003940039400394003940039
8020440038311000000032251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000003011151170116114003580000801004003940039400394003940039
8020440038310000000132251601008010080000100800045006400241400193400384003829976062999180104200800162008001640038400381180201100991008010010000000011151170116114003580000801004003940039400394003940039
8020440038327000000074251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000000011151170116114003580000801004003940039400394003940039
80204400383100000000120251601008010080000100800045006400241400190400384003829976062999180104200800162008042440038400381180201100991008010010000100011151170116114003580000801004003940039400394003940039
8020440038310000000132251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000100011151170116114003580000801004003940039400394003940039
80204400383100000000697251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000000011151170116114003580000801004003940039400394003940039
8020440038310000000032251601008010080000100800045006400241400190400384003829976062999180104200800162008001640038400381180201100991008010010000100011151170116114003580000801004003940039400394003940108

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800244004731011221742325126216032680093800001080000506400004017940038400382999214300188009520800002080000400384003821800211091080010100321185020116000714003580000800104003940039400394003940039
800244003832300001056028825160010801678000010800005064000040019400384003829992330065800102080000208000040038400381180021109108001010000035020116000214003580000800104003940039400394003940039
8002440038325000021033125160010800108000010800005064000040019400384003829992330018800102080089208000040038400381180021109108001010000195020116000124003580000800104003940039400394003940039
8002440038330000090120725160010800108007910800005064000040019400384003829992730018800102080000208000040038400381180021109108001010000135020116000114003580000800104003940102400394003940039
8002440038321000000116525160010800108000010800005064000040019400384010429992330065800102080000208000040103400382180021109108001010000135020124000234003580000800104003940039400394003940039
800244003832100001204325160010800108000010800005064000040019400384003829992330018800952080000208000040038400381180021109108001010000135020116000224003580000800104010640039400394003940039
80024400383250000901512516001080010800771080000506400004001940038400382999233001880010208000020800004003840038118002110910800101000003502011612703224003580000800104003940039400394003940039
800244003832100000013025160010800108008010800005064063840019401034003829992330018800102080000208009640038400381180021109108001010400105020116001114009180000800104003940107401044003940039
8002440038324000012041325160010800108000010800885064000040019400384003829992330018800102080000208000040038401031180021109108001010420135020116001114003580000800104003940039400394003940106
8002440038327000090432516001080010800001080000506400004001941384413593051410930479801792080000208000040038400381180021109108001010000135020116030114003580000800104003940039400394003940039