Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, D from W)

Test 1: uops

Code:

  scvtf d0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01)cycle (02)schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch simd uop (57)dispatch ldst uop (58)simd uops in schedulers (5a)ldst uops in schedulers (5b)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)? int output thing (e9)? ldst retires (ed)? simd retires (ee)
20045982001110001000100010003000800020001000100010001000110001000
20045382001110001000100010003000800020001000100010001000110001000
20046302061110301030104210003000800020001000100010001000110001000
20045382001110001000100010003000800020001000100010001000110001000
20045372001110001000100010003000800020001000100010001000110001000
20045552001110001000100010003000800020001000100010001000110001000
20045452001110001000100010003000800020001000100010001000110001000
20045792001110001000100010003000800020001000100010001000110001000
20045372001110001000100010003000800020001000100010001000110001000
20045372001110001000100010003000800020001000100010001000110001000

Test 2: Latency 1->2 roundtrip

Code:

  scvtf d0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0030

retire uop (01)cycle (02)schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)? int output thing (e9)? ldst retires (ed)? simd retires (ee)? int retires (ef)
3020410003040101101012000010000100200001000130015461722578264301012001000220004200100022000410001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100212004110003100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020510012140108101032000310002100200291000030015461742578264301002001000220002200100022000410001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0030

retire uop (01)cycle (02)schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)? int output thing (e9)? ldst retires (ed)? simd retires (ee)? int retires (ef)
300241000314001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001001730154741125802313005520100182003520100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010

Test 3: throughput

Count: 8

Code:

  scvtf d0, w8
  scvtf d1, w8
  scvtf d2, w8
  scvtf d3, w8
  scvtf d4, w8
  scvtf d5, w8
  scvtf d6, w8
  scvtf d7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5019

retire uop (01)cycle (02)schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)? int output thing (e9)? ldst retires (ed)? simd retires (ee)? int retires (ef)
16020440130160115101800068000810080012801383002580396635381603762008013880138200800128001218000080000100
16020440257160177101800378003910080055800543002402706404641602082008005480054200800128001218000080000100
16020540516160358102801278012910180182800123002479906491801601242008001280012200801808018018000080000100
16020440090160115101800068000810080012800543002595946663961602082008005480054200800548005418000080000100
16020440153160115101800068000810080012802223002724196796341605442008022280222200801388013818000080000100
16020440210160175101800368003810080054800123002400366400801601242008001280012200800128001218000080000100
16020540384160295101800968009810080138800123002426846427741601242008001280012200800968009618000080000100
16020440090160115101800068000810080012800123002595466677031601242008001280012200800548005418000080000100
16020440146160115101800068000810080012801383002577356655321603762008013880138200800128001218000080000100
16020440342160235101800668006810080096800123002400366400801601242008001280012200800128001218000080000100

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5013

retire uop (01)cycle (02)schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)? int output thing (e9)? ldst retires (ed)? simd retires (ee)? int retires (ef)
16002540622160147118006780069108009780012302400366400801600342080012800122080000800001800008000010
16002540243160131118006080060108008480000302400006400001600102080000800002080000800001800008000010
16002440183160071118003080030108004280055302402736404721601202080055800552080000800001800008000010
16002440096160011118000080000108000080084302776746852061601782080084800842080054800541800008000010
16002440046160011118000080000108000080000302578956588141600102080000800002080084800841800008000010
16002540120160071118003080030108004280000302400006400001600102080000800002080000800001800008000010
16002440309160131118006080060108008480000302400006400001600102080000800002080096800961800008000010
16002440062160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002540237160145118006680068108009680000302400006400001600102080000800002080000800001800008000010
16002440179160071118003080030108004280055302784936918331601202080055800552080000800001800008000010