Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, H from X)

Test 1: uops

Code:

  scvtf h0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

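retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee)
2004 | 582 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 538 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1042 | 1042 | 1 | 1000 | 1000
2004 | 539 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000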

Test 2: Latency 1->2 roundtrip

Code:

  scvtf h0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0030

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
3020410003040101101012000010000100200001000130015461722578264301012001000220004200100022000410001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100212003910003100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410004940101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100212003910002100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100
3020410003040101101012000010000100200001000030015461742578264301002001000220002200100022000210001100001000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0030

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
300241000304001110011200001000010200001000130154617625782643001120100022000420100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001001730154638425786173005620100212003920101132021810016100001000010010
300241004214005010022200191000910201171031737156270326058593085622103382066424104122081210065100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100002000010001100001000010010
300241000304001110011200001000010200001000030154617425782643001020100002000020100212003910003100001000010010

Test 3: throughput

Count: 8

Code:

  scvtf h0, x8
  scvtf h1, x8
  scvtf h2, x8
  scvtf h3, x8
  scvtf h4, x8
  scvtf h5, x8
  scvtf h6, x8
  scvtf h7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5011

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
16020440163160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440090160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440090160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440090160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440090160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440090160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440090160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020540140160175101800368003810080054800123002400366400801601242008001280012200800128001218000080000100
16020440112160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100
16020440092160115101800068000810080012800123002400366400801601242008001280012200800128001218000080000100

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5006

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
16002440499160025118000680008108001280012302400366400801600342080012800122080000800001800008000010
16002440062160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002440046160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002440046160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002440128160011118000080000108000080000302453886493771600102080000800002080000800001800008000010
16002440048160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002440046160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002440056160011118000080000108000080000303112527265911600102080000800002080000800001800008000010
16002440049160011118000080000108000080000302400006400001600102080000800002080000800001800008000010
16002440048160011118000080000108000080000302760646920531600102080000800002080000800001800008000010