Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, integer, H from X)

Test 1: uops

Code:

  ucvtf h0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

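retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee)
2004 | 603 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 539 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 540 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3084 | 8084 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 539 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000
2004 | 537 | 2001 | 1 | 1000 | 1000 | 1000 | 1000 | 3000 | 8000 | 2000 | 1000 | 1000 | 1000 | 1000 | 1 | 1000 | 1000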

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf h0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0030

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10001 | 300 | 1546172 | 2578264 | 30101 | 200 | 10002 | 20004 | 200 | 10002 | 20004 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10001 | 300 | 1546176 | 2578264 | 30101 | 200 | 10002 | 20004 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30205 | 100060 | 40107 | 10102 | 20003 | 10002 | 100 | 20028 | 10000 | 300 | 1546438 | 2578680 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100
30204 | 100030 | 40101 | 10101 | 20000 | 10000 | 100 | 20000 | 10000 | 300 | 1546174 | 2578264 | 30100 | 200 | 10002 | 20002 | 200 | 10002 | 20002 | 10001 | 10000 | 10000 | 10100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0030

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
30024 | 100033 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546168 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100418 | 40051 | 10023 | 20020 | 10008 | 10 | 20116 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10021 | 20039 | 10003 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010
30024 | 100030 | 40011 | 10011 | 20000 | 10000 | 10 | 20000 | 10000 | 30 | 1546174 | 2578264 | 30010 | 20 | 10000 | 20000 | 20 | 10000 | 20000 | 10001 | 10000 | 10000 | 10010

Test 3: throughput

Count: 8

Code:

  ucvtf h0, x8
  ucvtf h1, x8
  ucvtf h2, x8
  ucvtf h3, x8
  ucvtf h4, x8
  ucvtf h5, x8
  ucvtf h6, x8
  ucvtf h7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5011

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
160204 | 40135 | 160117 | 101 | 80007 | 80009 | 100 | 80013 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40090 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40090 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40101 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40090 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80054 | 300 | 262832 | 666853 | 160208 | 200 | 80054 | 80054 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40158 | 160117 | 101 | 80007 | 80009 | 100 | 80013 | 80012 | 300 | 240544 | 641387 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40094 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 241022 | 641838 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40094 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40090 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100
160204 | 40090 | 160115 | 101 | 80006 | 80008 | 100 | 80012 | 80012 | 300 | 240036 | 640080 | 160124 | 200 | 80012 | 80012 | 200 | 80012 | 80012 | 1 | 80000 | 80000 | 100

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5008

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
160024 | 40396 | 160027 | 11 | 80007 | 80009 | 10 | 80013 | 80012 | 30 | 240036 | 640080 | 160034 | 20 | 80012 | 80012 | 20 | 80012 | 80012 | 1 | 80000 | 80000 | 10
160024 | 40083 | 160025 | 11 | 80006 | 80008 | 10 | 80012 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40063 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40063 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40063 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40249 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80054 | 80054 | 1 | 80000 | 80000 | 10
160024 | 40071 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 282774 | 697286 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40065 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 247915 | 647915 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40063 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10
160024 | 40065 | 160011 | 11 | 80000 | 80000 | 10 | 80000 | 80000 | 30 | 240000 | 640000 | 160010 | 20 | 80000 | 80000 | 20 | 80000 | 80000 | 1 | 80000 | 80000 | 10