Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, fixed-point, H from W)

Test 1: uops

Code:

  ucvtf h0, w0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437630361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377398
200437930361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100010731161137310001000377377377377377
200437620361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100003731161137310001000377383377377377
200437630361252000100010001000100014075228201357376384723109200010001000100010003763761110011000100000731161137410001000377377377377377
200437620361252000100010001000100014075228201357376378723139200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014498228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003783761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf h0, w0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020413003297410010000001300171194082540100101002000010000100200001004950062145451480372911300130130032130032125473612623530289200100032000520010003200051300321300331120201100991001010010000100001000000001111317021632129527100000100001000010100130033130033130033130033130033
3020413003297410000000001300171194082540100101002000010000100200001000050062144971480270901300130130032130032125473612623630100200100032000520010003200051300321300321120201100991001010010000100001000000001111318031632129527100000100001000010100130036130033130033130033130033
3020413003297400010000001300171194082540100101002000010000100200001000050062144971480270901300130130032130032125466312624230100200100002000020010000200001300321300321120201100991001010010000100001000000000001310131634129519100000100001000010100130033130033130033130033130210
3020413003297400001000001300231194082540117101002000010000100200001000050062144971480270901300130130032130034125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310131633129519100000100001000010100130033130033130033130033130033
30204130032974000000000013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000001800001310131644129519100000100001000010100130033130033130033130033130033
3020413003297400000000001300171194082540100101002000010000100200001000050062144971480270901300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310141644129519100000100001000010100130033130033130033130033130033
3020413003297400000000001300171194082540100101002000010000100200001000061662144971480509601300490130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310131634129519100000100001000010100130033130033130033130033130033
3020413003297400000000001300171194082540100101002000010000100200001000050062144971480270901300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310131634129519100000100001000010100130033130033130033130033130033
3020413003297400000000001300171194082540100101002000010000100200001000050062144971480270901300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000200000001310141633129519100000100001000010100130033130033130033130033130033
3020413003297400000000001300171194082540100101002000010000100200001000050062144971480270901300130130032130032125466312624030100200100002000020210000200001300321300321120201100991001010010000100001000000000001310131634129519100000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024130032973101900013009811941025400161001020000100001020000100005062145451480052813001301301161300321254891312634530159201000020000201006220000130032130032212002110910100101000010210000130012701251112953010000100001000010010130033130118130033130207130033
300241300329740000001300171194082540010100102000010000102000010000506214497148005281300130130032130032125489312648430010201000020000201000020000130032130032112002110910100101000010010000000012701161112951910000100001000010010130033130033130033130035130033
300241300329740000001300171194082540010100102000010000102011810000506214497148005281300130130032130032125489312631730010201000020000201000020000130032130034112002110910100101000010010000000012701161212951910005100001000010010130033130033130033130033130033
300241300329740000001300171194082540010100102000010000102000010000506214497148005281300130130032130032125489312630530010201000020000201000020000130032130032112002110910100101000010010000000012701161112951910000100001000010010130033130033130033130033130033
300241300329740003001300171194082540010100102000010000102000010000506214497148005281300130130032130032125489312626230010201000020000201000020000130032130032112002110910100101000010010000000012702161112952210000100001000010010130033130033130033130033130033
300241300329740000001300171194082540010100102000010000102000010000506214497148005281300130130032130032125489312626230010201000020000201000020000130032130032112002110910100101000010010000000012701161112951910000100001000010010130033130207130035130217130033
3002413011697401143810401300191194082540010100102000010000102000010000506214497148005281300130130032130032125489312626230010201000020000201006220000130032130032112002110910100101000010010000230012701161112951910000100001000010010130034130033130033130033130033
300241300329740000011300171194422540010100102000010000102000010000506214497148005281300130130032130032125489312626230010201000020000201000020000130033130032112002110910100101000010010000060012701161112951910000100001000010010130033130081130033130033130033
300241300329740000001300171194082540010100102000010000102000010000506214497148005281300130130032130032125494312626230010201000020000201000020000130032130032112002110910100101000010010000090012701161112951910000100001000010010130033130033130033130033130033
300241300359740000001300171194082540010100102000010000102000010000506214497148005281300130130032130032125489312626230172201000020000201000020000130032130032112002110910100101000010010000000012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf h0, w8, #3
  ucvtf h1, w8, #3
  ucvtf h2, w8, #3
  ucvtf h3, w8, #3
  ucvtf h4, w8, #3
  ucvtf h5, w8, #3
  ucvtf h6, w8, #3
  ucvtf h7, w8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267102000266940251601001008000080000100800208001550011689511884163026690267092670967320666621601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920015266940251601001008000080000100800208001550011689511884163026690267092670967300666681601352008002080020200800208002026709267091180201100991001008000010080000031115117016002670680000800001002671026710267102671026710
1602042670920024266942251601001008000080000100800208001550011689511884163026690267092670967130666601601352008002080024200800208002026711267131180201100991001008000010080000001115117016002670980000800001002671026713267232671026710
16020426770200297266940251601001008000080000100800208001550011689511884163026690267092670967080666601601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920015266940251601001008000080000100800208001550011689511884163026690267092670967410666591601352008002080020200800208002026709267091180201100991001008000010080000001115117016112670680000800001002671026710267102671026710
1602042670920018266940251601001008000080000100800208001550011688461884163026690267092670967440666691601352008002080020200800208002026709267091180201100991001008000010080000001115117016032670680000800001002671026711267102671326710
160204267171999266940251601001008000080000100800208001550011689511884163026690267092670967340666611601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670919992669402516010010080000800001008002080015500116895118841630266902670926709672402666631601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920018266940251601001008000080000100800208001550011689511884163026690267092670967140666601601352008021580020200800208002026713267291180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920018266940251601001008000080000100800208001550011689511884163026690267092670967030666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a7 | a8 | a9 | ac | cf | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242671120000002669402516001010800008000010800008000050116888018840320026690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020051655267068000080000102671026710267102671026710
16002426709200000026694025160010108000080000108000080000501168880188403201266902670926709665303668916001020800008000020800008000026709267091180021109101080000100080000000155020061635267068000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018840320126690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020051656267128000080000102671026710267102671026710
160024267092000000266941492516079010800008000010800008000050116888018840320026690267092670966530366921600102080000800002080000800002670926709118002110910108000010008000000005020031635267068000080000102671626710267102671026710
1600242670920000002669412516001010800008000010800008000050116888018840320026690267152689666560367001600102080000800002080026800002677026709118002110910108000010008000000005020031655267068000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018840320126690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020031655267758000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018840320026690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020051653267178000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018840320126690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020051657267068000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018840320026690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020051655267128000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018840320026690267092670966530366891600102080000800002080000800002670926709118002110910108000010008000000005020031655267118000080000102671026710267132671226710