Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, fixed-point, D from W)

Test 1: uops

Code:

  ucvtf d0, w0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437630361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437930361252000100010001000100014075228530357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014115228200357376376723109200010001000100010003763761110011000100000731161137410001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377381377377
200437630361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf d0, w0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
302041300329740000001300171194082540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131013163312952210000100001000010100130033130033130033130033130033
302041300329740000001300171194082540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131013162212951910000100001000010100130033130033130033130033130033
302041300329740000001300171194082540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131013163312951910000100001000010100130062130033130033130033130033
302041300329740000001300171194102540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131013163212951910000100001000010100130033130034130040130033130033
30204130032974000062101300171194082540100101002000010000100200001000050062144971480282111300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131013163312951910000100001000010100130033130033130033130033130033
302041300329740000001300171194082540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020210000200001300321300321120201100991001010010000100001000000000131013163312951910000100001000010100130033130033130033130033130033
3020413003297400009013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000360131013162212951910000100001000010100130033130033130033130033130033
302041300329740000001300561194082540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091130013013003213003212546631262403010020010064200002001000020000130032130032112020110099100101001000010000100000001770131012163312951910000100001000010100130033130033130033130033130099
302041300329740000001300171194082540100101002000010000100200001000050062144971480270911300130130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000131013162312951910000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003210080000130017119408254001010010200001000010200001000050621449714800528111300130130032130067125551312626230010201000020000201000020000130032130032112002110910100101000010001000000000127031623129519100000100001000010010130033130033130033130033130033
3002413003210070000130017119408254001010010200001000010200001000050621449714800528001300130130032130032125494312626230010201000020000201000020000130032130032112002110910100101000010001000000300127031623129519100000100001000010010130033130035130033130033130036
3002413003210080000130017119409254001010010200001000010200001000050621449714800528001300190130036130087125491312626230180201000020000201000020000130032130032112002110910100101000010001000000000127031632129519100000100001000010010130033130033130033130033130033
3002413003210080000130017119408254001010010200001000010200001000050621500114800528001300130130032130082125489312626230010201000020000201000020000130032130032112002110910100101000010001000000000127021633129519100000100001000010010130033130033130033130033130033
3002413003210080000130017119408254001010010200001000010200001000050621459314801442001300130130032130116125489312626230010201000020000201000020000130032130032112002110910100101000010001000000000127031633129559100000100001000010010130033130033130033130033130099
30024130032100800270130017119408254001010010200001000010200001000050621449714800528001300130130032130032125585312626230010201000020000201000020000130032130032112002110910100101000010001000000000127031632129519100000100001000010010130033130033130033130033130038
30024130032100810001300171194082540010100102000010000102000010000506214497148005280013001301300321300321254902012630230010201000020000201000020000130032130032112002110910100101000010001000000000127031634129519100000100001000010010130033130033130033130033130036
3002413003210080000130017119408254001010010200001000010200001000050621449714800528001300130130032130032125492312626230010201000020000201000020000130032130032112002110910100101000010001000000000127031632129519100000100001000010010130033130033130033130033130033
3002413003210080000130017119408254001010010200001000010200001000050621449714800528001300130130032130109125489312626230010201000020000201000020000130032130032112002110910100101000010001000000000127031632129519100000100001000010010130033130033130033130033130033
3002413003210080000130017119408254001010010200001000010200001000050621449714800528001300130130032130032125512312626230010201006620000201000020000130106130036112002110910100101000010001000000000127021623129519100000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf d0, w8, #3
  ucvtf d1, w8, #3
  ucvtf d2, w8, #3
  ucvtf d3, w8, #3
  ucvtf d4, w8, #3
  ucvtf d5, w8, #3
  ucvtf d6, w8, #3
  ucvtf d7, w8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 0a | 0b | 1e | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020426712207000026694025160100100800008000010080020800155001168951188416326690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010008000000111511701602670680000800001002671026710267102671026710
16020426709207000026694025160100100800008000010080020800155001168951188416326690267092671266320666581601352008002080020200800208002026709267091180201100991001008000010008000000111511701602670680000800001002671026710267102671426710
160204267092070012026694025160100100800008000010080020800155001168951188416326690267092670966320666581601352008002080020200800208002026709267131180201100991001008000010008000000111511711602670680000800001002671426710267102671426710
16020426709207000026694225160100100800008000010080020800155001168951188416326690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010008000000111511701602670680000800001002671026712267102671026710
16020426713207000026694025160100100800008000010080020800155001168951188331826690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010008000000111511701602670680000800001002671026710267102671026710
16020426709207000026698025160100100800008000010080020800155001168951188416326694267092670966320666581601352008002080020200800208002026709267091180201100991001008000010008000000111511701602670680000800001002671426710267102671026711
16020426709207000026694025160100100800008000010080020800155001170925188416326690267152670966320666581601352008002080020200800208002026709267091180201100991001008000010008000010111511701602670680000800001002671026710267102671026710
16020426713207000026694025160100100800008000010080020800155001168951188416326690267092670966320666581601352008002080020200800208002026709267131180201100991001008000010008000000111511701602670680000800001002671026710267102671026710
16020426709207000026694025160100100800008000010080020800155001168951188416326690267092670966320666581601352008002080020200800208002026709267131180201100991001008000010008000000111511701602670680000800001002671026710267102671026710
16020426709207100026694025160100100800008000010080020800155001168951188416326692267092670966320666581601352008002080020200800208002026709267091180201100991001008000010008000010111511701602670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a5 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024267112000000450026694325160010108000080000108000080000501168880188403200266902670926709681406066941600102080000800002080000800002670926709118002110910108000010080000000005020316042268708000080000102671026710267102671026710
1600242670920000006002669402516001010800008000010800008000050116888018840320026690267092670968090366911600102080000800002080000800002670926714118002110910108000010080000003005020216022267138000080000102671026710267172671326713
1600242671321900000002669402516001010800008000010800008000050116888018840320026859267172672766530366891600102080000800002080000800002670926713118002110910108000010080000000005020316022267068000080000102671026710267102671026710
1600242670920000000002669822516001010800008000010800008000050116888018930930026690267222671366900366891600102080000800002080000800002670926709118002110910108000010080000000005020216033267068000080000102671026710267102671026714
1600242670920000000002669402516001010800008000010800008000050116888018840320126690267202672166530366891600102080000800002080000800002670926709118002110910108000010080000000005020216022267068000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840320026825267092687866550366891600102080000800002080000800002670926709118002110910108000010080000000005020216022267088000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840320026690267122677266530366891600102080000800002080000800002670926709118002110910108000010080000000005020316032267068000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840320026690267122671766530366891600102080000800002080000800002670926709118002110910108000010080000000005020216022267068000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840320026690267612671166540366891600102080000800002080000800002670926709118002110910108000010080000000005020216032267068000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840320026857267152670966530366891600102080000800002080000800002670926709118002110910108000010080000000005020316042267068000080000102671026710267102671026710