Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, D from X)

Test 1: uops

Code:

  scvtf d0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437620361252000100010001000100014075228201357376376733109200010001000100010003763761110011000100010731161137310001000377382386377377
200437830361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100003731161137310001000377377377377377
200437630361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361252000100010001000100014075228201357376376723111200010001000100010003763761110011000100000731161137310001000377377392378377

Test 2: latency 1->2 roundtrip

Code:

  scvtf d0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974000000130017119408254010010100200001000210020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013006413004211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130034
30204130032973000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013010713005211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013008713006111202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
302041300329730002100130017119408254010010100200001000010020000100005006214497148027091300131300321300361254663126240301002001000020000200100002000013006713005911202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130034
30204130034974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013008413007111202011009910010100100001000100000000131012162212951910000100001000010100130150130232130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300351254663126240301002001000020000200100002000013003913003211202011009910010100100001000100000001131012162212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013003513003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130036
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013008213006011202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126243301002001000020000200100002000013006213003511202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091300131300321300321254663126240301002001000020000200100002000013007613004211202011009910010100100001000100000000131012162212951910000100001000010100130036130033130033130033130035

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst int load (95) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003297401201300171194112540010100102000010000102000010098506219447148008640130098130034130034125489312638830334201012120241201012120123130127130209112002110910010010100001001000013012701161112952210000100001000010010130081130034130034130033130033
3002413003397401201300171194082540010100102000010000102000010000506214497148005280130016130032130032125491312626330010201000020000201000020000130032130032112002110910010010100001001000016012701161212952210000100001000010010130035130036130033130033130033
30024130033101301201300171194082540010100102000010000102000010000506214497148005280130014130032130032125493312626230010201000020000201000020000130032130032112002110910010010100001001000013012701161112952410000100001000010010130038130036130033130033130033
3002413003297401201300171194082540010100102000010000102000010000506214593148005280130015130034130032125489312626430010201000020000201000020000130032130032112002110910010010100001071000321434504202533291413054510009100001000010010131750131504130039130033130034
3002413025411560001300171194082540010100102000010000102000010000506214497148008761130013130032130032125489312626230010201000020000201000020000130032130032112002110910010010100001001000013012701161112951910000100001000010010130033130033130033130036130033
3002413003297401201300171194082540010100102000010000102000010000506214497148005280130016130032130032125489312626230010201000020000201000020000130032130032112002110910010010100001001000413012701161112959910000100001000010010130033130033130036130033130036
30024130032974118013001711940825400101001020000100001020000100005062144971480087611300131300321300351254923126262300102010000200002010000201311300321300321120021109107010010100001001000003012701161112952310000100001000010010130033130033130033130033130033
300241300329740301300171194082540010100102000010000102000010000506214497148005280130013130032130032125489312626230010201006720000201000020000130034130033112002110910010010100001001000013012701161112952110000100001000010010130033130033130033130033130033
300241300329740301300171194082540010100102000010000102000010000506214641148005280130013130032130032125490312626230010201000020000201000020000130032130032112002110910010010100001001000013012701161112951910000100001000010010130034130033130034130033130033
3002413003497301201300171194092540010100102000010000102000010000506214497148005280130019130034130034125492312626230010201000020000201000020000130032130032112002110910010010100001001000013012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf d0, x8
  scvtf d1, x8
  scvtf d2, x8
  scvtf d3, x8
  scvtf d4, x8
  scvtf d5, x8
  scvtf d6, x8
  scvtf d7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | c5 | branch mispredict (cb) | cd | cf | d6 | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267122000026694025160100100800008000010080020800155001168951188416312669926709267096632666581601352008002080020200800208002026712267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710
160204267092000026694025160100100800008000010080020800155001168951188416312669526709267166632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671326710
160204267092000026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671726710267172671026710
160204267092000026694025160100100800008000010080020800155001168951188416312669826709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710
160204267092000026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710
160204267091990026694025160100100800008000010080020800155001168951188416312670326709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710
160204267092000026694025160100100800008000010080020800155001166710188416312671026715267146632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710
160204267092000026694025160100100800008000010080020800155001168951188416312670426709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710
160204267092000026694043160100100800008000010080020800155001168951188416312669926709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171632670680000800001002671026710267142671026713
160204267092000026694025160100100800008000010080020800155001168951188416312670026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000011151171602670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | a9 | ac | c2 | cf | d0 | d2 | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426709200000000026694025160010108000080000108000080000501168880188403231027119267452671066530366891600102080000800002080000800002670926709118002110910108000010000800000000502000316000232670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403271026691267172671066530366891600102080000800002080000800002670926709118002110910108000010000800000000502000316000332670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403281026703267092670966530366891600102080000800002080000800002671326713118002110910108000010000800000000502000316000332670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403271026706267092670966530366891600102080000800002080000800002670926709118002110910108000010000800000000502052316000232670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403271526755267142670966530366891600102080000800002080000800002670926709118002110910108000010000800000000502000316000332670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403291526695267092670966620366891600102080000800002080000800002670926709118002110910108000010000800000000502052316000232670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108018580000501161344188498881526695267092671066530366891600102080000800002080000800002670926709118002110910108000010000800000000502000316000332700408000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403241526700267092670966530366891600102080000800002080000800002670926709118002110910108000010000800000000502000316000232670608000080000102671026710267102671026710
16002426709199000000026694025160010108000080000108000080000501168880188403291526704267092670966530366891600102080000800002080000800002670926709118002110910108000010000800000000502052316000332670608000080000102671026710267102671026710
16002426709200000000026694025160010108000080000108000080000501168880188403291026704267092670966530366891600102080000800002080000800002670926709118002110910108000010000800000000502000316000332670608000080000102671026710267102671026710