Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, fixed-point, S from X)

Test 1: uops

Code:

  scvtf s0, x0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
2004379300361252000100010001000100014075228720357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376200369252000100010001000100014075229851357376376723109200010001000100010003763761110011000100000731161137310001000377377377379377
2004376300364252000100010001000100014075228201357376376753109200010001000100010003763761110011000100000731161137310001000377377377377378
2004376400361252000100010001000100014075228201357376379723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376200372252000100010001000100014075228200357376376723110200010001000100010003763761110011000100000731161137310001000377377377377377
2004376310362252000100010001000100014075228201357376376723112200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361252000100010001000100014104228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361252000100010001000100014225228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf s0, x0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974110013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254737126235301002001000320005200100032000513003213003211202011009910010100100001001000200111131702163212952710000100001000010100130033130033130033130033130033
30204130032974100013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254736126236301002001000320005200100032000513003213003211202011009910010100100001001000060111135502162212953210000100001000010100130033130033130033130033130033
30204130032974000113001711940825401001010020000100001002000010000500621449714802709013001301300321300321254737126235301002001000320005200100032000513003213003311202011009910010100100001001000000111131802162212953210000100001000010100130033130034130033130033130033
30204130032974100013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254736126235301002001000320005200100032000513003213004311202011009910010100100001001000000111131702162212953210000100001000010100130033130033130033130033130033
30204130032974110113001711940825401001010020000100001002000010000500621449714802709013001301300321300321254737126236301002001000320005200100032000513003213003211202011009910010100100001001000000111131802162212953210000100001000010100130033130033130033130033130033
30204130032974000013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254737126235301002001000320005200100032000513003213003211202011009910010100100001001000000111131702162212953210000100001000010100130033130033130033130033130033
30204130032974100113001811940825401001010020000100001002000010000500621449714802709013001301300321300321254736126235301002001000320005200100032000513003213003211202011009910010100100001001000000111131702162212952710000100001000010100130033130033130033130033130033
30204130032974100113001711940825401001010020000100001002000010000500621449714802709113001301300321300321254736126235301002001000320005200100032000513003213003211202011009910010100100001001000000111131702162212952710000100001000010100130064130033130033130033130033
30204130032973000013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254737126236301002001000320005200100032000513003213003211202011009910010100100001001000000111131702162212952710000100001000010100130033130033130033130033130033
30204130032974000013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254736126235301002001000320005200100032000513003213003211202011009910010100100001001000000111131702162212953210000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024130032974013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000312702171112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126264300102010000200002010000200001300321300321120021109101001010000100100000012702161112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000012701161112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100006012701161112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000012701161112973910000100001000010010130033130033130033130033130033
300241300329741213001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262301792010000200002010000200001300321300321120021109101001010000100100000012701161112951910001100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000012701161112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000012701161112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000012701161112951910000100001000010010130033130033130033130033130033
30024130032974013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf s0, x8, #3
  scvtf s1, x8, #3
  scvtf s2, x8, #3
  scvtf s3, x8, #3
  scvtf s4, x8, #3
  scvtf s5, x8, #3
  scvtf s6, x8, #3
  scvtf s7, x8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 04 | 08 | 09 | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020426710200000000022669402516010010080000800001008002080015500116839618841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000003303011151193164826706080000800001002671026710267102671026786
1602042670920000000002266940251601001008000080000100800208001550011689511884163126690267092670966320666601601352008002080020200800208002026709267091180201100991001008000010000800000000011151196169826706080000800001002671026710267102671026800
16020426709200000000022669402516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000003300011151198169826706080000800001002671026710267102671026839
160204267092000000000226694025160100100800008000010080020800155001168951188416312669026709267096632066658160135200800208002020080020800202670926709118020110099100100800001000080000010865011151198163826706080000800001002671026710267102671026717
1602042671820000000213022669429251601001008000080000100800208001550011689511884163126690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000800000100011151198168926706080000800001002671026710267102671026714
1602042670920000000002266942251601001008000080000100800208001550011689511884390126690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000800000000011151198168926706080000800001002671026710267102671026724
1602042670920000000002266948251601001008000080000100800208001550011689511891994126690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000800000100011151199168826706080000800001002699826719268202671226710
160204267092000000026488226694625160100100800008000010080020800155001168951188416312669026709267096632066658160135200800208002020080020800202670926709118020110099100100800001000080000000002225129933101026707080000800001002671126711267112671126720
160204272162001000300226863092160100100800008000010080023800185001168352188346012669126710267156621096714160141200807818021520280023800232671026710118020110099100100800001000080000000690222519611529926851080000800001002671126711267112671127248
1602042671020001100405882272086725160100100805208013010080023800185001168352188346012669127210272546621010664516014120280783800002008000080573267102671021802011009910010080000100408000002500211151221025101026707080000800001002721126835269542705026869

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426709200026694222516001010800008000010800008000050116888018842061266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000100502011621267068000080000102671026710267102671026717
1600242670920002669402516001010800008000010800008000050117025218847751266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000103502011611267068000080000102678126756268372672626710
1600242670920602669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000201748502011611267068000080000102671026710267102671026715
1600242671020102669402516001010800008000010800008000050116960518840320266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000100502011611267068000080000102671026710267102671026719
1600242670920002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670921800211091010800001080000100502011611267068000080000102671026710267102671026716
1600242670920002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000103502011611267068000080000102671026710267102671026719
1600242671120002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000103502011611267068000080000102671026710267102671026719
1600242671520002669402516001010800008000010800008000050116843518840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000103502011611267068000080000102671026710267102671026716
1600242670919902669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000100502011611267068000080000102671026710267102671026719
1600242670920002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001080000100502011611267068000080000102671026900267742672826710