Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, fixed-point, D from W)

Test 1: uops

Code:

  scvtf d0, w0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437630361025200010001000100010001411222820357376376723109200010001000100010003763761110011000100000733161137310001000377377377377377
200437620361025200010001000100010001407522820357376376723109200010001000100010003793761110011000100000731161137310001000377377377377377
200437630361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100003731161137310001000378377377377377
200437630361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361025200010001000100010001416522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075228203573763767231122000100010001000100037637611100110001000120731161137310001000377377377377377
200437630361025200010001000100010001407522837357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361025200010001000100010001407522820357376380723109200010001000100010003763761110011000100000731161137310001000377380377377377
200437630361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf d0, w0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032977000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131012163212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131013163312951910000100001000010100130033130033130033130033130033
30204130032973000000130017119406254010010100200001000010020000100005006214497148027091130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000137312163312951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131013163312951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027091130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000030131012163212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131013163212951910000100001000010100130033130071130033130033130033
30204130032973000000130017119408254010010100200001000010020000100005006214497148027091130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131013163212957310000100001000010100130033130033130033130033130033
30204130036974000000130017119408254010010100200001000010020000100005006214497148027091130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000139112162312951910000100001000010100130033130033130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131013163312951910000100001000010100130033130034130036130034130033
302041300329740001200130017119409254010010100200001000010020000100005006214497148028220130014130035130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000131012162212952010000100001000010100130033130033130033130034130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 0b | 18 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
300241300329910008813001911940845400101001020000100041020000100005062144971480052811301641300331300331254893126262300102010000200002010000200001300321300321120021109101001010000101000000013292161112951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012701161112951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012701161112951910000100001000010010130033130033130033130033130033
300241300321018009013001711943625400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012702161112951910000100001000010010130033130068130033130033130033
300241300329740027013001711940825400101001020000100001020000100005062144971480052801300131300351300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012702161112957910000100001000010010130033130033130033130033130033
30024130032974000013002511940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012701161112951910000100001000010010130033130033130033130033130033
30024130033974000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012701161112951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300331300321120021109101001010000101000000012701161112951910000100001000010010130033130033130035130033130033
30024130032974003013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012701161112951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf d0, w8, #3
  scvtf d1, w8, #3
  scvtf d2, w8, #3
  scvtf d3, w8, #3
  scvtf d4, w8, #3
  scvtf d5, w8, #3
  scvtf d6, w8, #3
  scvtf d7, w8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d6 | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267112000000002674402516010010080000800001008002080015500116895118841631266902670926709663206666416013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710
160204267092000000002669402516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710
160204267092000000002670302516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710
160204267092000000002671602516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671426710267102671026710
160204267092000000002670602516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710
160204267092000000002669502516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710
1602052670920000002648826925128941603601008026080260102803968019650011795261898015126989269352705167266545681416086720080407802152028040780411270392692131802011009910010080000100208000000211151393422699480000800001002699526930269382706127052
160204268822011112264882670002516010010080000800001008002080014500116963018886961266902670926714663606666216013420080020800242008002080020267092670911802011009910010080000100008000013011151171602670680000800001002671026710267102671026710
160204267092000000002670202516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710
160204267092000000002670402516010010080000800001008002080015500116895118841631266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100008000000011151171602670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426714200002677422516001010800008000010800008000050116622218852761266942671426713665436694160010208000080000208000080000267132671211800211091010800001080000135020061654267108000080000102672426710267142671026714
160024267132011202669822516001010800008000010800008000050116840018843181266942671326709665636694160010208000080000208000080000267102671311800211091010800001080000135020051655267108000080000102672526715267102671126711
16002426713200002669822516001010800008000010800008000050116882718891261266942670926713665636694160010208000080000208000080000267132670911800211091010800001080000135020051663267118000080000102671526711267152683426715
160024267102001202669822516001010800008000010800008000050116810418838991266902671426713665336693160010208000080000208000080000267132671311800211091010800001080000135020031635267118000080000102672926714267142671426714
16002426713200002669402516001010800008000010800008000050116864318854241266942670926713665736693160010208000080000208000080000267092670911800211091010800001080000135020031663267108000080000102671926714267142671426710
160024267132001202669822516001010800008000010800008000050116806618825501266952670926709665736698160010208000080000208000080000267142671311800211091010800001080000135020031646267118000080000102672626714267142671026714
16002426709200002669852516001010800008000010800008000050116823418840321266942671326713665336693160010208000080000208000080000267132671311800211091010800001080000035020041663267108000080000102671026715267142671426715
160024267132001202669922516001010800008000010800008000050116835118840321266942671326713665636693160010208000080000208000080000267092670911800211091010800001080000105020031636267108000080000102671426835267132672226714
160024267132001202669822516001010800008000010800008000050116871518785101266942671326713665736693160010208000080000208000080000267132671311800211091010800001080000165020051665267108000080000102673026717267142671426714
16002426709200002669822516001010800008000010800008000050116846318820091266942671426713665336693160010208000080000208000080000267132670911800211091010800001080000105020061666267108000080000102671926714267142671426714