Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, fixed-point, D from X)

Test 1: uops

Code:

  scvtf d0, x0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437630365025200010001000100010001407522820357376380723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361025200010001000100010001407522869357376376723109200010001000100010003763761110011000100010731161137310001000377377379377377
200437930361025200010001000100010001407522820357376376753109200010001000100010003763771110011000100000731161137310001000377377377377377
200437620361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437630361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437620361025200010001000100010001407522820357376376733109200010001000100010003763761110011000100000731161137610001000377377377377377
200438030361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004379203610252000100010001000100014075228203663763767231092000100010001000100037637611100110001000012731161137310001000378377377377377
200437630361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
200437639361025200010001000100010001407522820357376376723113200010001000100010003793761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf d0, x0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000010000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300181194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270901300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003797400009001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000300131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194112540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270901300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300171194082540100101002000010000100200001000050062144971480270901300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000010000000000131012162212951910000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | cd | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003397400012013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126265300102010000200002010000200001300321300321120021109101001010000104010000130127011601112959510000100001000010010130033130033130033130036130033
30024130032974000120130017119409254001010010200001000010200001000050621449714800528113001313003213003212548914126262300102010000200002010000200001300321300321120021109101001010000100010000000127021601112957410000100001000010010130033130033130033130034130033
300241300329740000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100010000030127011601112951910000100001000010010130033130033130033130033130033
300241300329740000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000102410003000127013201112952210000100001000010010130035130033130033130034130033
300241300329730000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100010000000127011601112951910000100001000010010130033130033130033130033130033
3002413003297300069013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100010000000127011602112951910000100001000010010130033130033130033130033130033
300241300329740000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126263300102010000200002010000200001300321300321120021109101001010000100010000000127011601112951910000100001000010010130033130033130033130033130033
300241300329740000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100010000000127021601112951910000100001000010010130033130033130033130033130033
300241300329740000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100010000030127011601112951910000100001000010010130033130033130033130033130033
300241300329740000013001711940825400101001020000100001020000100005062144971480052811300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100010000000127011601112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf d0, x8, #3
  scvtf d1, x8, #3
  scvtf d2, x8, #3
  scvtf d3, x8, #3
  scvtf d4, x8, #3
  scvtf d5, x8, #3
  scvtf d6, x8, #3
  scvtf d7, x8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042670920000001202669422516010010080000800001008002080015500117028218823911266902671326713663606666216013620080024800202008002480020267222671311802011009910010080000100000800000301115117016002670680000800001002671426714267152671426710
1602042671320000001202669402516010010080000800001008002080015500116895118841631266942670926709663606666216013620080024800202008002080020267132671311802011009910010080000100000800001301115117016002670680000800001002671526715267102671426710
1602042671320000001202669822516010010080000800001008002080015500116895118848330266942670926713663606665816013420080024800202008002480020267092670911802011009910010080000100000800001301115117016002671080000800001002671526714267142671026710
1602042671320000001202669802516010010080000800001008002080015500117571218817190266942670926713663206665816013520080020804032008002080020267192687811802011009910010080000100000800001301115117016002671080000800001002671426714267102671526714
1602042670920000001202669822516010010080000800001008002080015500116829918841631266902670926713663206665816013520080020800202008002080020267132671311802011009910010080000100000800001301115117016002671080000800001002671426710267142671026710
160204267142000000002669822516010010080000800001008002080015500116895118814120266902670926713663606665816013520080024800242008002080020267092671311802011009910010080000100000800001301115117016002671080000800001002671426714267102671526714
1602042671320000001202669412516010010080000800001008002080014500116895118977840266942671326714663206666216013620080020800202008002480020267212670911802011009910010080000100000800000001115117016002671080000800001002671026715267142671426714
1602042670920000001202669802516010010080000800001008002080015500117765918840340266942671326709663206665816013520080024800202008002080024267102671311802011009910010080000100000800001301115117016002670680000800001002671426710267102671426714
1602042671320000001202669432516010010080000800001008002080015500117542718787720266902670926713663206665816013420080595800202008002080020267152671311802011009910010080000100000800000301115117016002670780000800001002671426710267142671426710
160204267092000000002669832516010010080000800001008002080015500116854018842150266942671426714663206666216013520080020800202008002080020267172670911802011009910010080000100000800001001115117016002671080000800001002671826724267142672426721

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a5 | a6 | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426709207002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000267252676811800211091010800001008000000502000061632267068000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000268022670911800211091010800001008000000502000021622267068000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665536689160010208000080000208000080000267142670921800211091010800001008000000502000021663267068000080000102690126885267142671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000267182671211800211091010800001008000000502000021622267068000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000268242671111800211091010800001008000000502000061622267068000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000267152682711800211091010800001008000000502000021662267168000080000102671026710267102671026710
16002426709200002669432516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000267092670911800211091010800001008000000502000021636267068000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000267092670911800211091010800001008000000502000021622267068000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001008000000502000021666267088000080000102671026710267102671026710
16002426709200002669402516001010800008000010800008000050116888018840320266902670926709665336689160010208000080000208000080000267092670911800211091010800001008000000502000061666267118000080000102671026710267142671326710