Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, D from W)

Test 1: uops

Code:

  scvtf d0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
2004376303610252000100010001000100014075228201357376376723109200010001000100010003793761110011000100000731161137310001000377377377377377
2004376303640252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376203610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100003731161137310001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100010731161137310001000377377377377378
2004376203612252000100010001000100014075228201357379376723109200010001000100010003763761110011000100000731161137310001000377377377377380
2004376303610252000100010001000100014075228201357376376723109200010001000100010003893761110011000100000731161137310001000377377377377377
2004376333610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000380377377377377
2004376203640252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137610001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf d0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974039013001711941225401001010020000100001002000010000500621454514803054113001313046713011712558231262403010020010000200002001006420000130036130032112020110099100101001000010010000000131014162212952010000100001000010100130033130033130033130033130033
30204130034974012013001711940825401001010220000100041172000010098500622229314802709113008213003313003312546631263013042920410000207352021006120489130032130033312020110099100101001000010010000003131012162212951910000100001000010100130033130033130033130033130034
3020413003297400013001711941025401001010020003100001002000010000500621449714802824113001413003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212952110000100001000010100130033130033130033130033130033
3020413003297409013001711940825401001010020000100001002000010000500621449714802709113001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400013001711940825401001010020000100001002000010000500621449714802709113001613003213044712546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400013001711940825401001010020000100001002000010000500621449714802709113001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212951910000100001000010100130033130033130033130033130033
30204130032973012013001711940825401001010020000100001002000010000500621449714802709113001413003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297300013001711940825401001010020000100001002000010000500621449714802709113001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000133612162212951910000100001000010100130033130033130033130033130033
3020413003297309013001711940825401001010020000100001002000010000500621449714802709113001613003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212952210000100001000010100130033130033130033130033130033
3020413003297400013001711940825401001010020000100001002000010000500621449714802709113001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010010000000131012162212951910000100001000010100130033130033130033130036130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 04 | 08 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024130032974000012013001711940825400101001020000100001020000100005062144971480052801300141300351300331254893126262300102010000200002010000200001300331300321120021109101001010000100100000103012705162212951910000100001000010010130035130035130033130033130033
3002413003297400000013001711940825400101001020000100001020000100005062144971480052801300161300321300371254893126263300102010000200002010000200001300321300321120021109101001010000100100000100012702162312951910000100001000010010130033130033130035130033130033
30024130033974000012013001911940925400101001020000100001020000100005062144971480052801300131300391300321254903126263300102010000200002010000200001300321300321120021109101001010000100100000103012702162212951910000100001000010010130033130033130033130036130033
30024130036973000012013001711940825400101001020000100001020000100005062144971480052801300131300321300321254903126262300102010000200002010000200001300321300321120021109101001010000100100000103012702162112957310000100001000010010130033130033130033130035130033
30024130032974000012013001711940925400101001020000100001020000100005062145451480052811300131300321300351254893126262300102010000200002010000200001300321300321120021109101001010000100100000103212703165312951910000100001000010010130033130033130033130033130034
30024130032974000012013001711940825400101001020000100001020000100005062145451480052811300131300321300321254893126262300102010000200002010000200001300331300321120021109101001010000100100000103012702164312952210000100001000010010130033130033130033130033130033
30024130035974000012013001711940825400101001020000100001020000100005062146411480052801300131300341300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000100012704162112951910000100001000010010130033130033130033130033130033
3002413003297400000013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126264300102010000200002010000200001300321300321120021109101001010000100100010103012702162312951910000100001000010010130033130033130035130033130033
30024130032974000012013001711940925400101001020000100001020000100005062144971480075801300151300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000006012702164312952010000100001000010010130033130033130033130033130033
3002413003297400000013001711940825400101001020000100001020000100005062144971480052801300131300321300321254893126263300102010000200002010000200001300321300321120021109101001010000100100000000212704164412951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf d0, w8
  scvtf d1, w8
  scvtf d2, w8
  scvtf d3, w8
  scvtf d4, w8
  scvtf d5, w8
  scvtf d6, w8
  scvtf d7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267092001202669812516010010080000800001008002080015500116974618809360267092670926710663666662160135200800248002020080024800242671326713118020110099100100800001008000010011151172160122671080000800001002671526714267142671426714
160204267092001202669822516010010080000800001008002080016500116895118841630266942671326713663666662160134200800208002020080020800202671426713118020110099100100800001008000013011151171160122671180000800001002671426710267142671426714
160204267132001202669822516010010080000800001008002080015500116973418839500267002671326713663666662160135200800208002020080020800202670926709118020110099100100800001008000013011151173160232671080000800001002671026710267142671426714
160204267092001202669822516010010080000800001008002080016500116895118823350266942671326713663666662160135200800208002020080020800202670926709118020110099100100800001008000013011151172160212670680000800001002671426715267102671026714
160204267132001202669822516010010080000800001008002080015500117023818827800267002671326713663666658160135200800208002020080024800242671326713118020110099100100800001008000010011151172160212671080000800001002671426714267142671026714
16020426714200002669432516010010080000800001008002080014500116966718838530266902671326713663666662160135200800208002420080020800202671426713118020110099100100800001008000010011151172160222671080000800001002671526714267142671026715
1602042670920012026694025160100100800008000010080024800145001168931188416502669426713267136632256662160135200800208002020080020800202670926709118020110099100100800001008000013011151172160212671080000800001002671526714267142671426715
160204267132001202669722516010010080000800001008002080015500116827818842050266942671026714663666662160135200800208002020080020800242671326713118020110099100100800001008000013011151172160222671080000800001002671426710267102671426715
160204267132001202669832516010010080000800001008002080015500116071318884500266942670926709663666659160134200800208002020080020800202670926713118020110099100100800001008000000011151172160222671180000800001002671426714267142671426714
160204267132001202669422516010010080000800001008002080014500116812618816130266992671426713663266661160133200800208002020080024800202671326709118020110099100100800001008000013011151172160222671080000800001002671026714267142671526710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426710200026694025160010108000080000108000080000501168880188403212673626709267096653036689160010208000080000208000080000267092670911800211091010800001000080000000502016161314267068000080000102671026710267102671026710
160024267092001526694025160010108000080000108000080000501168880188403212669526709267206653036689160010208000080000208000080000268152670911800211091010800001000080000400502015161414267068000080000102671026710267102671026710
16002426709199026694025160010108000080000108000080000501168880188403202669826709267096665036689160010208000080000208000080000267092670911800211091010800001000080000000502013161311267068000080000102671026710267102671026710
16002426709200026694025160010108000080000108000080000501168880188403202669126709267096653036689160010208000080000208000080000267092670911800211091010800001000080000000502013161314267068000080000102671026710267102671026710
1600242670920002669402516001010800008000010800008000050116888018840320267002670926709665303669216001020800008000020800008000026709267091180021109101080000100008000000050201216149267068000080000102671026710267102671026710
1600242670920002669402516001010800008000010800008000050116888018840320267012670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000050208161412267068000080000102671026710267102671026710
16002426709200026694025160010108000080000108000080000501168880188403212670126709267096653036689160010208000080000208000080000267092670911800211091010800001000080000000502016161313267068000080000102671026710267102671026710
16002426709200026694025160010108000080000108000080000501168880188403202669926716267136653036689160010208000080000208000080000267092670911800211091010800001000080000000502012161111267068000080000102671026710267102671026710
16002426709200026694025160010108000080000108000080000501168880188403202669926712267156653036689160010208000080000208000080000267092670911800211091010800001000080000000502012161414267068000080000102671026710267102671026710
1600242670920002669402516001010800008000010800008000050116888018840321267012670926709665303668916001020800008000020800008000026711267091180021109101080000100008000000050201316812267068000080000102671026714267102671026710