Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, fixed-point, S from W)

Test 1: uops

Code:

  scvtf s0, w0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004377303610252000100011301000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376233610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377378377377377
2004376203610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377378377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161237310001000377377377377377
2004376303610252000100010001000100014075228201357376376733109200010001000100010003763761110011000100030731161237310001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161237310001000377377377377377
2004376303614252000100010001000100014075228201357376376723109200010001000100010003763791110011000100000731161237310001000377377377377377
2004376303612252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161237310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf s0, w0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974001130601300211194092540100101002000010000100200001000050062144971480270913001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010001000013131012162212952110000100001000010100130038130035130033130033130033
3020413003297400000013001711955411240100101002000010000100200001000050062147851480270913001313003213003312546631262413010020010000200002001000020000130032130054112020110099100101001000010001000013131013163212952110000100001000010100130033130036130033130033130033
3020413003297400003001300171194082540100101002000010000100200001000050062144971480282513001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010001000013131013163312951910000100001000010100130033130033130033130037130033
3020413003297400001201300171194082540100101002000010000100200001000050062144971480270913001313003213003212549531262423010020010000200002001000020000130032130083112020110099100101001000010001000013131013163312951910000100001000010100130033130033130033130033130033
3020413003297400001201300171194082540100101002000010000100200001000050062144971480294113001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010001000010131012162212952010000100001000010100130033130033130033130034130033
3020413003297400001201300171194082540100101002000010000100200001000050062144971480270913001313003413003512546631262403010020010000200002001000020000130032130032112020110099100101001000010001000013131012162212951910000100001000010100130033130034130033130035130033
302041300329730000001300171194102540100101002000010000100200001000050062145451480270913001313003213003212546631262433010020010000200002001000020000130036130032112020110099100101001000010001000013131013162212952010000100001000010100130033130033130033130033130033
3020413003297400001201300171194082540100101002000010000100200001000050062144971480270913001313003213003212546731262403010020010000200002001000020000130032130052112020110099100101001000010001000013131012162212951910000100001000010100130035130033130033130033130062
3020413003297300001201300171194082540100101002000010000100200001000050062145451480304813001313003213003212546831262403010020010000200002001000020000130032130032112020110099100101001000010001000013131013163312951910000100001000010100130033130033130033130033130033
3020413003297400001201300171194102540100101002000010000100200001000050062144971480270913001313003213003312546631262403010020010000200002001000020000130032130032112020110099100101001000010001000010133612163212951910000100001000010100130034130036130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
300241300329740000130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012702161112952010000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161112952210000100001000010010130033130033130033130033130033
300241300329740000130017119408464001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161112955410000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161312953910000100001000010010130033130033130033130036130033
3002413003297400120130017119408254001010010200001000010200001000050621454514800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100001300012701161112952110000100001000010010130033130034130033130033130033
3002413003297400120130017119408254001010010200001000010200001000050621449714800528013001313003213003412548931262623001020100002000020100002000013003513003211200211091010010100001000100001300012701161112955410000100001000010010130033130033130033130033130033
3002413003297400120130018119409254001010010200001000010200001000050621449714800642113001313003513003212548931262623001020100002000020100002000013003213003611200211091010010100001000100001300012701161112954510000100001000010010130033130033130036130033130033
300241300359740000130019119408254001010010200001000010200001000050621464114808070113001313003213003212548931262653001020100002000020100002000013003213003211200211091010010100001000100000000013951793312996310000100001000010010130033130033130033130367130373
300241303689740090988130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100001300012701161112954010000100001000010010130033130033130033130033130033
3002413003297400120130017119410254001010010200001000010200001000050621449714800864113001313003213003512549231262623001020100002000020100002000013003313003311200211091010010100001000100001300012701161112955810000100001000010010130033130033130033130035130033

Test 3: throughput

Count: 8

Code:

  scvtf s0, w8, #3
  scvtf s1, w8, #3
  scvtf s2, w8, #3
  scvtf s3, w8, #3
  scvtf s4, w8, #3
  scvtf s5, w8, #3
  scvtf s6, w8, #3
  scvtf s7, w8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042671020026700025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026917270551180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694625160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080024200800208002426725267211180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671826710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188534812669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002671080000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001168951188416312669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000001115117016002670680000800001002671026710267102671026710
1602042670920026694025160100100800008000010080020800155001174360188419812669026709267096632666581601352008002080020200800208002026715267101180201100991001008000010080000001115117016002670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a8 | a9 | ac | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024267152000122670132516001010800008000010800008000050118212518841061266902671426709665736693160010208000080000208000080000267132671311800211091010800001000800001035020031633268748000080000102671426714267102671426714
16002426713200012266992251600101080000800001080000800005011687011885392126694267132671466573669416001020800008000020800008000026713267131180021109101080000100111800002035020021633267128000080000102671426714267102671426714
160024267132000122669802516001010800008000010800008000050117066218807821266942671326713665336693160010208000080000208000080000267092674011800211091010800001000800000005020031633267168000080000102671426714267142671426714
160024267102000122669832516001010800008000010800008000050117541518838471266902671326713665436693160010208000080000208000080000268632674011800211091010800001000800001035020031663267178000080000102671526710267142671426714
1600242671320000266982251600101080130801301080000800005011683881883204126690267132671366573669316001020800008000020800008000026711267091180021109101080000100080000148815020181623267108000080000102671426710267142671426710
160024267132000122669832516001010800008000010800008000050116876418901941266952671326709665636693160010208000080000208000080000267132671411800211091010800001000800000035020041666267188000080000102671526714267102671426714
160024267092050122669822516001010800008000010800008000050116945518853221266952671426714665336693160010208000080000208000080000267132671311800211091010800001000800001035020031632267368000080000102671426714267152671426715
160024267092000122669422516001010800008000010800008000050116860018835111266952671026713665736694160010208000080000208000080000267132671411800211091010800001000800000035020061632267188000080000102671426714267142671426714
160024267092000122669832516001010800008000010800008000050116888018840321266902671326714665736690160010208000080000208000080000267142671311800211091010800001000800000035020031653267188000080000102671526714267152671026714
16002426709200002669822516001010800008000010800008000050116888018840321266902671426713665336693160010208000080000208000080000267132671311800211091010800001040800000035020031636267148000080000102671026710267142671426710