Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, H from X)

Test 1: uops

Code:

  scvtf h0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
20043762243610252000100010001000100014075228701357376376733110200010001000100010003763761110011000100000731161137310001000377377377377377
2004376393620252000100010001000100014075228201357376376723112200010001000100010003763791110011000100000731161137310001000377377377377380
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377381377
2004376203612252000100010001000100014075228201357376376723114200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075230161357376376723109200010001000100010003763761110011000100010731161137310001000377377377377380
2004376303610252000100010001000100014075228201357376376723111200010001000100010003763771110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075228201357376376723128200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303620252000100010001000100014075228201357376376723114200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075229491357376376723115200010001000100010003793761110011000100000731161137410001000377377377377377
2004376303610252000100010001000100014125228201357376376723115200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf h0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a7 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020413003297400001300171194472540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000030000131012162212951910000100001000010100130037130033130080130046130033
3020413003297400001300171194302540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131012162212951910000100001000010100130033130036130100130071130033
3020413003297400001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131013162212951910000100001000010100130033130033130033130080130033
3020413003297400001300171194332540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000010920131012162212951910000100001000010100130033130033130033130079130033
3020413003297400001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131013162312951910000100001000010100130033130033130033130063130033
3020413003297400001300171194152540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131012162212951910000100001000010100130033130033130088130063130033
3020413003297400001300171194282540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131012162212951910000100001000010100130033130033130097130059130034
3020413003297400001300171194082540100101002000010000100200001000050062146411480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131012162212954210000100001000010100130033130070130057130034130033
30204130032974000013001711941015040107101002000010000100203521029451162144971480270911300131305271300321255073126240301002001000020000202100002000013003213003211202011009901001010010000100210000000000131012162212951910000100001000010100130033130080130079130036130033
3020413003297400001300171194082540100101002000010000100200001000050062144971480270911300131300321300321254663126240301002001000020000200100002000013003213003211202011009901001010010000100010000000000131012162212951910000100001000010100130075130036130034130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 0b | 18 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024130032974001213001711940825400101001020000100001020000100005062173771480064101300151300321300331254893126262300102010000200002010000200001300361300321120021109101001010000101000010300127021611129519100000100001000010010130033130033130033130033130033
30024130034974001213001711940925400101001020000100001020000100005062144971480700511300131300321300321254903126262300102010000200002010000200001300321300321120021109101001010000101000010300127011611129519100000100001000010010130033130033130033130033130035
30024130032974001213001711940825400101001020000100001020000100005062168971480392301300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000010000127011611129519100000100001000010010130033130033130033130035130033
30024130035974001213001711940825400101001020000100001020000100005062144971480734011300141300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000010300127011621129519100000100001000010010130033130033130033130034130033
30024130032974006613001711940825400101001020000100001020000100005062172451480098311300131300321300341254893126262300102010000200002010000200001300361300331120021109101001010000101000010300127011611129519100000100001000010010130033130033130033130033130033
3002413006597400013001711940825400101001020000100001020000100005062156011480415101300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000000127011611129519100000100001000010010130033130033130033130033130033
300241300329740001300171194082540010100102000010000102000010000506214497148047310130013130032130032125489312626230010201000020000201000020000130032130032112002110910100101000010100000000012702161112951910000229100001000010010130035130033130033130033130033
3002413003297400013001711940825400101001020000100001020000100005062162251480499101300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000000127011611129519100000100001000010010130033130033130033130033130033
3002413003297400013001711940825400101001020000100001020000100005062167531480302301300131300321300321254893126262300102010000200002010000200001300321300321120021109101001010000101000000000127011611129519100000100001000010010130033130033130033130033130033
30024130032974000130021119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000003000127021621129520100000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf h0, x8
  scvtf h1, x8
  scvtf h2, x8
  scvtf h3, x8
  scvtf h4, x8
  scvtf h5, x8
  scvtf h6, x8
  scvtf h7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020426716200012267113251601001008000080000100800208001450011720361884045026694267092671366366666316013520080024800202008002480020267092671311802011009910010080000100080000031115117016002671080000800001002681326743267222671526710
16020426714206012266942251601001008000080000100800208001451111689511881571026690267092670966366666016013520080024800202008002080020267092670911802011009910010080000100080000001115117016002671580000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080020800202008002080020267092670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080020800202008002080020267092670911802011009910010080000100080000001115117016002670880000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080020800202008002080024267092670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080024800242008002480020267092671811802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080024800202008002080020267192671611802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080020800202008002080020267132671511802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163126690267092670966326684016013420080020800202008002080020267112678711802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920000266940251601001008000080000100800208001550011689511884163026690267092670966326665816013520080020800202008002080020267152681411802011009910010080000100080000001115117016002670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242671120002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267182671711800211091010800001000800000000050201916088267068000080000102671026710267102671026710
160024267092000266948251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026709267091180021109101080000100080000000005020516088267088000080000102696626727267202671526710
160024267092000266940251600101080000800001080000800005011688801884032126690267092671066533668916001020800008000020800008000026709267091180021109101080000100080000000005020916047267068000080000102696426718267202671926710
1600242670919902669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001000800000300050201016096267068000080000102696926727267202672026710
1600242670920002669402516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001000800000000050205160913267068000080000102680326717268832671326710
160024267092000266943251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026709267131180021109101080000100080000000305020616068267068000080000102695726719267982671826712
160024267092000266940251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026709267091180021109101080000100080000000005020616058267068000080000102680426716268092671226710
1600242670920002669452516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001000800000000050201016057267068000080000102697826727267212672026710
1600242670920002669442516001010800008000010800008000050116888018840321266902670926709665336689160010208000080000208000080000267092670911800211091010800001000801320000050209160109267068000080000102699226854267202672126710
160024267092000266940251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026709267091180021109101080000100080000000005020616078267068000080000102681726718268612671026710