Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, integer, S from X)

Test 1: uops

Code:

  ucvtf s0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 0a | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
2004376300361025200010001000100010001413222820357376376723109200010001000100010003763761110011000100000732161137310001000377377377377377
2004376300361025200010001000100010001407522820357376376733109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376200361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376310361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376200361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376300361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000731161137610001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf s0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 09 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130073130068112020110099100101001000010000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032973000013001711945625401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032973000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000201312001000020000130035130032112020110099100101001000010000100020000131012162212951910000100001000010100130074130038130034130033130033
30204130032974000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000000131013162212951910000100001000010100130033130033130033130033130033
30204130032974000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000000131012162212951910000100001000010100130033130034130033130038130033
30205130058974000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000600131012162212951910000100001000010100130035130033130033130033130033
30204130032974000013001711940825401001010020000100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020128130032130032112020110099100101001000010000100000000131012162112951910000100001000010100130033130033130033130033130033
30204130032974000013001711940825401001010020003100001002000010000500621449714802709130013013003213003512549631262403010020010000200002001000020000130032130032112020110099100101001000010000100000000131012162212951910000100001000010100130071130034130033130033130033
30204130032974000013001711940825401001010020004100001002000010000500621449714802709130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000010131011162212951910000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
300241300329740000130017119408254001010010200001000010200001000050621449714800528113007701300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000300012701161112951910000100001000010010130033130033130033130033130033
3002413003297400210130017119409254001010010200001000010200001000050621449714800528113001301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000012701161112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528113001301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000012701161112951910000100001000010010130033130033130033130033130034
300241300329740000130017119408254001010010200001000010200001000050621449714800528113001301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000012701161112951910000100001000010010130033130033130033130033130033
300241300329740700130192119409254001010010200001000610201181004950621449714800528113006201300321300321254913126263300102010120200002010000200001300341300331120021109101001010000100100000300012701161112951910000100001000010010130368130033130036130036130035
3002413003297400270130018119408254001010010200001000010200001000050621449714800528013001801300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000012701160312951910000100001000010010130033130049130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528013004201300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000001012701161112951910000100001000010010130036130035130034130033130033
300241300349740000130017119408254001010010200001000010200001000050621449714800528113009301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000012701161112951910000100001000010010130519130034130033130291130033
300241300329770000130017119408254001010010200001000010200001000050621449714800528113007401300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100004000012701161112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528113009201300321300321254893126262300102010000200002010000201311300321300321120021109101001010000100100000000012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf s0, x8
  ucvtf s1, x8
  ucvtf s2, x8
  ucvtf s3, x8
  ucvtf s4, x8
  ucvtf s5, x8
  ucvtf s6, x8
  ucvtf s7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042670920700026694025160100100800008000010080020800155001168951188416326690267092670966326684216013520080020800202008002080020267092670911802011009910010080000100080000001115117116002670680000800001002671026710267102671026714
1602042670920611026694025160100100800008000010080020800155001168951188416326691267092670966326683116013520080020800202008002080020267092670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920700026695025160100100800008000010080020800155001168951188416326690267092670966326678516013520080020800202008002080020267132670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920700026694025160100100800008000010080020800155001168951188416326690267092670966326680016013520080020800202008002080020267092670911802011009910010080000100080000001115117016002671380000800001002671426710267102671026710
1602042671320700026694025160100100800008000010080020800155001168951188416326690267092670966326681216013620080020800202008002080020267092670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920700926699025160100100800008000010080020800155001168951188416326694267092670966326682216013420080020800202008002080020267142670911802011009910010080000100080000001115183016002670680000800001002671026710267102671026710
1602042670920700026694025160100100800008000010080020800155001168951188416326690267092670966326675116013520080020800202008002080020267092671011802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920700026694225160100100800008000010080020800155001168951188416326690267132671366326677816013420080020800202008002080020267092670911802011009910010080000100080000001115117016002670680000800001002671426710267102671026710
1602042671320700026694025160100100800008000010080020800155001168951188416326690267092671366326675516013520080020800202008002080020267092670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710
1602042670920700026694025160100100800008000010080020800145001168951188416326690267092670966326681816013620080020800202008002080020267092670911802011009910010080000100080000001115117016002670680000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024267102070000000002669402516001010800008000010800008000050116888018840320266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502011611267068000080000102671026710267102671026710
160024267092070000000002669402516001010800008000010800008000050116862818840320266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502011611267068000080000102671026710267102671026710
16002426709207000000120026709025160010108000080000108000080000501168880188403202670026715267096653036689160010208000080000208000080000267092670911800211091010800001000080000003005219111521280748000080000102853128588285492854728533
160024280802200000121213326161285327824031631301081430813291081116819585012298471963137028373283692853374162562087887164012208228082286208228081140286992858612180021109101080000100040381562009635205284112411284318000080000102869028202286932869429202
16002428541224001151214555280281920251600101080000800001080000800005011684101884032026690267092670966530366891600102080000800002080000800002670926709118002110910108000010002928117000530800515427921276928000080000102820028199282022804528092
1600242803521811015811917920282116524011623501281170813001081671816025012191171942361027809280392770671814836689160010208000080000208000080000267092681211800211091010800001000378013200300519627011279688000080000102671026710267102671026710
16002426709207100075531704027904146231160532108000080000108000080000501168880188403202669026709267096653381136690160373208000080000208018980192270402688311800211091010800001044748091004350520502011611267068000080000102771326710267102688026713
160024270472101000492649680280320251600101080000800001080000800005011688801884032026690267142671366530366921600102081707807562080189800002670926709118002110910108000010222298000000000502011611267068000080000102671026710267102671026710
1600242670920700000018002669402516001010800008000010800008000050116888018840320266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502013411267278000080000102671026710267102671026710
160024267092070000000002669402516001010800008000010800008000050116888018840320266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502011611267068000080000102671026710267102671026710