Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, integer, S from W)

Test 1: uops

Code:

  ucvtf s0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
20043763036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043763036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043764036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043763036102520001000100010001000140752282038037637674310920001000100010001000376376111001100010000731161137310001000378377377377377
20043763036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043763036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043763036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043762036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377
20043763036102520001000100010001000140752282035737637672310920001000100010001000376380111001100010000731161137310001000377377377377377
20043762036102520001000100010001000140752282035737637672310920001000100010001000376376111001100010000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf s0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a7 | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032100800012900013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126253301002001000020000200100002000013003213003211202011009910010100100001000100000000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000013001711940825401001010020000100001002000010000500621449714802709113001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000009000131012163212951910000100001000010100130033130033130033130033130033
3020413003297400000013001711941925401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013006913003211202011009910010100100001000100000000000131012163212951910000100001000010100130033130033130035130035130033
30204130032999000029921133365120936875404231021720123100821592477911960833632075315034491013261901329791333891268822471280303665526612144249862461251424872132952133412371202011009910010100100001002100580021758780002068143905213219910000100001000010100132442132658132583132781132109
302041327161027147337071396011320261202994964028210182200871004814623031113725006214497148027090132077013239813102312546631264643638823311884235402341151123498132190132756311202011009910010100100001000100000003000131012162112951910000100001000010100130033130033130033130033130033
3020413003297400010920013001711940825401001010020000100001002000010000500621449714802709013001301300331300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000010131012162112952210000100001000010100130033130033130033130033130033
3020413003210080005490013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000000131012162212951910000100001000010100130033130033130033130033130033
302041300329740001050013001711940825401001010020000100001002000010000500621520314802709013001301300321300321255133126240301002001000020000200100002000013003213003211202011009910010100100001000100000000000131012162212951910000100001000010100130033130033130033130033130038
302041300329740008400013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000000131012161212951910000100001000010100130033130033130033130033130033
302041300329910002580013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000000131012162212951910000100001000010100130033130034130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130033130033130038130033
30024130032974000013001711940825400101001020000100001020000100005062145931480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162312951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130132130034130035130974
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130033130033130033130033
30024130032974000013240111940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162312951910000100001000010010130034130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623017920100002000020100002000013003213003211200211091010010100001010000000012702162212951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052800130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012702162312951910000100001000010010130033130035130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf s0, w8
  ucvtf s1, w8
  ucvtf s2, w8
  ucvtf s3, w8
  ucvtf s4, w8
  ucvtf s5, w8
  ucvtf s6, w8
  ucvtf s7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042671120000000000267610251601001008000080000100800208001550011689511884163026690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000080000000011151171160026706080000800001002671026710267102671026710
1602042670920000000000266990251601001008000080000100800208001550011689511884163026690267092670966400666581601352008002080020200800208002026709267091180201100991001008000010000080000000011151170160026782080000800001002671026710267102671026710
160204268932151011999247042851972025160622106814798156010882055821365331250428197754602837029025292827260318263852316556220682881821172108135680023282152863312180201100991001008000010004582819500013890022251271241126707180000800001002671326711267162671226715
160204267102070000339026695725331162444102811708143010081512819755221237382191262102752628033286957118411977626164884202819248192920481934805962836428047151802011009910010080000100005048143000141700111531831702228444380000800001002671126720267112671126711
160204267102080000000028226904402163485103815608221011082240826705271256629199659202841429185293667654357322827616488320682004832392068323483234292042965919180201100991001008000010002623813000014015011151170160026779080000800001002671026710267102671026710
1602042670920701011313171611442901310125421642671048169081950110825998267053312489241984758028643289292920571394533187969165251210828848345220682313834412874829585121802011009910010080000100003580000000022251281241126761080000800001002671226712267132671126711
16020426710214000000002669702516010010080000800001008002380018500116835218846370266912671026710662101066451601412008002380023200800238002326710267101180201100991001008000010000080000000022151170160126870080000800001002671026710267102671026856
1602042671022900000000266940251601001008000080000100800208001550011689511884163026690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000080000000011151170160026798080000800001002671026710267102671026710
1602042670920000000000266940251601001008000080000100800208001550011689511884163026690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000080000000011151170160026715080000800001002671026710267102671226710
1602042670920000000000266940251601001008000080000100800208001550011689511884163026690267092670966320666581601352008002080020200800208002026709267091180201100991001008000010000080000000011151170160026805080000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a5 | a6 | a9 | ac | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242670920000266942251600101080000800001080000800005011683891884032026690267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020041624267068000080000102671226719267112671926710
1600242670920000266940251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026713267091180021109101080000100800000005020021642267068000080000102671626710267122671026710
16002426709200048266940251600101080000800001080000800005011700841884032126694270432671266613668916001020800008000020800008000026709267091180021109101080000100800000005020041624267098000080000102671826714267182671426710
1600242671120000266940251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020041624267068000080000102671526710267122671026710
1600242670920000266940251600101080000800001080000800005011688801884032026690267112670966533668916001020800008000020800008000026709267091180021109101080000100800000005020041642267068000080000102671626718267102672726710
1600242670920000266943251600101080000800001080000800005011688801884032026690267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020061642267068000080000102671626718267112671526710
1600242670920000266940251600101080000800001080000800005011688801884032026690267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020021644267068000080000102671626719267102672326710
1600242670920000266940251600101080000800001080000800005011688801884032126690267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020021642267068000080000102671327215267172671526710
1600242671220000266945251600101080000800001080000800005011688801884032026695267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020021644267068000080000102671926714267202671026710
16002426709200002669426251600101080000800001080000800005011688801884032026690267092670966533668916001020800008000020800008000026709267091180021109101080000100800000005020041624267068000080000102671126718267102671026710