Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, fixed-point, S from W)

Test 1: uops

Code:

  ucvtf s0, w0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437639361025200010001000100010001407522820035737637672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437636361025200010001000100010001407522820035737937672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437630361025200010001000100010001407522820035738037672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437630361025200010001000100010001407522820035739037672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437620361025200010001000100010001407522820035737637672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437620361025200010001000100010001407522820035737737872310920001000100010001000376376111001100010000732162237310001000377377377377377
200437630361025200010001000100010001407522820035737637672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437630361025200010001000100010001407522820035737637672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437620361025200010001000100010001407522820035737637672310920001000100010001000376376111001100010000732162237310001000377377377377377
200437630361025200010001000100010001407522820135737737972310920001000100010001000376376111001100010000732162237310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf s0, w0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 09 | 0b | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020413003210080000013001711940825401001010020000100001002000010000500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
3020413003210070100013001711940825401001010020000100001002000010000500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003221202011009910010100100001000100010002131012162212953110000100001000010100130033130033130033130033130033
302041301371008002701133097120527556401611019120111100661432500810147736627135414979320441130013132711132940126889195126841316262001000020000200100002000013003213003211202011009910010100100001000100000000131012162412954610000100001000010100130033130033130033130033130033
3020413003210080000013001711940825401001010020000100001002000010000500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
3020413003210080000013002511940825401001010020000100001002000010000500621449714802709001300151300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012163212951910000100001000010100130033130033130033130033130035
3020513003210080000013002011940825401001010020000100001002000010000500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003211202021009910010100100001000100003000131012162212956410000100001000010100130033130033130036130033130033
3020413003210080000013001711940825401001010020000100001002000010000500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
3020413003210080000013001711940825401001010020000100001002000010049500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162412953710000100001000010100130033130033130033130033130033
3020413003210070000013001711940825401001010020000100001002000010000500621449714802709001300131300321300321254663126240301002001000020000200100002000013003213003221202011009910010100100001000100000000131012163212951910000100001000010100130033130033130033130033130033
30204130032100800000130017119408254010010100200001000010020000100005006214497148027090013001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010021001600386500131012163212952010002100001000010100130033130454130033130033130795

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 18 | 1e | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003297310010021300171194082540010100102000010000102000010000506215000148005281130013013012513003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000300012741616151412951910000100001000010010130033130033130033130035130033
3002413003297410010021300171194082540010100102000010000102000010000506214497148005280130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000001012741416121412951910000100001000010010130125130033130033130033130033
3002413003297410010021300171194082540010100102000010000102000010000506214497148005280130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000000012741316101412951910000100001000010010130033130069130077130033130033
3002413003297410010782130017119408254001010010200001000010200001000050621449714800528013001301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000001274141614812952310000100001000010010130033130033130033130033130033
3002413003297410010021300171194082540010100102000010000102000010000506214497148005280130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001021000000000012741616151012952310000100001000010010130033130033130033130036130033
3002413003297410010021300171194082540010100132000010000102000010000506214497148005280130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000000013191316131512952110000100001000010010130033130033130033130033130033
30024130032974100106392130017119408254001010010200001000010200001000050621449714800528013001301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000001274151614912973710000100001000010010130033130033130033130033130033
3002413003297410010021300171194082540010100102000010000102000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213014811200211091010010100001001000000000112741516121512951910000100001000010010130033130033130033130033130033
3002413003297410010021300171194082540010100192002410002112000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000000012741516161412951910000100001000010010130033130033130033130033130033
3002413003297410010212130017119408254001010010200001000010200001000050621449714800528013001301300321300321254893126262300102010000200002010000200001300321300321120021109101001010000100100000000001274131614812951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf s0, w8, #3
  ucvtf s1, w8, #3
  ucvtf s2, w8, #3
  ucvtf s3, w8, #3
  ucvtf s4, w8, #3
  ucvtf s5, w8, #3
  ucvtf s6, w8, #3
  ucvtf s7, w8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 0b | 1e | 24 | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020426712207012026694025160100100800008000010080020800155001168951188416312669026709267096632066658160135200800208002020080020800202670926709118020110099100100800001000080000000011151172160026706080000800001002671026710267102671026710
1602042670920700026694125160100100800008000010080020800155001168951188416302669026709267096633066658160135200800208002020080020800202670926709118020110099100100800001000080000000011151170160026706080000800001002671026710267142671026710
1602042670920700026694025160100100800008000010080020800155001168951188416302669026709267096632066874160136200800208002020080020800202670926709118020110099100100800001000080000000011151170160026706080000800001002671026710267102671026710
1602042670920700026694025160100100800008000010080020800155001168951188416312669026709267096636066658160136200800208002020080020800202670926709118020110099100100800001000080000003011151170160026706080000800001002671026710267102671026710
1602042670920703026694025160100100800008000010080020800155001168951188416312669026709267096632066658160136200800208002020080020800202670926709118020110099100100800001000080000000011151170160026706080000800001002671026710267102671026710
1602042670920700026864025160100100800008000010080020800155001168951188416302669026709267096632066658160135200800208002020080024800202670926709118020110099100100800001000080000003011151170160026706080000800001002671026710267152671426710
1602042670920700026694325160100100800008000010080020800155001170516188416312669026709267096632066658160135200800208002020080020800202670926709118020110099100100800001000080000100011151170160026706080000800001002671026710267122671026710
1602042670921400026694025160100100800008000010080020800155001168951188416302669526709267096632066658160136200800208002020080020800202670926709118020110099100100800001000080000000011151170160026706080000800001002671026710267102671026710
1602042670920700026694025160100100800008000010080020800155001168951188416302669026709267096632066658160135200800208002020080020800202670926709118020110099100100800001000080000000011151170160026706080000800001002671226710267102671026710
1602042670920700026694025160100100800008000010080020800155001168951188416302669026709267126632066658160136200800208002020080020800202671026709118020110099100100800001000080000000011151170160026710080000800001002671026710267102671326715

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d0 | d2 | d5 | d6 | d9 | da | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024267112070002669402516001010800008000010800008000050116888018840320026690026709267096653036689160010208000080000208000080000267092670911800211091010800001080000005020001916001717267068000080000102671026710267102671026710
160024267092070120266940251600101080000800001080000800005011688801884032112669002670926709665303668916001020800008000020800008000026709267091180021109101080000108000009502000816001715267068000080000102671026710267102671026710
1600242670920700026694025160010108000080000108000080000501168880188403200266900267092671066530366931600102080000800002080000800002671426779118002110910108000010800000050200061600178267068000080000102671026710267102671026710
160024267092070002669402516001010800008000010800008000050116888018869630026690026709267096653036689160010208000080000208000080000267092670911800211091010800001080000005020001716001717267088000080000102671026710267102671026710
16002426709207000266940251600101080000800001080000800005011688801884032012669002670926709665303668916001020800008000020800008000026718267181180021109101080000108000000502000121600617267068000080000102671026710267102671026710
1600242670920700026694025160010108000080000108000080000501168880188403200266900267092670966580366891600102080000800002080000800002670926710118002110910108000010800000050200071600817267068000080000102671026710267102671026710
160024267092070002669422516001010800008000010800008000050116831118837660026690026709267136653036689160010208000080000208000080000267092685011800211091010800001080000705020001716001717267068000080000102671026710267102671026710
1600242670920700026694025160010108000080000108000080000501168880188403200266900267092670966530366891600102080000800002080000800002670926709118002110910108000010800000050200061600178267068000080000102671026710267102671026710
1600242670920700026694025160010108000080000108000080000501168880188403211266900267092670966530366891600102080000800002080000800002671026709118002110910108000010800001050200083400714267068000080000102671026710267102671426710
16002426709207000266940251600101080000800001080000800005011688801884032002669002670926709665303668916001020800008000020800008000026709267091180021109101080000108000000502001171600817267068000080000102671026710267102671026710