Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, integer, D from X)

Test 1: uops

Code:

  ucvtf d0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 3f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
20043763361252000100010001000100014075228200357376376723109200010001000100010003813761110011000100000731161137310001000377377377377377
20043763361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377382377
20043762361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000380377378377377
20043762361252000100010001000100014075231070357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
20043763361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
20043763361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
20043763361252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
20043763361252000100010001000100014075229510357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
20043763361252000100010001000100014075228200357376376723109200010001000100010003763801110011000100000731161137310001000377377377377377
20043763362252000100010001000100014227228200357376378723109200010001000100010003763761110011000100010731161137410001000377377377377381

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf d0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974000000130017119618254010010100200001000010020000100005006214497148027090130013130032130032125466312624230100200100002000020010000200001300321300331120201100991001010010000100001000000000001310102162212951910000100001000010100130092130037130033130033130033
302041300329740000001300721194082540100101002000010000100200001000050062144971480270901300131300321301581254663126242301002001000020000200100002000013003613003211202011009910010100100001000010000001291800001310102162212951910000100001000010100130033130150130036130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310102163212951910000100001000010100130033130102130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310102162212951910000100001000010100130033130110130035130033130033
3020413003297300000013001711940825401001010020000100001002000010000500621449714802709013001313003213003412546631262403010020010000200002001000020000130032130032112020110099100101001000010000100002016200001310102162212951910000100001000010100130033130033130048130052130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000011310102162212951910000100001000010100130111130034130033130033130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300331300321120201100991001010010000100001000000300001329102334212981310000100001000010100130033130107130083130035130033
30204130032974000000130017119408254010010100200001000010020000100005006214497148030540130013130032130032125468312624030100200100002000020010000200001300361300321120201100991001010010000100001000000300001310102162212951910000100001000010100130033130080130068130036130033
30204130032973000000130017119408254010010100200001000010020000100005006214497148027090130025130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310102162212951910000100001000010100130101130038130033130033130033
30204130032974000000130017119411254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000000000001310102162212951910000100001000010100130033130070130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
300241300329740000130017119408254001010010200001000210200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012702161112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528113001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528113009013003213003412548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528113001313003213003312548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701161112951910000100001000010010130033130035130035130033130033
30024130032974000013262712036673040190100532009310054152374311274716288102149518650131984132793132879126722175127955351392411618240092811657236791327891322663412002110910100101000010221000010200185216345181613228110057100001000010010132638133283132908133241133260
300241329749980060130018119408254001010014200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001002100000000012701161212952010000100001000010010130033130033130083130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001000100000000012701162112957210000100001000010010130036130033130033130033130033
300241300329730000130017119408254001010010200001000010200001000050621449714800528013001313003213003212549431262623001020100002000020100002000013003213003211200211091010010100001000100000000012701162112951910000100001000010010130033130033130033130033130033
300241300329740000130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100662000013003313003211200211091010010100001000100000300012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf d0, x8
  ucvtf d1, x8
  ucvtf d2, x8
  ucvtf d3, x8
  ucvtf d4, x8
  ucvtf d5, x8
  ucvtf d6, x8
  ucvtf d7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d6 | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020426709207000000026694025160100100800008000010080020800145001177206188468802669026709267096632666581601352008021580024200800248002026730267141180201100991001008000010080000003111511716026706080000800001002671026710267102671026710
16020426718207000000026694025160100100800008000010080020800165001168137188416302669026709267096632666581601352008002080020200800208002426709267091180201100991001008000010080000003111511716026706080000800001002671026710267102671026710
16020426717207000000026694025160100100800008000010080020800155001169098188416302669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000003111511716026706080000800001002671026710267102671026710
16020426717207000000026694025160100100800008000010080020800155001168983188552912669026710269086636666581601352008002080020200800208002026709267091180201100991001008000010080000003111511716026706080000800001002671026710267102671026835
16020426712207000000026694225160100100800008000010080020800155001166210187523002669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000006111511716026706080000800001002671026710267102671026710
160204267092080000000266940251601001008000080000100800208001550011683501892522126690267092670966326665816013520080020800202008002080020267092670911802011009910010080000100800009027111511716026706080000800001002671026710267102671026710
16020426719207000000026694025160100100800008000010080020800155001168951188416302669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000000111511716026706080000800001002671026710267102671026710
16020426725207000000026694025160100100800008000010080020800155001168951188416302669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000003111511716026706080000800001002671026710267102671026710
16020426712210000000026694025160100100800008000010080020800145001168951188416302669026709267096632666581601352008002080020200800208002026709267091180201100991001008000010080000003111513916026706080000800001002671026710267102671026890
16020426717207000012002669402516010010080000800001008002080015500116895118843011266902670926709663266662160135200800208002020080020800202670926709118020110099100100800001008000000171111511716026706080000800001002671026710267102671026710

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | branch mispredict (cb) | cf | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024267112000270266947251600101080000800001080000800005011688801884032026704267152670966533669216001020800008000020800008000026709267151180021109101080000108000000005020116011267068000080000102671026710267102671026710
16002426709200000266940251600101080000800001080000800005011688801884032126845267132670966533668916001020800008000020800008000026709267091180021109101080000108000000005020116011267068000080000102671026710267102671026710
16002426709200000266940591602701080000800001080000800005011685061884032026854267172670966613668916001020800008000020800008000026709267091180021109101080000108000000015020116011267068000080000102671026710267102671026710
16002426709200000266980251600101080000800001080000800005011688801884032026755267202670966583668916001020800008000020800008000026709267091180021109101080000108000000005020116011267068000080000102671026710267102671026710
16002426709200000266940251600101080000800001080000800005011688801884032026789267172671766583668916001020800008000020800008000026709267131180021109101080000108000000005020116011267068000080000102671026710267102671026710
16002426709200000266940251600101080000800001080000800005011688801884032126774267152671866633668916001020800008000020800008000026709267091180021109101080000108000000005020116011267068000080000102671026710267102671026710
16002426709200000266940251600101080000800001080000800005011688801884032126690268392672066573668916001020800008000020800008000026709267091180021109101080000108000000005020116011267068000080000102671026710267102671026710
160024267092004240266940251600101080000800001080000800005011688801884032026690267092683866603668916001020800008000020801898000026709267091180021109101080000108000000005020116011268138000080000102671026710267102671026710
16002426709200000266940251600101080000800001080000800005011710861884080026838267102672866563668916001020800008000020800008000026709267091180021109101080000108000000005020116011267068000080000102671026710267102671026710
16002426709200000266940251600101080000800001080000800005011688801884032126825267172671466543668916001020800008000020800008000026709267091180021109101080000108000000005020116011267068000080000102671026710267102671026710