Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, H from W)

Test 1: uops

Code:

  scvtf h0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a5 | a6 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
20043763000361025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000100731161137310001000377377377377377
20043762000361025200010001000100010001407522820357376376720310920001000100010001000389376111001100001000030731161137310001000377377377381377
20043762000372025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137310001000377377377377377
20043762000361225200010001000100010001407522820357376376720310920001000100010001189378376111001100001000000731161137310001000377377377377380
20043762000361025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137310001000377377377380377
20043763000371025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137310001000377377378377377
20043763000361025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137310001000377381377377377
20043763000361025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137310001000377377377377377
20043763000361025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137310001000377377377377377
20043763000361025200010001000100010001407522820357376376720310920001000100010001000376376111001100001000000731161137710001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf h0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 04 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974030130017119408254010010100200001000010020000100005006214545148027090130013130032130032125466312624030100202100002000020010000200001300341300321120201100991001010010000100001000041030131012162212951910027100001000010100130034130033130034130033130044
3020413003297400013001711940825401001011520000100021002000010000500621449714803669013010713003213003212546631262403010020010000200002021000020000130032130032112020110099100101001000010000100003030131012162212951910000100001000010100130033130033130033130033130034
3020413003297400013001711940825401001010020000100001002000010000500621449714802709013001613003213011912546631262403010020010061200002001000020000130119130033112020110099100101001000010000100001200131012162212951910000100001000010100133420133651133056133585134021
302041334761029700130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000049000131012162212951910000100001000010100130033130033130033130033130059
30204130032974000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000059000131012162212951910000100001000010100130033130033130033130033130065
3020413003297409013001711940825401001010020000100001002000010000500621449714802709013001313003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100002000131012162212951910000100001000010100130033130033130033130033130062
30204130032974000130017119408254010010100200061000210020000100005006214497148027090130013130124130032125466312624130100200100002000020010000200001300321300321120201100991001010010000100001000065000131012162212951910000100001000010100130033130033130033130033130051
30204130032974000130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100001000055000131012162212951910000100001000010100130033130033130033130033130033
302041300329730120130017119408254010010100200001000010020000100005006214497148027090130013130032130032125466312624030100200100002000020010067200001300321300321120201100991001010010000100001000041030131012162212951910000100001000010100130033130033130033130033130084
3020413003297400013001711940825401001010020000100001002000010000500621449714802709013001313003213003212546731262403010020010000200002001000020000130032130032112020110099100101001000010000100000000131012162212951910000100001000010100130033130033130033130033130041

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
300241300329740003013001711944168400101001620000100001020000100495062219171480052801300130130032130034125491312629530820201000020000201000020000130121130205112002110910100101000010100000312701165112952010000100001000010010130033130034130036130033130033
300241300329740001213001711941025400101001020000100001020000100005062146411480052811300130130032130032125489312631430010201000020000201000020000130032130032112002110910100101000010100043312725161112951910000100001000010010130033130033130034130033130036
300241300339990001213001711940825400101001020000100001020000100005062144971480086111300130130032130032125493312629830010201000020000201000020000130062130032112002110910100101000010100009312702161112951910000100001000010010130033130034130036130036130033
300241300339750104813001711940825400101001020000100001020000100005062144971480087011300133130032130034125489312630330010201000020000201000020000130032130033112002110910100101000010100001012701162112951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480064011300130130039130036125489312631130010201000020000201000020000130032130032112002110910100101000010100001012701161112951910000100001000010010130036130033130033130035130035
300241300349950001213001711941025400101001020000100001020000100005062144971480097911300130130036130032125489312631530010201000020000201000020000130032130035112002110910100101000010100001312721161212951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052811300130130032130032125489312630830010201000020000201000020000130032130032112002110910100101000010100000012701161112951910000100001000010010130114130033130033130033130033
30024130032974001102913001711940825400101001020000100001020000100005062144971480052811300130130032130032125489312631330010201000020000201000020000130032130032112002110910100101000010100000012701162112951910000100001000010010130033130063130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052811300130130032130032125489312630030010201000020000201000020000130032130032112002110910100101000010100000012701162112951910000100001000010010130033130033130033130033130033
30024130032974000013001711940825400101001020000100001020000100005062144971480052811300130130032130032125489312630630010201000020000201000020000130032130032112002110910100101000010100001012701161112951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  scvtf h0, w8
  scvtf h1, w8
  scvtf h2, w8
  scvtf h3, w8
  scvtf h4, w8
  scvtf h5, w8
  scvtf h6, w8
  scvtf h7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 18 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267131990122669822516010010080000800001008002080014500117394218844241266942671426713663666662160134200800248002420080020800242670926713118020110099100100800001000080000360001115117016026706080000800001002671426714267142671426854
160204267132000122669902516010010080000800001008002080016500116816618838981266902671326713663666662160136200800208002020080020800202671326713118020110099100100800001000080000360301115117016026711080000800001002671026710269192680026723
16020426713200012266982251601001008000080000100800208001550011787871884163026694267142671366366666216013420080020800242008002080024267132671311802011009910010080000100008000040301115117016026710080000800001002671526714267102671026742
16020426753201012266982251601001008000080000100802088019950011689511888873126694267092671366366666216013420080020800202008002080020267092671411802011009910010080000100008000025093501115117016026710080000800001002671426710267102671426962
16020426713200002669412516010010080000800001008002080015500116895118862541266942671326709663666662160135200800208002020080020800202671426713118020110099100100800001000080000350001115117016026710080000800001002671426715268952674126723
16020426713200002669822516010010080000800001008002080014500116860518847751266942671326710663266662160134200800208002020080024800202671326709118020110099100100800001000080000680601115117016026706080000800001002671026714267102671526950
160204267152000122669472516010010080000800001008002080015500116895118890120266942671326709663266662160135200800208002020080020800202670926713118020110099100100800001000080000610601115117016026711080000800001002671026714267102671026919
160204267152000122669422516010010080000800001008002080016500117005918841631266942671326713663666658160135200800208002020080024800202671326713118020110099100100800001000080000420301115117016026710080000800001002671426714267142671426712
16020426712200002669422516010010080000800001008039680015500117885218851570266942671326714663266659160136200800208002020080024800202671426710118020110099100100800001000080000680901115117016026710080000800001002671426715267102671026949
16020426713200012266993251601001008000080000100800208001450011685971884201126691267092671466366666216013520080024800202008002080024267132671311802011009910010080000100008000050001115117016026710080000800001002671526710267142671026895

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024267102000026694125160010108000080000108000080000501168880188344302669026709267096653366891600102080000800002080000800002670926709118002110910108000010800006300502081653267068000080000102671026710267102671026929
160024267152000026694025160010108000080000108000080000501168568188403212669026709267096653366891600102080000800002080000800002670926709118002110910108000010800005906502051664267068000080000102671026710267102671026935
160024267222000026694425160790108000080000108000080000501183157188403202669026709267096653366891600102080000800002080000800002670926709118002110910108000010800006009502031635267068000080000102671026710267102671026934
160024267242000026694025160010108000080000108000080000501168880188403202669026709267096653366891600102080000800002080000800002670926709118002110910108000010800006109502061664267068000080000102671026710267102671026944
1600242672120000266949251600101080000800001080000800005011688801884032026690267092670966533668916001020800008000020800008000026709267091180021109101080000108000067015502031664267068000080000102671026710267102671026945
160024267202000026694025160010108000080000108000080000501168880188403212669026709267096655366891600102080000800002080000800002670926709118002110910108000010800006309502061653267068000080000102671026710267102671026934
160024267302000026694025160010108000080000108000080000501168880188403202669026709267096653366891600102080000800002080000800002670926709118002110910108000010800004400502031655267068000080000102671026710267102671026948
160024267202000026694025160010108000080000108000080000501176821188368102669026709267096653366891600102080000800002080000800002670926709118002110910108000010800006903502031635267068000080000102671026710267102671026961
1600242672220000266941725160010108000080000108000080000501168880188403202669026709267096653366891600102080000800002080000800002670926709118002110910108000010800006803502051655267068000080000102671026710267102671026816
160024267092000026694025160010108000080000108000080000501168880188403202669026709267096653366891600102080000800002080000800002670926709118002110910108000010800007006502051646267068000080000102671026710267102671026954