Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, S from W)

Test 1: uops

Code:

  scvtf s0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4f51inst issue (52)~issue fp/simd (54)~issue ld/st (55)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map op fp/simd (7e)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a6a8accfd5d6ddinst fetch restart (de)e0ld/st retires (ed)? fp/simd (ee)f5f6f7f8fd
2004376203610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000732162137310001000377377377377378
2004376203610252000100010001000100014075228200357376376723109200010001000100010003763791110011000100000732162237310001000377377380377379
2004376303610252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000732162237310001000377377377377377
2004376203610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000732162237310001000377377377377377
2004376303610252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000732162237310001000377377377377377
2004376203610252000100010001000100014075228200357376376723109200010001000100010003773761110011000100000732162237310001000377377377377377
2004376203610252000100010001000100014075228200357376376723109200010001000100010003773761110011000100000732162237310001000377377377377377
2004376303612252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000732162237310001000377377377377377
2004376303610252000100010001000100014075229301358376376723109200010001000100010003763791110011000100000732162237310001000377377377377377
2004376203610252000100010001000100014075228200357376376723109200010001000100010003763761110011000100000732162237310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  scvtf s0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01)cycle (02)03070b18191e1f3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a6a8a9acc2cfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
3020413003297400000013001711940925401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100005000131016163212951910000100001000010100130033130033130033130033130033
3020413003297400000013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002012713003213003211202011009910010100100001000100000000131012162212951910000100001000010100130037130033130033130033130033
3020413003297410000013001711940825401001010020000100001002000010000500621449714802709013001331300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297410000013001711940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000013001711940825401001010020000100001002000010000500621449714802821013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400009013001711940825401001010020000100001002000010000500621449714802709013001401300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000000130017119476254010010100200001000010020116100005006214497148027090130013013003313003212550915126240301002001000020000200100002000013003213003211202011009910010100100001000100033000131012162212951910000100001000010100130033130033130033130033130068
3020413003297400000013001711940825401001010020000100001002000010000500621449714802709013001301300321300331254663126240301002001000020000200100002000013003213003211202011009910010100100001003100001285682131016162212951910000100001000010100130033130033130033130033130033
3020413003297400000013001711940925401001010020000100001002000010000500621464114802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000030131002162212951910000100001000010100130033130033130033130033130033
3020413003297400000013001811940825401001010020000100001002000010000500621449714802709013001301300321300321254663126240301002001000020000200100002000013003213003211202011009910010100100001000100000000131012162212951910000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01)cycle (02)030a1e3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8acc2c5branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
3002413003297401213001711940825400101001020000100001020000100005062145931480052800130013130032130032125489312626430010201000020000201000020000130032130032112002110910100101000010100000600012705165312951910000100001000010010130040130036130033130035130033
3002413003297401213001711941025400101001020000100001020000100005062144971480052800130013130032130032125491312626230010201000020000201000020000130032130032112002110910100101000010100001300012705163512951910000100001000010010130077130036130033130033130033
300241300329740013001711940825400101001020000100001020000100005062144971480052800130016130033130032125489312626230010201000020000201000020000130032130032112002110910100101000010100001300012724164512951910000100001000010010130049130039130034130033130036
3002413003297401213001711940825400101001020000100001020000100005062145931480052800130016130032130032125489312626230012201000020000201000020000130032130032112002110910100101000010100001300012706165412951910000100001000010010130034130036130034130033130033
3002413003297401213001711940825400101001020000100001220000100006062144971480052800130013130032130033125491312626230010201000020000201000020000130032130032112002110910100101000010100001300012705163512952010000100001000010010130120130195130046130034130033
3002413003297401213001711940825400101001020000100001020000100005062144971480075200130014130034130033125489312626230010201000020000201000020000130032130032112002110910100101000010100000300012704167512951910000100001000010010130071130034130033130033130034
3002413003297401213001811940825400101001020000100001020000100005062144971480052800130013130033130032125489312626230010201000020000201000020000130032130032112002110910100101000010100001000012705165612951910000100001000010010130037130033130033130033130036
3002413003397401213001711940825400101001020000100001020000100005062144971480087600130013130032130033125489312626230010201000020000201000020000130032130032112002110910100101000010100001300012704164612951910000100001000010010130033130033130034130036130033
3002413003297401213001711940825400101001020000100001020000100005062144971480052800130013130034130032125490312626630010201000020000201000020000130032130032112002110910100101000010100001300012704164512951910000100001000010010130033130033130033130033130033
3002413003297401213001711941025400101001020000100001020000100005062144971480052800130013130032130035125489312626230010201000020000201000020000130032130032112002110910100101000010100000000012704164512951910000100001000010010130360130050130034130037130033

Test 3: throughput

Count: 8

Code:

  scvtf s0, w8
  scvtf s1, w8
  scvtf s2, w8
  scvtf s3, w8
  scvtf s4, w8
  scvtf s5, w8
  scvtf s6, w8
  scvtf s7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01)cycle (02)03080b18191e1f3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8acc5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042671620000201202669902516010010080000800001008002080015500115956818841632669426713267136636666621601352008002480024200800248002026713267131180201100991001008000010080000031115117016002671080000800001002671426715267102671426714
1602042670920000001202669922516010010080000800001008002080015500117053218841632669426714267136636666621601352008002080020200800248002426713267091180201100991001008000010080000131115117016002671080000800001002671426710267142671526714
160204267141990000002669822516010010080000800001008002080015500116848218791302669626714267106624966511601412008002780023200800238002326714267141180201100991001008000010080000032225127124112671180000800001002671626715267162671526711
16020426714200000012026700325160100100800008000010080023800175001168398188273126696267142671566211066491601412008002380023200800238002326715267101180201100991001008000010080000132225128124112671180000800001002671426711267112671526715
1602042671020000001202677432516010010080000800001008002380018500117412918838312669526714267106624966471601412008002380023200800238002326714267101180201100991001008000010080000102225127124112671180000800001002671526715267112671526715
1602042671420000001202670822516010010080000800001008002380018500117502918840582669126714267146625106646160141200800238002320080023800232671526715118020110099100100800001008000019052225128242112686580000800001002688826892268882689127009
160204274052011111291208270321229516062210080260800001008020880017500117476118837622669626714267136625966491601412008002380023200800238002326717267131180201100991001008000010080000232225128124112671180000800001002671526711267152671526715
1602042671420000001202683822516010010080000800001008002380017500118028218834602669126714267136621966501601412008002780023200800278002326713267151180201100991001008000010080000102225127124112671280000800001002671526714267152671126716
1602042671020000001202670332516010010080000800001008002380018500116742718817342669626710267146625966461601412008002380023200800238002326715267151180201100991001008000010080000132225127124112671180000800001002671526715267152671126711
1602042671420000001202669922516010010080000800001008002380018500116957118833692669526710267136625966501601422008002380023200800238002726714267141180201100991001008000010080000102225127124112671180000800001002672726716267152671126715

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01)cycle (02)0304080b18191e1f3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8a9accfd0d5d6ddinst fetch restart (de)e0eald/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600242670920000000002672302516001010800008000010800008000050116888018840322669326710267146656036689160010208000080000208000080000267092670911800211091010800001008000000050200216222670608000080000102671026710267102671026710
1600242670920000000002671402516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671026710267102671026710
1600242670920000000002670102516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116222670608000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671026710267102671026710
1600242670920000000002670202516001010800008000010800008000050116888018840322669026709267096656036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671026710267102671026710
1600242670920000000002669402516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671426716267372687826710
1600242670920000040002669602516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200216112670608000080000102671026710267112671226710
1600242671320000000002670502516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671026710267102671026710
1600242670920000000002670402516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671026710267102671026710
1600242670919900000002669802516001010800008000010800008000050116888018840322669026709267096653036689160010208000080000208000080000267092670911800211091010800001008000000050200116112670608000080000102671026710267102671026710