Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SCVTF (scalar, integer, S from X)

Test 1: uops

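Code (the trailing mov lines appear to be register setup rather than measured instructions: the results below report 2.000 retires and 0.000 integer unit issues):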

  scvtf s0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 07 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
20043762000361225200010001000100010001407522820357376376723109200010001000100010003763761110011000100000732163337310001000377377377381377
20043763000361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000733163337310001000377377377377377
20043763000361025200010001000100010001407522820357376376723112200010001000100010003763761110011000100000733163337310001000377377377377377
20043762000361025200010001000100010001420722820357376376723109200010001000100010003763761110011000100000733163337310001000377377377377377
200437630003612125200010001000100010001407522820358376376723109200010001000100010003763761110011000100000733163337310001000377377380377377
20043763000361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000733163337310001000377377377377377
20043763000361025200010001000100010001407522820357379376723109200010001000100010003923761110011000100000733163337310001000377377377377377
20043763000361025200010001000100010001407522820357376376753109200010001000100010003763761110011000100000733163337310001000377377377377377
20043763000363025200010001000100010001407522820357376376723111200010001000100010003763761110011000100000733163337310001000379377377377377
20043763000361025200010001000100010001407522820357376376723109200010001000100010003763761110011000100000733163337310001000377377377377377

Test 2: Latency 1->2 roundtrip

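Code (scvtf followed by fmov forms the measured x0 -> s0/d0 -> x0 round trip; the trailing mov lines appear to be register setup, since rewriting x0 inside the loop would break the dependency chain):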

  scvtf s0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a7 | a8 | a9 | ac | c5 | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130032974004001300171194082540100101002000010000100200001000050062144971480270901300130130037130032125466312624030100202100002000020010000200001300321300321120201100991001010010000100010000000000131012162212951910000100001000010100130033130033130033130033130033
30204130034974000001300171194082540100101002000010000100200001000050062144971480270901300130130039130034125466312624230100200100002000020010000200001300321300321120201100991001010010000100210000000300131012162212951910000100001000010100130033130033130033130037130033
30204130033974000001300171194082540100101002000010000100200001000050062144971480270901300130130034130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000601300171194082540100101002000010000100200001000050062144971480270901300130130053130037125466312624030458200100602000020010000200001300321300321120201100991001010010000100010000000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000001300171194082540100101002000010000100200001000050062144971480270901300130130066130034125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000000131012162212951910000100001000010100130033130033130033130033130033
302041300329740004801300171194082540100101002000010000100200001000050062144971480270911300130130033130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000020300131012162212951910000100001000010100130033130033130033130064130035
30204130032974000001300171194082540100101002000010000100200001000050062144971480294111300130130050130033125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000000131012162212952110000100001000010100130033130033130033130033130033
30204130032974000001300171194082540100101002000010000100200001000050062144971480270901300130130047130032125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000002131012162212951910000100001000010100130033130038130033130033130033
30204130032974000001300171194082540100101002000010000100200001000050062144971480282301300130130060130033125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000010000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000001300171194082540100101002000010000100200001000050062144971480270911300130130067130034125466312624030100200100002000020010000200001300321300321120201100991001010010000100010000000000131012162212951910000100001000010100130033130033130033130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003497400001200130017119408254001010010200001000010200001000050621449714801504113001313003213003212548931262683001020100002000020100002000013003213003211200211091010010100001010000000013061161312951910000100001000010010130033130033130033130033130036
3002413003297400019090130019119408254001610010200001000011200001000050623386814807028013001313003313003212548931262643001020101242000020100002000013045813003411200211091010010100001010000000012701161112952110000100001000010010130033130033130033130033130033
30024130032974000010800130018119408254001010010200001000010200001000050621449714800528113001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000102012701751112951910003100001000010010130033130131130034130033130033
30024130032116800002280130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262633001020100002000020100002000013003213003221200211091010010100001010000000012701162112951910000100001000010010130033130033130068130033130033
3002413003210081100276352130017119408254001010010200001000010200001000050621449714800528113001313003213003212548931262633001020100002000020100002000013003213003211200211091010010100001010000000012701161112951910000100001000010010130033130065130033130033130033
3002413003297400001200130017119408254001010010200001000010200001000050621449714800528113001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012701161112951910000100001000010010130033130033130033130033130033
3002413003297400008400130017119408254001010010200001000010200001000050621449714800528113001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012701161112951910000100001000010010130033130033130033130033130044
3002413003298600001860130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012701161112951910000100001000010010130033130033130033130033130033
3002413003297400001590130017119408254001010010200001000010200001000050621449714800528013001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012701161112951910000100001000010010130033130033130033130033130033
30024130032974000063352130019119408254001010010200001000010200001000050621449714800873113001313003213003212548931262623001020100002000020100002000013003213003211200211091010010100001010000000012701161212951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

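Code (the eight scvtf lines match the count of 8 above; the trailing mov appears to be register setup rather than a measured instruction, TODO confirm):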

  scvtf s0, x8
  scvtf s1, x8
  scvtf s2, x8
  scvtf s3, x8
  scvtf s4, x8
  scvtf s5, x8
  scvtf s6, x8
  scvtf s7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042671520000001200266982251601001008000080000100800208001550011653041884291126694267132671066366666216013520080020800242008002080020267132671311802011009910010080000100000800000110120111511701600267100080000800001002671026710267102671026719
1602042672020700000002669802516010010080000800001008002080016500116895118828820266902671326718663666661160136200800208002420080020800202671326709118020110099100100800001000008000001030111511701600267100080000800001002671426714267142671426727
16020426711200000000026699125160100100800008000010080027800205001168352188463812669526714269136625106650160142200800238002320080023800232671026714118020110099100100800001000008000002060222512712411267110080000800001002671126715267152671526811
160204267232000000120026695425160100100800008000010080023800185001170734188360702669626710267106625106649160141200800238002720080023800232671026714118020110099100100800001000008000005030222512812411267120080000800001002671126715267152671426715
160204267512000000120026699225160100100800008000010080023800185001166375188439112669526710267106625966501601402008002380027200800238002326714267131180201100991001008000010000080000010240222512712411267110080000800001002671526715267112671226715
160204267372000000000266952251601001008000080000100800238001850011574901883414126694267152671466251066501601412008002380027200800238002326710267141180201100991001008000010000080000000150222512712411267110080000800001002671426714267152671526715
160204267432000000120026700125160100100800008000010080023800185001188204188372912669526714267136625966491601412008002380023200800238002326713267141180201100991001008000010000080000020060222512812411267110080000800001002671126711267152671626719
160204272212000000120026700225160100100800008000010080023800185001167786188816312669526714267136625966501601412008002380023200800238002326714267141180201100991001008000010000080000030060222512712411267110080000800001002671626730267192673226726
1602042671720000001200266950251601001008000080000100800238001850011875491884038126695267152671466211066501601432008002380027200800238002326713267131180201100991001008000010000080000059090222512712411267110080000800001002671526716267142671526715
160204267402000000120026695225160100100800008000010080023800185001175098188454602669426715267146621106650160140200800238002320080023800232671426714118020110099100100800001000008000003030222512712411267110080000800001002671126713267142671126716

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 08 | 0b | 0f | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242671120000202669402516001010800008000010800008000050116888018838690266902670926709665303668916001020800008000020800008000026709267091180021109101080000108000000502017161717267068000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018904630266902670926709665348366891600102080000800002080189800002707026709118002110910108000010800000050207171616267068000080000102671026710267102671026710
16002426709199000026694025160010108000080000108000080000501168880188558802669026709267096653036689160010208000080000208000080000267092670911800211091010800001080000005020716167267068000080000102671026710267102671026710
160024267092000000266940251600101080000800001080000800005011687871891037026690267092670966530366891600102080000800002080000800002670926709118002110910108000010800000650201616716267068000080000102671026713267102671026710
160024267092000000266940251600101080000800001080000800005011688801887134026690267092670966530366891600102080000800002080000800002670926709118002110910108000010800000050201716136267068000080000102671026710267102671026713
160024267092000000266940251600101080000800001080000800005011688801892336026690267092670966530366891600102080000800002080000800002670926709118002110910108000010800000050201716146267068000080000102671026710267102671026710
160024267092000100266940251600101080000800001080000800005011688801892337026690267092670966530366891600102080000800002080000800002671126709118002110910108000010800000050201616176267068000080000102671026713267102671026710
1600242670920000002669402516001010800008000010800008000050116888018917780266902670926709665303668916001020800008000020800008000026709267091180021109101080000108000000502016161616267068000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018990630266902670926709665303668916001020800008000020800008000026709267091180021109101080000108000000502016161616267068000080000102671026710267102671026710
1600242670920000002669402516001010800008000010800008000050116888018876150266902670926709665303668916001020800008000020800008000026709267091180021109101080000108000006502016161616267068000080000102671026710267102671026710