Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCVTAS (vector, 4H)

Test 1: uops

Code:

  fcvtas v0.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 08 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | inst ldst (9b) | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100430372300061254725100010001000398160301830373037241432895100010001000303730371110011000000073116112629100030383038303830383038
100430372400061254725100010001000398160301830373037241432895100010001000303730371110011000030073116112629100030383038303830383038
100430372400089254725100010001000398160301830373037241432895100010001000303730371110011000000073116112629100030383038303830383038
1004303724100103254725100010001000398160301830373037241432895100010001000303730371110011000000073116112629100030383038303830383038
1004303724001261254725100010001000398160301830373037241432895100010001000303730371110011000010073116112629100030383038303830383038
100430372300061254725100010001000398160301830373037241432895100010001000303730371110011000000073116112629100030383038303830383038
100430372400061254725100010001000398160301830373037241432895100010001000303730371110011000000373116112629100030383038303830383038
100430372400061254725100010001000398160301830373037241432895100010001000303730371110011000000073116112629100030383038303830383038
100430372300061254744100010001000398160301830373037241432895100010001000303730371110011000010073116112629100030383038303830383038
100430372300061254725100010001000398160301830373037241432895100010001000303730371110011000000073116112629100030383038303830383038

Test 2: Latency 1->2

Code:

  fcvtas v0.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037224040529547251010010010000100100005004277160030018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
1020430037225072629547251010010010000100100005004277160030018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
1020430037225063129547251010010010000100100005004277160030018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
102043003722406129547251010010010000100100005004277160130018030037300372826432874510100200100002001000030037300371110201100991001001000010009071011611296330100001003003830038300383003830038
102043003722566129547251010010010000100100005004277160130018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
102043003722506129547251010010010000100100005004277160130018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
102043003722506129547251010010010000100100005004277160130018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296331100001003003830038300383003830038
102043003722406129547251010010010000100100005004277160030018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
1020430037225029829547251010010410000100100005004277160030018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038
102043003722506129547251010010010000100100005004277160030018030037300372826432874510100200100002001000030037300371110201100991001001000010000071011611296330100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225000000000612954725100101010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006402162229629010000103003830038300383003830038
1002430037225000000000612954725100171010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006402162229629010000103003830038300383003830038
100243003722400000000014362954725100101010000101000050427716003001830037300372828632876710010201001220100003003730037111002110910101000010000000006403162229629010000103003830038300383003830038
1002430037225000000000612954725100101010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006402162229629010000103003830038300383003830038
1002430037225000000000612954725100121010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006403162229629010000103003830038300383003830038
1002430037225000000000612954725100101010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006692162229629010000103003830038300383003830038
1002430037224000000000612954725100101010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006402168529629010000103003830038300383003830038
1002430037225000000000612954725100101210000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006402162229629010000103003830038300383003830038
1002430037225000000000612954725100101010000101000050427716003001830037300372828632876710010201000020100003003730037111002110910101000010000000006402162229629010000103003830038300383003830038
1002430037225000000000612954725100101010000101000050427716003001830037300372828632877310010201000020100003003730037111002110910101000010000000006402162229629010000103003830038300383003830038

Test 3: throughput

Count: 8

Code:

  fcvtas v0.4h, v8.4h
  fcvtas v1.4h, v8.4h
  fcvtas v2.4h, v8.4h
  fcvtas v3.4h, v8.4h
  fcvtas v4.4h, v8.4h
  fcvtas v5.4h, v8.4h
  fcvtas v6.4h, v8.4h
  fcvtas v7.4h, v8.4h
  movi v8.16b, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 07 | 0b | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020420058155000000302580108100800081008002050064013202002020039200399977699908012020080032200800322003920039118020110099100100800001000000000111511821621200360800001002004020040200402004020040
8020420039155000000302580108100800081008002050064013212002020039200399977699908012020080032200800322003920039118020110099100100800001000000000111511821622200360800001002004020040200402004020040
8020420039155000000302580108100800081008002050064013202002020039200399977699908012020080032200800322003920039118020110099100100800001000000000111511821622200360800001002004020040200402004020040
8020420039155000000302580108100800081008002050064013202002020039200399977699908012020080032200800322003920039118020110099100100800001000000000111511821622200360800001002004020040200402004020040
8020420039155000000302580108100800081008002050064013202002020039200399977699908012020080032200800322003920039118020110099100100800001000000000111511821612200360800001002004020040200402004020040
80204200391550000003025801081008000810080020500640132020020200392003999776999080120200800322008003220039200391180201100991001008000010000000001115239211922203291800001002040120406204062040620448
802042039815800558255282263160807761008077910180822500645606020309204062041710048371010180832200808432028072120418204078180201100991001008000010000000309821115233611535200450800001002004920049200492004920049
802042004815600000016328380395100803701028030950064250002002920048202011000835101458033420280346202805582031520048418020110099100100800001004400030222512832333201270800001002004920050200492020520206
80204200491560000006426801161008001610080028500640196020029200482004899761099868012820080038200800382004920049118020110099100100800001000000000222512832323200460800001002004920050200502004920049
8020420048155000000642680116100800161008002850064019602002920048200489976999868012820080038200800382004820048118020110099100100800001000000000222512832332200450800001002004920049200492005020049

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2505

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002420050155008225800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020092
8002420039156104025800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
80024200391550040258001010800001080000506400000120020200392003910011310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
8002420039155004025800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
8002420039155004025800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000101005020116112003680000102004020040200402004020040
8002420039155006225800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
8002420039155004025800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
8002420039155004025800101080000108000050640000012002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
8002420039156004025800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040
8002420039155004025800101080000108000050640000002002020039200399996310019800102080000208000020039200391180021109101080000100005020116112003680000102004020040200402004020040