Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SMINP (vector, 4H)

Test 1: uops

Code:

  sminp v0.4h, v0.4h, v1.4h
  movi v0.16b, 1
  movi v1.16b, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)03181e3a3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a6a8a9acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
10042037160026216872510001000100026468012018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037150026216872510001000100026468012018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037150026216872510001000100026468002018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037160026216872510001000100026468012018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037160026216872510001000100026468012018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037150026216872510001000100026468012018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
100420371500210616872510001000100026468002018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037150026216872510001000100026468002018203720371572318951000100020002037203711100110000000077416441787100020382038203820382038
10042037150026216872510001000100026468002018203720371572318951000100020002037203711100110000100077416441787100020382038203820382038
10042037150026216872510001000100026468012018203720371572318951000100020002037203711100110000100077416441787100020382038203820382038

Test 2: Latency 1->2

Code:

  sminp v0.4h, v0.4h, v1.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire (01)cycle (02)03191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9facc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102042003715001206119687251010010010000100100005002847680120018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
1020420037150042306119687251010010010000100100005002847680120018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119931100001002003820038200382003820038
10204200371500006119687251010010010000100100005002847680020018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
10204200371500006119687251010010010000100100005002847680020018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715002106119687251010010010000100100005002847680020018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715002106119687251010010010000100100005002847680020018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
10204200371500006119687251010010010000100100005002847680020018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
10204200371500006119687251010012210000100100005002847680120018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715000072619687251010010010000100100005002847680120018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715002706119687251010010010000100100005002847680120018200372003718422318745101002001000020020000200372003711102011009910010010000100007101161119791100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire (01)cycle (02)03090b191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a9accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100242003715000027611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038
1002420037150000441821968725100101010000101015250284768002009020133200371844431876710010201000020200002003720037111002110910101000010200640216221978510000102003820038200382003820038
100242003715000027611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038
10024200371500001563461968725100101010000101000050284768012001820037200371844431876710010201000020200002003720037111002110910101000010006640216221978510000102003820038200382003820038
100242003715000012611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038
10024200371500010611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038
10024200371500000611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038
1002420037150000435611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216521978510000102003820038200382003820038
10024200371500000611968725100101010000101000050284768002001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038
10024200371500006611968725100101010000101000050284768012001820037200371844431876710010201000020200002003720037111002110910101000010000640216221978510000102003820038200382003820038

Test 3: Latency 1->3

Code:

  sminp v0.4h, v1.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire (01)cycle (02)03181e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102042003715002461196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007102161119791100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
1020420037149069061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038
102042003715003091895196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119861100001002003820038200382003820038
10204200371500061196872510100100100001001000050028476801200182003720037184223187451010020010000200200002003720037111020110099100100100001007101161119791100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024200371503536196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
10024200371500935196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
10024200371501861196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
1002420037150061196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
1002420037150661196872510010101000010100005028476802001820037200371844431876710010201000022200002003720037111002110910101000010000640316331978510000102003820038200382003820038
1002420037150061196872510010101001210100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
1002420037150061196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
1002420037150061196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010001640316331978510000102003820038200382003820038
1002420037150061196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038
10024200371500726196872510010101000010100005028476802001820037200371844431876710010201000020200002003720037111002110910101000010000640316331978510000102003820038200382003820038

Test 4: throughput

Count: 8

Code:

  sminp v0.4h, v8.4h, v9.4h
  sminp v1.4h, v8.4h, v9.4h
  sminp v2.4h, v8.4h, v9.4h
  sminp v3.4h, v8.4h, v9.4h
  sminp v4.4h, v8.4h, v9.4h
  sminp v5.4h, v8.4h, v9.4h
  sminp v6.4h, v8.4h, v9.4h
  sminp v7.4h, v8.4h, v9.4h
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2505

retire (01)cycle (02)030818191e1f3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8a9accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
802042004715000000040258010010080000100800005006400001200192003820038997339996801002008000020016000020038200381180201100991001008000010000024511031611200350800001002003920039200392003920039
802042003815000000040258010010080000100800005006400001200192003820038997339996801002008000020016000020038200381180201100991001008000010000027511011610200350800001002003920039200392003920039
80204200381500000006125801001008000010080000500640000120019200382003899733999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039
802042003815000030040258010010080000100800005006400001200192003820038997326999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039
80204200381500000004025801001008000010080000500640000020019200382003899733999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039
80204200381500000004025801001008000010080000500640000020019200382003899733999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039
80204200381500000004025801001008000010080000500640000120019200382003899733999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039
8020420038150000000402580100100800001008000050064000012001920038200389973399968010020080000200160000200382003811802011009910010080000100000165511011611200350800001002003920039200392003920039
80204200381500000004025801001008000010080000500640000120019200382003899733999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039
802042003815000012008225801001088018910080000500640000120019200382003899733999680100200800002001600002003820038118020110099100100800001000000511011611200350800001002003920039200392003920039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2505

retire (01)cycle (02)031e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6daddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
8002420049150039258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050203160352003580000102003920039200392003920039
8002420038150039258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050206160532003580000102003920039200392003920039
80024200381500102258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050203160352003580000102003920039200392003920039
8002420038150039258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050205160532003580000102003920039200392003920039
8002420038150039258001010800001080000506407680120019200382003899963100188001020800002016000020038200381180021109101080000100050205160532003580000102003920039200392003920344
8002420038150039258001010800001080000506400000120019200902003899963100188001020800002016000020038200381180021109101080000100050205160552003580000102003920039200392003920039
8002420038150039258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050203160552003580000102003920039200392003920039
800242003815003242580010108000010800005064000000200192003820038999631001880010208000020160000200382003811800211091010800001008450205160352003580000102003920039200392003920039
8002420038150039258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050203160552003580000102003920039200392003920039
8002420038150039258001010800001080000506400000020019200382003899963100188001020800002016000020038200381180021109101080000100050203160352003580000102003920039200392003920039