Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SMAXV (vector, 16B)

Test 1: uops

Code:

  smaxv b0, v0.16b
  movi v0.16b, 1
  movi v1.16b, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038
1004303722118126825472510001000100039816003018303730372414328951000100020003037303711100110001077416442629100030383038303830853038
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000377416442629100030383038303830383038
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038
100430372211026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038
1004303722110218725472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030843038303830383038
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038
100430372311026825472510001000100039816003018303730372414328951000100020003037303711100110000077416442629100030383038303830383038

Test 2: Latency 1->2

Code:

  smaxv b0, v0.16b
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722511461295472510100100100001001000050042771603001803003730037282643287451027220010000200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830038
1020430037243061295472510100100100001001000050042771603006503003730037282643287451010020010180200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830085
10204300372250160295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830038
10204300372251261295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037111020110099100100100001000071011611296330100001003008530038300383003830038
1020430037225061295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037111020110099100100100001000073911611296330100001003003830038300383003830038
1020430037225061295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830038
10204300372240114295472510100100100001001000050042771603001833003730037282643287451010020010000200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830038
1020430037225061295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830038
1020430037225061295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037311020110099100100100001000071011611296330100001003003830038300383003830038
1020430037225061295472510100100100001001000050042771603001803003730037282643287451010020010000200200003003730037111020110099100100100001000071011611296330100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037241006129547251001010100001010000504277160030018300373003728286328767100102010000202000030228300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160130018300373003728286328767100102010000202000030037300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160130018300373003728286328767100102010000202000030037300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160030018300853003728299328767100102010180202000030037300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160030018300373003728286328767100102010000202000030037300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160030018300373003728286328767100102010000202032230037300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160030018300373003728286328767100102010000202000030037300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160130018300373003728286328767100102010000202000030037300371110021109101010000100306402162229629010000103003830038300383003830038
100243003722500352629547251001010100001010000504277160030018300853003728286328767100102010000202000030135300371110021109101010000100006402162229629010000103003830038300383003830038
1002430037225006129547251001010100001010000504277160030018300373003728286328767100102010000202000030037300371110021109101010000100006402162229629010000103003830038300383003830038

Test 3: throughput

Count: 8

Code:

  smaxv b0, v8.16b
  smaxv b1, v8.16b
  smaxv b2, v8.16b
  smaxv b3, v8.16b
  smaxv b4, v8.16b
  smaxv b5, v8.16b
  smaxv b6, v8.16b
  smaxv b7, v8.16b
  movi v8.16b, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2505

retire (01) | cycle (02) | 03 | 07 | 08 | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020420061150000015030258010810080008100800205006401320200202003920039997769990801202008003220016006420039200391180201100991001008000010000011151181161120036800001002004020040200402004020040
802042003915000000030258010810080008100800205006401320200202003920039997769990801202008034420016006420039200391180201100991001008000010000011151181161120036800001002004020040200402004020040
802042003915001000030258010810080008100800205006401320200202003920039997769990801202008003220016006420039200391180201100991001008000010000011151181161120036800001002004020040200402004020040
80204200391500000204030258010810080008100800205006401320200202003920039997769990801202008003220016006420039200391180201100991001008000010000011151181161120036800001002004020040200402004020040
802042003915000000030258010810080008100800205006401320200202003920039997769990801202008003220016006420039200391180201100991001008000010000011151181161120036800001002004020040200402004020040
8020420039150000000170258010810080008100800205006401320200202003920039997769990804252008003220016006420039200391180201100991001008000010000011151181161120036800001002004020040200402004020040
802042003915000000030258010810080008100800205006401320201822003920039997769990801202008003220016006420039200391180201100991001008000010000011151181161120036800001002004020107200402004020040
802042003915000000030258010810080008100800205006401320200202003920039997769990801202008003220016006420039200391180201100991001008000010000022251292232220046800001002004920049200492005020049
8020420049150000000642680116100800161008002850064019602002920048200489976109986801282008003820016007620048200491180201100991001008000010000022251282232320046800001002004920049200502005020049
802042004815000000064268011610080016100800285006401960200292004820048997699986801282008003820016007620048200481180201100991001008000010000022251282232220045800001002005020049200492004920050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2505

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a7 | a8 | a9 | ac | c2 | c5 | cf | d2 | d5 | d6 | db | dd | inst fetch restart (de) | df | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800242005015500040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022141605522003680000102004020040200402004020040
80024200391500027402580010108000010801045064000001200602013120142100156100738001020800002016000020039200391180021109101080000100000005022031603522003680000102004020040200402004020040
800242003915000040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100009005022051603522003680000102004020040200402004020040
800242003915000040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022031607522003680000102004020040200402004020040
800242003915000040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022021603222003680000102004020040200402004020040
800242003915000040518001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022021606522003680000102004020040200402004020040
800242003915000040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000015022021605322003680000102004020040200402004020040
800242003915000040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022051605322003680000102004020040200402004020040
8002420039150001240258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022051603322003680000102004020040200402004020040
800242003915000040258001010800001080000506400000120020200392003999963100198001020800002016000020039200391180021109101080000100000005022051605522003680000102004020040200402004020040